pci.c revision 221138
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 221138 2011-04-27 20:08:44Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#define	PCIR_IS_BIOS(cfg, reg)						\
73	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
74	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
75
76
77static pci_addr_t	pci_mapbase(uint64_t mapreg);
78static const char	*pci_maptype(uint64_t mapreg);
79static int		pci_mapsize(uint64_t testval);
80static int		pci_maprange(uint64_t mapreg);
81static pci_addr_t	pci_rombase(uint64_t mapreg);
82static int		pci_romsize(uint64_t testval);
83static void		pci_fixancient(pcicfgregs *cfg);
84static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85
86static int		pci_porten(device_t dev);
87static int		pci_memen(device_t dev);
88static void		pci_assign_interrupt(device_t bus, device_t dev,
89			    int force_route);
90static int		pci_add_map(device_t bus, device_t dev, int reg,
91			    struct resource_list *rl, int force, int prefetch);
92static int		pci_probe(device_t dev);
93static int		pci_attach(device_t dev);
94static void		pci_load_vendor_data(void);
95static int		pci_describe_parse_line(char **ptr, int *vendor,
96			    int *device, char **desc);
97static char		*pci_describe_device(device_t dev);
98static int		pci_modevent(module_t mod, int what, void *arg);
99static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100			    pcicfgregs *cfg);
101static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103			    int reg, uint32_t *data);
104#if 0
105static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106			    int reg, uint32_t data);
107#endif
108static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109static void		pci_disable_msi(device_t dev);
110static void		pci_enable_msi(device_t dev, uint64_t address,
111			    uint16_t data);
112static void		pci_enable_msix(device_t dev, u_int index,
113			    uint64_t address, uint32_t data);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static void		pci_resume_msi(device_t dev);
118static void		pci_resume_msix(device_t dev);
119static int		pci_remap_intr_method(device_t bus, device_t dev,
120			    u_int irq);
121
/*
 * Method table for the "pci" bus driver.  It implements three kernel
 * object interfaces: the device interface (probe/attach/suspend/...),
 * the bus interface (resource and interrupt management for children),
 * and the PCI-specific interface (config space, power states, MSI/MSI-X).
 * bus_generic_* entries delegate to the default implementations.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
174
175DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
176
177static devclass_t pci_devclass;
178DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
179MODULE_VERSION(pci, 1);
180
181static char	*pci_vendordata;
182static size_t	pci_vendordata_size;
183
184
/*
 * Per-device quirk description, matched against the combined 32-bit
 * device/vendor ID (as read from PCIR_DEVVENDOR).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* type-specific; config register offset for
			   PCI_QUIRK_MAP_REG, unused otherwise */
	int	arg2;
};
194
/* Table of known quirky devices; scanned linearly, zero devid ends it. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* list terminator */
};
235
236/* map register information */
237#define	PCI_MAPMEM	0x01	/* memory map */
238#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
239#define	PCI_MAPPORT	0x04	/* port map */
240
241struct devlist pci_devq;
242uint32_t pci_generation;
243uint32_t pci_numdevs = 0;
244static int pcie_chipset, pcix_chipset;
245
246/* sysctl vars */
247SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
248
249static int pci_enable_io_modes = 1;
250TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
251SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
252    &pci_enable_io_modes, 1,
253    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
254enable these bits correctly.  We'd like to do this all the time, but there\n\
255are some peripherals that this causes problems with.");
256
257static int pci_do_power_nodriver = 0;
258TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
259SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
260    &pci_do_power_nodriver, 0,
261  "Place a function into D3 state when no driver attaches to it.  0 means\n\
262disable.  1 means conservatively place devices into D3 state.  2 means\n\
263agressively place devices into D3 state.  3 means put absolutely everything\n\
264in D3 state.");
265
266int pci_do_power_resume = 1;
267TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
268SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
269    &pci_do_power_resume, 1,
270  "Transition from D3 -> D0 on resume.");
271
272int pci_do_power_suspend = 1;
273TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
274SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
275    &pci_do_power_suspend, 1,
276  "Transition from D0 -> D3 on suspend.");
277
278static int pci_do_msi = 1;
279TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
280SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
281    "Enable support for MSI interrupts");
282
283static int pci_do_msix = 1;
284TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
285SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
286    "Enable support for MSI-X interrupts");
287
288static int pci_honor_msi_blacklist = 1;
289TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
290SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
291    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
292
293#if defined(__i386__) || defined(__amd64__)
294static int pci_usb_takeover = 1;
295#else
296static int pci_usb_takeover = 0;
297#endif
298TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
299SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
300    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
301Disable this if you depend on BIOS emulation of USB devices, that is\n\
302you use USB devices (like keyboard or mouse) but do not load USB drivers");
303
304/* Find a device_t by bus/slot/function in domain 0 */
305
306device_t
307pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
308{
309
310	return (pci_find_dbsf(0, bus, slot, func));
311}
312
313/* Find a device_t by domain/bus/slot/function */
314
315device_t
316pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
317{
318	struct pci_devinfo *dinfo;
319
320	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
321		if ((dinfo->cfg.domain == domain) &&
322		    (dinfo->cfg.bus == bus) &&
323		    (dinfo->cfg.slot == slot) &&
324		    (dinfo->cfg.func == func)) {
325			return (dinfo->cfg.dev);
326		}
327	}
328
329	return (NULL);
330}
331
332/* Find a device_t by vendor/device ID */
333
334device_t
335pci_find_device(uint16_t vendor, uint16_t device)
336{
337	struct pci_devinfo *dinfo;
338
339	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
340		if ((dinfo->cfg.vendor == vendor) &&
341		    (dinfo->cfg.device == device)) {
342			return (dinfo->cfg.dev);
343		}
344	}
345
346	return (NULL);
347}
348
349static int
350pci_printf(pcicfgregs *cfg, const char *fmt, ...)
351{
352	va_list ap;
353	int retval;
354
355	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
356	    cfg->func);
357	va_start(ap, fmt);
358	retval += vprintf(fmt, ap);
359	va_end(ap);
360	return (retval);
361}
362
363/* return base address of memory or port map */
364
365static pci_addr_t
366pci_mapbase(uint64_t mapreg)
367{
368
369	if (PCI_BAR_MEM(mapreg))
370		return (mapreg & PCIM_BAR_MEM_BASE);
371	else
372		return (mapreg & PCIM_BAR_IO_BASE);
373}
374
375/* return map type of memory or port map */
376
377static const char *
378pci_maptype(uint64_t mapreg)
379{
380
381	if (PCI_BAR_IO(mapreg))
382		return ("I/O Port");
383	if (mapreg & PCIM_BAR_MEM_PREFETCH)
384		return ("Prefetchable Memory");
385	return ("Memory");
386}
387
388/* return log2 of map size decoded for memory or port map */
389
390static int
391pci_mapsize(uint64_t testval)
392{
393	int ln2size;
394
395	testval = pci_mapbase(testval);
396	ln2size = 0;
397	if (testval != 0) {
398		while ((testval & 1) == 0)
399		{
400			ln2size++;
401			testval >>= 1;
402		}
403	}
404	return (ln2size);
405}
406
407/* return base address of device ROM */
408
409static pci_addr_t
410pci_rombase(uint64_t mapreg)
411{
412
413	return (mapreg & PCIM_BIOS_ADDR_MASK);
414}
415
416/* return log2 of map size decided for device ROM */
417
418static int
419pci_romsize(uint64_t testval)
420{
421	int ln2size;
422
423	testval = pci_rombase(testval);
424	ln2size = 0;
425	if (testval != 0) {
426		while ((testval & 1) == 0)
427		{
428			ln2size++;
429			testval >>= 1;
430		}
431	}
432	return (ln2size);
433}
434
435/* return log2 of address range supported by map register */
436
437static int
438pci_maprange(uint64_t mapreg)
439{
440	int ln2range = 0;
441
442	if (PCI_BAR_IO(mapreg))
443		ln2range = 32;
444	else
445		switch (mapreg & PCIM_BAR_MEM_TYPE) {
446		case PCIM_BAR_MEM_32:
447			ln2range = 32;
448			break;
449		case PCIM_BAR_MEM_1MB:
450			ln2range = 20;
451			break;
452		case PCIM_BAR_MEM_64:
453			ln2range = 64;
454			break;
455		}
456	return (ln2range);
457}
458
459/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
460
461static void
462pci_fixancient(pcicfgregs *cfg)
463{
464	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
465		return;
466
467	/* PCI to PCI bridges use header type 1 */
468	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
469		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
470}
471
472/* extract header type specific config data */
473
474static void
475pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
476{
477#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
478	switch (cfg->hdrtype & PCIM_HDRTYPE) {
479	case PCIM_HDRTYPE_NORMAL:
480		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
481		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
482		cfg->nummaps	    = PCI_MAXMAPS_0;
483		break;
484	case PCIM_HDRTYPE_BRIDGE:
485		cfg->nummaps	    = PCI_MAXMAPS_1;
486		break;
487	case PCIM_HDRTYPE_CARDBUS:
488		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
489		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
490		cfg->nummaps	    = PCI_MAXMAPS_2;
491		break;
492	}
493#undef REG
494}
495
/*
 * Read the configuration header of the function at domain 'd', bus 'b',
 * slot 's', function 'f' into a freshly allocated pci_devinfo of 'size'
 * bytes ('size' lets callers embed pci_devinfo in a larger structure).
 * The new entry is appended to the global device list and its pciconf
 * snapshot is filled in.  Returns NULL if no device responds at that
 * address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device ID means nothing answered the read. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * NOTE(review): malloc(9) with M_WAITOK cannot return
		 * NULL, so the check below is dead defensive code.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record the multi-function bit, then strip it off. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Fill in the pciconf selector/identity snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
571
/*
 * Walk the PCI capability list of the function described by 'cfg' and
 * record the location/contents of the capabilities this code cares
 * about: power management, HyperTransport, MSI, MSI-X, VPD, subvendor,
 * PCI-X, and PCI-express.  Also sets the global pcix_chipset /
 * pcie_chipset hints, and enables the MSI mapping window on
 * HyperTransport slaves on x86/powerpc.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's offset depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets must lie within config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The MMC field encodes message count as log2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Low bits select the BAR, the rest is an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
729
730/*
731 * PCI Vital Product Data
732 */
733
734#define	PCI_VPD_TIMEOUT		1000000
735
736static int
737pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
738{
739	int count = PCI_VPD_TIMEOUT;
740
741	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
742
743	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
744
745	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
746		if (--count < 0)
747			return (ENXIO);
748		DELAY(1);	/* limit looping */
749	}
750	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
751
752	return (0);
753}
754
#if 0
/*
 * Write one 32-bit word of VPD data at 4-byte aligned offset 'reg'.
 * Mirror of pci_read_vpd_reg(): writes the data register, then the
 * address register with the flag bit (0x8000) set, and polls until the
 * device clears the flag.  Returns 0 on success or ENXIO on timeout.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Bit 15 clears when the hardware has consumed the data. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
774
775#undef PCI_VPD_TIMEOUT
776
/*
 * Cursor state for sequential byte reads of a device's VPD area via
 * vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* VPD offset of next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
785
786static int
787vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
788{
789	uint32_t reg;
790	uint8_t byte;
791
792	if (vrs->bytesinval == 0) {
793		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
794			return (ENXIO);
795		vrs->val = le32toh(reg);
796		vrs->off += 4;
797		byte = vrs->val & 0xff;
798		vrs->bytesinval = 3;
799	} else {
800		vrs->val = vrs->val >> 8;
801		byte = vrs->val & 0xff;
802		vrs->bytesinval--;
803	}
804
805	vrs->cksum += byte;
806	*data = byte;
807	return (0);
808}
809
/*
 * Parse the device's entire VPD area into cfg->vpd.  Implemented as a
 * state machine driven one byte at a time by vpd_nextbyte():
 *
 *   state 0  - resource tag (small or large form)
 *   state 1  - Identifier String body -> cfg->vpd.vpd_ident
 *   state 2  - VPD-R keyword header   -> cfg->vpd.vpd_ros[]
 *   state 3  - VPD-R keyword value (validates the RV checksum)
 *   state 4  - skip bytes (NOTE(review): nothing transitions here;
 *              appears to be dead code)
 *   state 5  - VPD-W keyword header   -> cfg->vpd.vpd_w[]
 *   state 6  - VPD-W keyword value
 *   state -1 - normal termination; state -2 - I/O error
 *
 * On checksum failure the read-only data is discarded; on I/O error
 * everything parsed so far is freed.  vpd_cached is set regardless so
 * the (possibly failed) parse is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD space is at most 0x7f 4-byte words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag byte. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2) + length (1) consumed */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the sum of all
			 * VPD bytes up to and including it equal zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset of this writable field. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword (2) + length (1) consumed */
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1089
1090int
1091pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1092{
1093	struct pci_devinfo *dinfo = device_get_ivars(child);
1094	pcicfgregs *cfg = &dinfo->cfg;
1095
1096	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1097		pci_read_vpd(device_get_parent(dev), cfg);
1098
1099	*identptr = cfg->vpd.vpd_ident;
1100
1101	if (*identptr == NULL)
1102		return (ENXIO);
1103
1104	return (0);
1105}
1106
1107int
1108pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1109	const char **vptr)
1110{
1111	struct pci_devinfo *dinfo = device_get_ivars(child);
1112	pcicfgregs *cfg = &dinfo->cfg;
1113	int i;
1114
1115	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1116		pci_read_vpd(device_get_parent(dev), cfg);
1117
1118	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1119		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1120		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1121			*vptr = cfg->vpd.vpd_ros[i].value;
1122		}
1123
1124	if (i != cfg->vpd.vpd_rocnt)
1125		return (0);
1126
1127	*vptr = NULL;
1128	return (ENXIO);
1129}
1130
1131/*
1132 * Find the requested extended capability and return the offset in
1133 * configuration space via the pointer provided. The function returns
1134 * 0 on success and error code otherwise.
1135 */
1136int
1137pci_find_extcap_method(device_t dev, device_t child, int capability,
1138    int *capreg)
1139{
1140	struct pci_devinfo *dinfo = device_get_ivars(child);
1141	pcicfgregs *cfg = &dinfo->cfg;
1142	u_int32_t status;
1143	u_int8_t ptr;
1144
1145	/*
1146	 * Check the CAP_LIST bit of the PCI status register first.
1147	 */
1148	status = pci_read_config(child, PCIR_STATUS, 2);
1149	if (!(status & PCIM_STATUS_CAPPRESENT))
1150		return (ENXIO);
1151
1152	/*
1153	 * Determine the start pointer of the capabilities list.
1154	 */
1155	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1156	case PCIM_HDRTYPE_NORMAL:
1157	case PCIM_HDRTYPE_BRIDGE:
1158		ptr = PCIR_CAP_PTR;
1159		break;
1160	case PCIM_HDRTYPE_CARDBUS:
1161		ptr = PCIR_CAP_PTR_2;
1162		break;
1163	default:
1164		/* XXX: panic? */
1165		return (ENXIO);		/* no extended capabilities support */
1166	}
1167	ptr = pci_read_config(child, ptr, 1);
1168
1169	/*
1170	 * Traverse the capabilities list.
1171	 */
1172	while (ptr != 0) {
1173		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1174			if (capreg != NULL)
1175				*capreg = ptr;
1176			return (0);
1177		}
1178		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1179	}
1180
1181	return (ENOENT);
1182}
1183
1184/*
1185 * Support for MSI-X message interrupts.
1186 */
1187void
1188pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1189{
1190	struct pci_devinfo *dinfo = device_get_ivars(dev);
1191	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1192	uint32_t offset;
1193
1194	KASSERT(msix->msix_table_len > index, ("bogus index"));
1195	offset = msix->msix_table_offset + index * 16;
1196	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1197	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1198	bus_write_4(msix->msix_table_res, offset + 8, data);
1199
1200	/* Enable MSI -> HT mapping. */
1201	pci_ht_map_msi(dev, address);
1202}
1203
1204void
1205pci_mask_msix(device_t dev, u_int index)
1206{
1207	struct pci_devinfo *dinfo = device_get_ivars(dev);
1208	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1209	uint32_t offset, val;
1210
1211	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1212	offset = msix->msix_table_offset + index * 16 + 12;
1213	val = bus_read_4(msix->msix_table_res, offset);
1214	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1215		val |= PCIM_MSIX_VCTRL_MASK;
1216		bus_write_4(msix->msix_table_res, offset, val);
1217	}
1218}
1219
1220void
1221pci_unmask_msix(device_t dev, u_int index)
1222{
1223	struct pci_devinfo *dinfo = device_get_ivars(dev);
1224	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1225	uint32_t offset, val;
1226
1227	KASSERT(msix->msix_table_len > index, ("bogus index"));
1228	offset = msix->msix_table_offset + index * 16 + 12;
1229	val = bus_read_4(msix->msix_table_res, offset);
1230	if (val & PCIM_MSIX_VCTRL_MASK) {
1231		val &= ~PCIM_MSIX_VCTRL_MASK;
1232		bus_write_4(msix->msix_table_res, offset, val);
1233	}
1234}
1235
1236int
1237pci_pending_msix(device_t dev, u_int index)
1238{
1239	struct pci_devinfo *dinfo = device_get_ivars(dev);
1240	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1241	uint32_t offset, bit;
1242
1243	KASSERT(msix->msix_table_len > index, ("bogus index"));
1244	offset = msix->msix_pba_offset + (index / 32) * 4;
1245	bit = 1 << index % 32;
1246	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1247}
1248
1249/*
1250 * Restore MSI-X registers and table during resume.  If MSI-X is
1251 * enabled then walk the virtual table to restore the actual MSI-X
1252 * table.
1253 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1281
1282/*
1283 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1284 * returned in *count.  After this function returns, each message will be
1285 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1286 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s)
	 * holding the MSI-X table and PBA before asking for vectors.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		/* The PBA lives in a different BAR; check it as well. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* Here 'rle' names the PBA BAR (same as the table BAR if shared). */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Partial allocations are accepted: actual may be less than max. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.  The
	 * default layout maps vector i+1 onto table entry i; drivers
	 * can change this later via pci_remap_msix().
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1418
1419/*
1420 * By default, pci_alloc_msix() will assign the allocated IRQ
1421 * resources consecutively to the first N messages in the MSI-X table.
1422 * However, device drivers may want to use different layouts if they
1423 * either receive fewer messages than they asked for, or they wish to
1424 * populate the MSI-X table sparsely.  This method allows the driver
1425 * to specify what layout it wants.  It must be called after a
1426 * successful pci_alloc_msix() but before any of the associated
1427 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1428 *
1429 * The 'vectors' array contains 'count' message vectors.  The array
1430 * maps directly to the MSI-X table in that index 0 in the array
1431 * specifies the vector for the first message in the MSI-X table, etc.
1432 * The vector value in each array index can either be 0 to indicate
1433 * that no vector should be assigned to a message slot, or it can be a
1434 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1436 * vector (IRQ) to be used for the corresponding message.
1437 *
1438 * On successful return, each message with a non-zero vector will have
1439 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1440 * 1.  Additionally, if any of the IRQs allocated via the previous
1441 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1442 * will be freed back to the system automatically.
1443 *
1444 * For example, suppose a driver has a MSI-X table with 6 messages and
1445 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1446 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1447 * C.  After the call to pci_alloc_msix(), the device will be setup to
1448 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1450 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1451 * be freed back to the system.  This device will also have valid
1452 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1453 *
1454 * In any case, the SYS_RES_IRQ rid X will always map to the message
1455 * at MSI-X table index X - 1 and will only be valid if a vector is
1456 * assigned to that table entry.
1457 */
1458int
1459pci_remap_msix_method(device_t dev, device_t child, int count,
1460    const u_int *vectors)
1461{
1462	struct pci_devinfo *dinfo = device_get_ivars(child);
1463	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1464	struct resource_list_entry *rle;
1465	int i, irq, j, *used;
1466
1467	/*
1468	 * Have to have at least one message in the table but the
1469	 * table can't be bigger than the actual MSI-X table in the
1470	 * device.
1471	 */
1472	if (count == 0 || count > msix->msix_msgnum)
1473		return (EINVAL);
1474
1475	/* Sanity check the vectors. */
1476	for (i = 0; i < count; i++)
1477		if (vectors[i] > msix->msix_alloc)
1478			return (EINVAL);
1479
1480	/*
1481	 * Make sure there aren't any holes in the vectors to be used.
1482	 * It's a big pain to support it, and it doesn't really make
1483	 * sense anyway.  Also, at least one vector must be used.
1484	 */
1485	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1486	    M_ZERO);
1487	for (i = 0; i < count; i++)
1488		if (vectors[i] != 0)
1489			used[vectors[i] - 1] = 1;
1490	for (i = 0; i < msix->msix_alloc - 1; i++)
1491		if (used[i] == 0 && used[i + 1] == 1) {
1492			free(used, M_DEVBUF);
1493			return (EINVAL);
1494		}
1495	if (used[0] != 1) {
1496		free(used, M_DEVBUF);
1497		return (EINVAL);
1498	}
1499
1500	/* Make sure none of the resources are allocated. */
1501	for (i = 0; i < msix->msix_table_len; i++) {
1502		if (msix->msix_table[i].mte_vector == 0)
1503			continue;
1504		if (msix->msix_table[i].mte_handlers > 0)
1505			return (EBUSY);
1506		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1507		KASSERT(rle != NULL, ("missing resource"));
1508		if (rle->res != NULL)
1509			return (EBUSY);
1510	}
1511
1512	/* Free the existing resource list entries. */
1513	for (i = 0; i < msix->msix_table_len; i++) {
1514		if (msix->msix_table[i].mte_vector == 0)
1515			continue;
1516		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1517	}
1518
1519	/*
1520	 * Build the new virtual table keeping track of which vectors are
1521	 * used.
1522	 */
1523	free(msix->msix_table, M_DEVBUF);
1524	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1525	    M_DEVBUF, M_WAITOK | M_ZERO);
1526	for (i = 0; i < count; i++)
1527		msix->msix_table[i].mte_vector = vectors[i];
1528	msix->msix_table_len = count;
1529
1530	/* Free any unused IRQs and resize the vectors array if necessary. */
1531	j = msix->msix_alloc - 1;
1532	if (used[j] == 0) {
1533		struct msix_vector *vec;
1534
1535		while (used[j] == 0) {
1536			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1537			    msix->msix_vectors[j].mv_irq);
1538			j--;
1539		}
1540		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1541		    M_WAITOK);
1542		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1543		    (j + 1));
1544		free(msix->msix_vectors, M_DEVBUF);
1545		msix->msix_vectors = vec;
1546		msix->msix_alloc = j + 1;
1547	}
1548	free(used, M_DEVBUF);
1549
1550	/* Map the IRQs onto the rids. */
1551	for (i = 0; i < count; i++) {
1552		if (vectors[i] == 0)
1553			continue;
1554		irq = msix->msix_vectors[vectors[i]].mv_irq;
1555		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1556		    irq, 1);
1557	}
1558
1559	if (bootverbose) {
1560		device_printf(child, "Remapped MSI-X IRQs as: ");
1561		for (i = 0; i < count; i++) {
1562			if (i != 0)
1563				printf(", ");
1564			if (vectors[i] == 0)
1565				printf("---");
1566			else
1567				printf("%d",
1568				    msix->msix_vectors[vectors[i]].mv_irq);
1569		}
1570		printf("\n");
1571	}
1572
1573	return (0);
1574}
1575
1576static int
1577pci_release_msix(device_t dev, device_t child)
1578{
1579	struct pci_devinfo *dinfo = device_get_ivars(child);
1580	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1581	struct resource_list_entry *rle;
1582	int i;
1583
1584	/* Do we have any messages to release? */
1585	if (msix->msix_alloc == 0)
1586		return (ENODEV);
1587
1588	/* Make sure none of the resources are allocated. */
1589	for (i = 0; i < msix->msix_table_len; i++) {
1590		if (msix->msix_table[i].mte_vector == 0)
1591			continue;
1592		if (msix->msix_table[i].mte_handlers > 0)
1593			return (EBUSY);
1594		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1595		KASSERT(rle != NULL, ("missing resource"));
1596		if (rle->res != NULL)
1597			return (EBUSY);
1598	}
1599
1600	/* Update control register to disable MSI-X. */
1601	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1602	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1603	    msix->msix_ctrl, 2);
1604
1605	/* Free the resource list entries. */
1606	for (i = 0; i < msix->msix_table_len; i++) {
1607		if (msix->msix_table[i].mte_vector == 0)
1608			continue;
1609		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1610	}
1611	free(msix->msix_table, M_DEVBUF);
1612	msix->msix_table_len = 0;
1613
1614	/* Release the IRQs. */
1615	for (i = 0; i < msix->msix_alloc; i++)
1616		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1617		    msix->msix_vectors[i].mv_irq);
1618	free(msix->msix_vectors, M_DEVBUF);
1619	msix->msix_alloc = 0;
1620	return (0);
1621}
1622
1623/*
1624 * Return the max supported MSI-X messages this device supports.
1625 * Basically, assuming the MD code can alloc messages, this function
1626 * should return the maximum value that pci_alloc_msix() can return.
1627 * Thus, it is subject to the tunables, etc.
1628 */
1629int
1630pci_msix_count_method(device_t dev, device_t child)
1631{
1632	struct pci_devinfo *dinfo = device_get_ivars(child);
1633	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1634
1635	if (pci_do_msix && msix->msix_location != 0)
1636		return (msix->msix_msgnum);
1637	return (0);
1638}
1639
1640/*
1641 * HyperTransport MSI mapping control
1642 */
1643void
1644pci_ht_map_msi(device_t dev, uint64_t addr)
1645{
1646	struct pci_devinfo *dinfo = device_get_ivars(dev);
1647	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1648
1649	if (!ht->ht_msimap)
1650		return;
1651
1652	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1653	    ht->ht_msiaddr >> 20 == addr >> 20) {
1654		/* Enable MSI -> HT mapping. */
1655		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1656		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1657		    ht->ht_msictrl, 2);
1658	}
1659
1660	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1661		/* Disable MSI -> HT mapping. */
1662		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1663		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1664		    ht->ht_msictrl, 2);
1665	}
1666}
1667
1668int
1669pci_get_max_read_req(device_t dev)
1670{
1671	int cap;
1672	uint16_t val;
1673
1674	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1675		return (0);
1676	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1677	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1678	val >>= 12;
1679	return (1 << (val + 7));
1680}
1681
1682int
1683pci_set_max_read_req(device_t dev, int size)
1684{
1685	int cap;
1686	uint16_t val;
1687
1688	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1689		return (0);
1690	if (size < 128)
1691		size = 128;
1692	if (size > 4096)
1693		size = 4096;
1694	size = (1 << (fls(size) - 1));
1695	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1696	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1697	val |= (fls(size) - 8) << 12;
1698	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1699	return (size);
1700}
1701
1702/*
1703 * Support for MSI message signalled interrupts.
1704 */
1705void
1706pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1707{
1708	struct pci_devinfo *dinfo = device_get_ivars(dev);
1709	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1710
1711	/* Write data and address values. */
1712	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1713	    address & 0xffffffff, 4);
1714	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1715		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1716		    address >> 32, 4);
1717		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1718		    data, 2);
1719	} else
1720		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1721		    2);
1722
1723	/* Enable MSI in the control register. */
1724	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1725	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1726	    2);
1727
1728	/* Enable MSI -> HT mapping. */
1729	pci_ht_map_msi(dev, address);
1730}
1731
1732void
1733pci_disable_msi(device_t dev)
1734{
1735	struct pci_devinfo *dinfo = device_get_ivars(dev);
1736	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1737
1738	/* Disable MSI -> HT mapping. */
1739	pci_ht_map_msi(dev, 0);
1740
1741	/* Disable MSI in the control register. */
1742	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1743	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1744	    2);
1745}
1746
1747/*
1748 * Restore MSI registers during resume.  If MSI is enabled then
1749 * restore the data and address registers in addition to the control
1750 * register.
1751 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Rewrite address/data from the saved software copies. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capability: data lives at a higher offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control register, even if MSI was disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1777
1778static int
1779pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1780{
1781	struct pci_devinfo *dinfo = device_get_ivars(dev);
1782	pcicfgregs *cfg = &dinfo->cfg;
1783	struct resource_list_entry *rle;
1784	struct msix_table_entry *mte;
1785	struct msix_vector *mv;
1786	uint64_t addr;
1787	uint32_t data;
1788	int error, i, j;
1789
1790	/*
1791	 * Handle MSI first.  We try to find this IRQ among our list
1792	 * of MSI IRQs.  If we find it, we request updated address and
1793	 * data registers and apply the results.
1794	 */
1795	if (cfg->msi.msi_alloc > 0) {
1796
1797		/* If we don't have any active handlers, nothing to do. */
1798		if (cfg->msi.msi_handlers == 0)
1799			return (0);
1800		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1801			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1802			    i + 1);
1803			if (rle->start == irq) {
1804				error = PCIB_MAP_MSI(device_get_parent(bus),
1805				    dev, irq, &addr, &data);
1806				if (error)
1807					return (error);
1808				pci_disable_msi(dev);
1809				dinfo->cfg.msi.msi_addr = addr;
1810				dinfo->cfg.msi.msi_data = data;
1811				pci_enable_msi(dev, addr, data);
1812				return (0);
1813			}
1814		}
1815		return (ENOENT);
1816	}
1817
1818	/*
1819	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1820	 * we request the updated mapping info.  If that works, we go
1821	 * through all the slots that use this IRQ and update them.
1822	 */
1823	if (cfg->msix.msix_alloc > 0) {
1824		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1825			mv = &cfg->msix.msix_vectors[i];
1826			if (mv->mv_irq == irq) {
1827				error = PCIB_MAP_MSI(device_get_parent(bus),
1828				    dev, irq, &addr, &data);
1829				if (error)
1830					return (error);
1831				mv->mv_address = addr;
1832				mv->mv_data = data;
1833				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1834					mte = &cfg->msix.msix_table[j];
1835					if (mte->mte_vector != i + 1)
1836						continue;
1837					if (mte->mte_handlers == 0)
1838						continue;
1839					pci_mask_msix(dev, j);
1840					pci_enable_msix(dev, j, addr, data);
1841					pci_unmask_msix(dev, j);
1842				}
1843			}
1844		}
1845		return (ENOENT);
1846	}
1847
1848	return (ENOENT);
1849}
1850
1851/*
1852 * Returns true if the specified device is blacklisted because MSI
1853 * doesn't work.
1854 */
1855int
1856pci_msi_device_blacklisted(device_t dev)
1857{
1858	struct pci_quirk *q;
1859
1860	if (!pci_honor_msi_blacklist)
1861		return (0);
1862
1863	for (q = &pci_quirks[0]; q->devid; q++) {
1864		if (q->devid == pci_get_devid(dev) &&
1865		    q->type == PCI_QUIRK_DISABLE_MSI)
1866			return (1);
1867	}
1868	return (0);
1869}
1870
1871/*
1872 * Returns true if a specified chipset supports MSI when it is
1873 * emulated hardware in a virtual machine.
1874 */
1875static int
1876pci_msi_vm_chipset(device_t dev)
1877{
1878	struct pci_quirk *q;
1879
1880	for (q = &pci_quirks[0]; q->devid; q++) {
1881		if (q->devid == pci_get_devid(dev) &&
1882		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1883			return (1);
1884	}
1885	return (0);
1886}
1887
1888/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1890 * we just check for blacklisted chipsets as represented by the
1891 * host-PCI bridge at device 0:0:0.  In the future, it may become
1892 * necessary to check other system attributes, such as the kenv values
1893 * that give the motherboard manufacturer and model number.
1894 */
1895static int
1896pci_msi_blacklisted(void)
1897{
1898	device_t dev;
1899
1900	if (!pci_honor_msi_blacklist)
1901		return (0);
1902
1903	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1904	if (!(pcie_chipset || pcix_chipset)) {
1905		if (vm_guest != VM_GUEST_NO) {
1906			dev = pci_find_bsf(0, 0, 0);
1907			if (dev != NULL)
1908				return (pci_msi_vm_chipset(dev) == 0);
1909		}
1910		return (1);
1911	}
1912
1913	dev = pci_find_bsf(0, 0, 0);
1914	if (dev != NULL)
1915		return (pci_msi_device_blacklisted(dev));
1916	return (0);
1917}
1918
1919/*
1920 * Attempt to allocate *count MSI messages.  The actual number allocated is
1921 * returned in *count.  After this function returns, each message will be
1922 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1923 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request until the parent bridge can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count: the Multiple
	 * Message Enable field holds log2(actual) in bits 4-6.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2042
2043/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2091
2092/*
2093 * Return the max supported MSI messages this device supports.
2094 * Basically, assuming the MD code can alloc messages, this function
2095 * should return the maximum value that pci_alloc_msi() can return.
2096 * Thus, it is subject to the tunables, etc.
2097 */
2098int
2099pci_msi_count_method(device_t dev, device_t child)
2100{
2101	struct pci_devinfo *dinfo = device_get_ivars(child);
2102	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2103
2104	if (pci_do_msi && msi->msi_location != 0)
2105		return (msi->msi_msgnum);
2106	return (0);
2107}
2108
/*
 * free pcicfgregs structure and all depending data structures: VPD
 * strings, BAR records, and the devinfo itself.  Also removes the
 * device from the global device list and updates the bookkeeping
 * counters.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/*
	 * VPD strings are only allocated once the VPD capability has
	 * been parsed (vpd_reg != 0), so only free them in that case.
	 */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the per-BAR records (safe variant: entries are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2142
2143/*
2144 * PCI power manangement
2145 */
2146int
2147pci_set_powerstate_method(device_t dev, device_t child, int state)
2148{
2149	struct pci_devinfo *dinfo = device_get_ivars(child);
2150	pcicfgregs *cfg = &dinfo->cfg;
2151	uint16_t status;
2152	int result, oldstate, highest, delay;
2153
2154	if (cfg->pp.pp_cap == 0)
2155		return (EOPNOTSUPP);
2156
2157	/*
2158	 * Optimize a no state change request away.  While it would be OK to
2159	 * write to the hardware in theory, some devices have shown odd
2160	 * behavior when going from D3 -> D3.
2161	 */
2162	oldstate = pci_get_powerstate(child);
2163	if (oldstate == state)
2164		return (0);
2165
2166	/*
2167	 * The PCI power management specification states that after a state
2168	 * transition between PCI power states, system software must
2169	 * guarantee a minimal delay before the function accesses the device.
2170	 * Compute the worst case delay that we need to guarantee before we
2171	 * access the device.  Many devices will be responsive much more
2172	 * quickly than this delay, but there are some that don't respond
2173	 * instantly to state changes.  Transitions to/from D3 state require
2174	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2175	 * is done below with DELAY rather than a sleeper function because
2176	 * this function can be called from contexts where we cannot sleep.
2177	 */
2178	highest = (oldstate > state) ? oldstate : state;
2179	if (highest == PCI_POWERSTATE_D3)
2180	    delay = 10000;
2181	else if (highest == PCI_POWERSTATE_D2)
2182	    delay = 200;
2183	else
2184	    delay = 0;
2185	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2186	    & ~PCIM_PSTAT_DMASK;
2187	result = 0;
2188	switch (state) {
2189	case PCI_POWERSTATE_D0:
2190		status |= PCIM_PSTAT_D0;
2191		break;
2192	case PCI_POWERSTATE_D1:
2193		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2194			return (EOPNOTSUPP);
2195		status |= PCIM_PSTAT_D1;
2196		break;
2197	case PCI_POWERSTATE_D2:
2198		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2199			return (EOPNOTSUPP);
2200		status |= PCIM_PSTAT_D2;
2201		break;
2202	case PCI_POWERSTATE_D3:
2203		status |= PCIM_PSTAT_D3;
2204		break;
2205	default:
2206		return (EINVAL);
2207	}
2208
2209	if (bootverbose)
2210		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2211		    state);
2212
2213	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2214	if (delay)
2215		DELAY(delay);
2216	return (0);
2217}
2218
2219int
2220pci_get_powerstate_method(device_t dev, device_t child)
2221{
2222	struct pci_devinfo *dinfo = device_get_ivars(child);
2223	pcicfgregs *cfg = &dinfo->cfg;
2224	uint16_t status;
2225	int result;
2226
2227	if (cfg->pp.pp_cap != 0) {
2228		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2229		switch (status & PCIM_PSTAT_DMASK) {
2230		case PCIM_PSTAT_D0:
2231			result = PCI_POWERSTATE_D0;
2232			break;
2233		case PCIM_PSTAT_D1:
2234			result = PCI_POWERSTATE_D1;
2235			break;
2236		case PCIM_PSTAT_D2:
2237			result = PCI_POWERSTATE_D2;
2238			break;
2239		case PCIM_PSTAT_D3:
2240			result = PCI_POWERSTATE_D3;
2241			break;
2242		default:
2243			result = PCI_POWERSTATE_UNKNOWN;
2244			break;
2245		}
2246	} else {
2247		/* No support, device is always at D0 */
2248		result = PCI_POWERSTATE_D0;
2249	}
2250	return (result);
2251}
2252
2253/*
2254 * Some convenience functions for PCI device drivers.
2255 */
2256
2257static __inline void
2258pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2259{
2260	uint16_t	command;
2261
2262	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2263	command |= bit;
2264	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2265}
2266
2267static __inline void
2268pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2269{
2270	uint16_t	command;
2271
2272	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2273	command &= ~bit;
2274	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2275}
2276
2277int
2278pci_enable_busmaster_method(device_t dev, device_t child)
2279{
2280	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2281	return (0);
2282}
2283
2284int
2285pci_disable_busmaster_method(device_t dev, device_t child)
2286{
2287	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2288	return (0);
2289}
2290
2291int
2292pci_enable_io_method(device_t dev, device_t child, int space)
2293{
2294	uint16_t bit;
2295
2296	switch(space) {
2297	case SYS_RES_IOPORT:
2298		bit = PCIM_CMD_PORTEN;
2299		break;
2300	case SYS_RES_MEMORY:
2301		bit = PCIM_CMD_MEMEN;
2302		break;
2303	default:
2304		return (EINVAL);
2305	}
2306	pci_set_command_bit(dev, child, bit);
2307	return (0);
2308}
2309
2310int
2311pci_disable_io_method(device_t dev, device_t child, int space)
2312{
2313	uint16_t bit;
2314
2315	switch(space) {
2316	case SYS_RES_IOPORT:
2317		bit = PCIM_CMD_PORTEN;
2318		break;
2319	case SYS_RES_MEMORY:
2320		bit = PCIM_CMD_MEMEN;
2321		break;
2322	default:
2323		return (EINVAL);
2324	}
2325	pci_clear_command_bit(dev, child, bit);
2326	return (0);
2327}
2328
2329/*
2330 * New style pci driver.  Parent device is either a pci-host-bridge or a
2331 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2332 */
2333
/*
 * Dump the interesting fields of a device's parsed config header when
 * booting verbose: IDs, location, class, command/status, timers, the
 * interrupt pin/line, and any power-management, MSI, or MSI-X
 * capabilities that were found.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* Timer values are scaled to nanoseconds for display. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTA..INTD ('a' + pin - 1). */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may live in the same BAR or two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2390
2391static int
2392pci_porten(device_t dev)
2393{
2394	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2395}
2396
2397static int
2398pci_memen(device_t dev)
2399{
2400	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2401}
2402
/*
 * Read a BAR's current value and size it.  On return, *mapp holds the
 * programmed BAR value (with the upper dword merged in for 64-bit
 * BARs) and *testvalp holds the value read back after writing all 1's,
 * from which the BAR's size can be derived.  The original BAR value
 * and the command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the enable bit (bit 0) clear while sizing. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2466
/*
 * Program a BAR with a new base address and refresh the cached
 * pm_value by reading the register(s) back.  64-bit BARs have their
 * upper half written to the following dword register.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the hardware actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev, pm->pm_reg + 4, 4) << 32;
}
2486
2487struct pci_map *
2488pci_find_bar(device_t dev, int reg)
2489{
2490	struct pci_devinfo *dinfo;
2491	struct pci_map *pm;
2492
2493	dinfo = device_get_ivars(dev);
2494	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2495		if (pm->pm_reg == reg)
2496			return (pm);
2497	}
2498	return (NULL);
2499}
2500
2501int
2502pci_bar_enabled(device_t dev, struct pci_map *pm)
2503{
2504	struct pci_devinfo *dinfo;
2505	uint16_t cmd;
2506
2507	dinfo = device_get_ivars(dev);
2508	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2509	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2510		return (0);
2511	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2512	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2513		return ((cmd & PCIM_CMD_MEMEN) != 0);
2514	else
2515		return ((cmd & PCIM_CMD_PORTEN) != 0);
2516}
2517
/*
 * Allocate a new BAR record and insert it into the device's map list,
 * which is kept sorted by ascending config register offset.  'value'
 * is the current BAR contents and 'size' is log2 of the BAR's length.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty; append becomes insert-head. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2542
/*
 * Reprogram every recorded BAR with its cached pm_value, e.g. after
 * the device lost its configuration (power transition or reset).
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* The ROM BAR is always 32-bit; others may be 64-bit. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2562
2563/*
2564 * Add a resource based on a pci map register. Return 1 if the map
2565 * register is a 32bit map register or 2 if it is a 64bit register.
2566 */
2567static int
2568pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2569    int force, int prefetch)
2570{
2571	struct pci_map *pm;
2572	pci_addr_t base, map, testval;
2573	pci_addr_t start, end, count;
2574	int barlen, basezero, maprange, mapsize, type;
2575	uint16_t cmd;
2576	struct resource *res;
2577
2578	pci_read_bar(dev, reg, &map, &testval);
2579	if (PCI_BAR_MEM(map)) {
2580		type = SYS_RES_MEMORY;
2581		if (map & PCIM_BAR_MEM_PREFETCH)
2582			prefetch = 1;
2583	} else
2584		type = SYS_RES_IOPORT;
2585	mapsize = pci_mapsize(testval);
2586	base = pci_mapbase(map);
2587#ifdef __PCI_BAR_ZERO_VALID
2588	basezero = 0;
2589#else
2590	basezero = base == 0;
2591#endif
2592	maprange = pci_maprange(map);
2593	barlen = maprange == 64 ? 2 : 1;
2594
2595	/*
2596	 * For I/O registers, if bottom bit is set, and the next bit up
2597	 * isn't clear, we know we have a BAR that doesn't conform to the
2598	 * spec, so ignore it.  Also, sanity check the size of the data
2599	 * areas to the type of memory involved.  Memory must be at least
2600	 * 16 bytes in size, while I/O ranges must be at least 4.
2601	 */
2602	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2603		return (barlen);
2604	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2605	    (type == SYS_RES_IOPORT && mapsize < 2))
2606		return (barlen);
2607
2608	/* Save a record of this BAR. */
2609	pm = pci_add_bar(dev, reg, map, mapsize);
2610	if (bootverbose) {
2611		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2612		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2613		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2614			printf(", port disabled\n");
2615		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2616			printf(", memory disabled\n");
2617		else
2618			printf(", enabled\n");
2619	}
2620
2621	/*
2622	 * If base is 0, then we have problems if this architecture does
2623	 * not allow that.  It is best to ignore such entries for the
2624	 * moment.  These will be allocated later if the driver specifically
2625	 * requests them.  However, some removable busses look better when
2626	 * all resources are allocated, so allow '0' to be overriden.
2627	 *
2628	 * Similarly treat maps whose values is the same as the test value
2629	 * read back.  These maps have had all f's written to them by the
2630	 * BIOS in an attempt to disable the resources.
2631	 */
2632	if (!force && (basezero || map == testval))
2633		return (barlen);
2634	if ((u_long)base != base) {
2635		device_printf(bus,
2636		    "pci%d:%d:%d:%d bar %#x too many address bits",
2637		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2638		    pci_get_function(dev), reg);
2639		return (barlen);
2640	}
2641
2642	/*
2643	 * This code theoretically does the right thing, but has
2644	 * undesirable side effects in some cases where peripherals
2645	 * respond oddly to having these bits enabled.  Let the user
2646	 * be able to turn them off (since pci_enable_io_modes is 1 by
2647	 * default).
2648	 */
2649	if (pci_enable_io_modes) {
2650		/* Turn on resources that have been left off by a lazy BIOS */
2651		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2652			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2653			cmd |= PCIM_CMD_PORTEN;
2654			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2655		}
2656		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2657			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2658			cmd |= PCIM_CMD_MEMEN;
2659			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2660		}
2661	} else {
2662		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2663			return (barlen);
2664		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2665			return (barlen);
2666	}
2667
2668	count = (pci_addr_t)1 << mapsize;
2669	if (basezero || base == pci_mapbase(testval)) {
2670		start = 0;	/* Let the parent decide. */
2671		end = ~0ULL;
2672	} else {
2673		start = base;
2674		end = base + count - 1;
2675	}
2676	resource_list_add(rl, type, reg, start, end, count);
2677
2678	/*
2679	 * Try to allocate the resource for this BAR from our parent
2680	 * so that this resource range is already reserved.  The
2681	 * driver for this device will later inherit this resource in
2682	 * pci_alloc_resource().
2683	 */
2684	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2685	    prefetch ? RF_PREFETCHABLE : 0);
2686	if (res == NULL) {
2687		/*
2688		 * If the allocation fails, clear the BAR and delete
2689		 * the resource list entry to force
2690		 * pci_alloc_resource() to allocate resources from the
2691		 * parent.
2692		 */
2693		resource_list_delete(rl, type, reg);
2694		start = 0;
2695	} else
2696		start = rman_get_start(res);
2697	pci_write_bar(dev, pm, start);
2698	return (barlen);
2699}
2700
2701/*
2702 * For ATA devices we need to decide early what addressing mode to use.
2703 * Legacy demands that the primary and secondary ATA ports sits on the
2704 * same addresses that old ISA hardware did. This dictates that we use
2705 * those addresses and ignore the BAR's if we cannot set PCI native
2706 * addressing mode.
2707 */
2708static void
2709pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2710    uint32_t prefetchmask)
2711{
2712	struct resource *r;
2713	int rid, type, progif;
2714#if 0
2715	/* if this device supports PCI native addressing use it */
2716	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2717	if ((progif & 0x8a) == 0x8a) {
2718		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2719		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2720			printf("Trying ATA native PCI addressing mode\n");
2721			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2722		}
2723	}
2724#endif
2725	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2726	type = SYS_RES_IOPORT;
2727	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2728		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2729		    prefetchmask & (1 << 0));
2730		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2731		    prefetchmask & (1 << 1));
2732	} else {
2733		rid = PCIR_BAR(0);
2734		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2735		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2736		    0x1f7, 8, 0);
2737		rid = PCIR_BAR(1);
2738		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2739		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2740		    0x3f6, 1, 0);
2741	}
2742	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2743		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2744		    prefetchmask & (1 << 2));
2745		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2746		    prefetchmask & (1 << 3));
2747	} else {
2748		rid = PCIR_BAR(2);
2749		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2750		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2751		    0x177, 8, 0);
2752		rid = PCIR_BAR(3);
2753		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2754		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2755		    0x376, 1, 0);
2756	}
2757	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2758	    prefetchmask & (1 << 4));
2759	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2760	    prefetchmask & (1 << 5));
2761}
2762
/*
 * Work out which IRQ a device's INTx pin should use, record it in the
 * intline register, and add it as the rid 0 SYS_RES_IRQ resource.  A
 * per-device tunable can override the choice; otherwise the bus may be
 * asked to route an interrupt (always when force_route is set).
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only IRQs in (0, 255) are accepted from the tunable. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2810
/*
 * Perform early OHCI takeover from SMM: if the interrupt-routing bit
 * indicates the BIOS/SMM owns the controller, request an ownership
 * change and wait up to ~100ms for it to be released, resetting the
 * controller if the SMM does not respond.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll for the SMM to clear OHCI_IR: 100 x 1ms. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2847
/*
 * Perform early UHCI takeover from SMM by disabling the legacy-support
 * trap logic and masking the controller's interrupts.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2871
/*
 * Perform early EHCI takeover from SMM: walk the extended-capability
 * list looking for the legacy-support capability, and if the BIOS
 * semaphore is held, set the OS semaphore and wait up to ~100ms for
 * the BIOS to release ownership.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS doesn't hold this controller; keep walking. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore; the BIOS should then release its. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll for the BIOS semaphore to clear: 100 x 1ms. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2927
/*
 * Populate a newly-found device's resource list: size its BARs (with
 * special handling for legacy-mode ATA controllers and quirked map
 * registers), assign its INTx interrupt, and perform early USB
 * controller takeover from SMM where applicable.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the BIOS/SMM early if requested. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2982
/*
 * Scan every slot/function on the given bus and add a child device for
 * each function that responds with a valid header type.  dinfo_size
 * lets subclassed busses allocate a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): brief settle delay before probing the slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type (likely empty). */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions 0..PCI_FUNCMAX. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3015
/*
 * Attach a single scanned function to the bus: create the newbus child,
 * snapshot and re-apply its config registers (the save must precede the
 * restore so pci_cfg_restore() has state to work from), then size its
 * resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3027
3028static int
3029pci_probe(device_t dev)
3030{
3031
3032	device_set_desc(dev, "PCI bus");
3033
3034	/* Allow other subclasses to override this driver. */
3035	return (BUS_PROBE_GENERIC);
3036}
3037
/*
 * Attach the PCI bus: look up our domain and bus number from the parent
 * bridge, scan for children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3057
/*
 * Helper for suspend/resume: move each attached child in devlist to the
 * given power state (or the state the firmware recommends instead).
 */
static void
pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
    int state)
{
	device_t child, pcib;
	struct pci_devinfo *dinfo;
	int dstate, i;

	/*
	 * Set the device to the given state.  If the firmware suggests
	 * a different power state, use it instead.  If power management
	 * is not present, the firmware is responsible for managing
	 * device power.  Skip children who aren't attached since they
	 * are handled separately.
	 */
	pcib = device_get_parent(dev);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		dstate = state;
		if (device_is_attached(child) &&
		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
			pci_set_powerstate(child, dstate);
	}
}
3083
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	/* Snapshot each child's config header before anything loses power. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* Optionally drop children to D3, gated by pci_do_power_suspend. */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3115
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	/* Power up first (gated by pci_do_power_resume) ... */
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Driverless devices get their state re-saved instead. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 * Pass 1: display, memory, bridge and base-peripheral devices.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Pass 2: every remaining class. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3171
/*
 * Locate the preloaded "pci_vendor_data" image (the flat-text PCI
 * vendor/device database) and record its address and size for later
 * use by pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database: write a newline one
			 * byte past the recorded size so the parser
			 * always finds a line terminator.  NOTE(review):
			 * assumes the preload area has at least one byte
			 * of slack after the data -- confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3191
/*
 * A new driver was registered against the PCI bus: let it identify
 * new children, then reprobe every child that currently has no
 * driver attached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that no driver has claimed. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Restore the config header before probing ... */
		pci_cfg_restore(child, dinfo);
		/* ... and re-save it if the probe/attach failed again. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3220
/*
 * Bus method for setting up an interrupt handler on a child's IRQ
 * resource.  For direct children this also programs the INTx disable
 * bit and, for MSI/MSI-X rids (rid != 0), lazily maps and enables the
 * message vector, tracking how many handlers share it.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the capability on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3312
3313int
3314pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3315    void *cookie)
3316{
3317	struct msix_table_entry *mte;
3318	struct resource_list_entry *rle;
3319	struct pci_devinfo *dinfo;
3320	int error, rid;
3321
3322	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3323		return (EINVAL);
3324
3325	/* If this isn't a direct child, just bail out */
3326	if (device_get_parent(child) != dev)
3327		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3328
3329	rid = rman_get_rid(irq);
3330	if (rid == 0) {
3331		/* Mask INTx */
3332		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3333	} else {
3334		/*
3335		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3336		 * decrement the appropriate handlers count and mask the
3337		 * MSI-X message, or disable MSI messages if the count
3338		 * drops to 0.
3339		 */
3340		dinfo = device_get_ivars(child);
3341		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3342		if (rle->res != irq)
3343			return (EINVAL);
3344		if (dinfo->cfg.msi.msi_alloc > 0) {
3345			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3346			    ("MSI-X index too high"));
3347			if (dinfo->cfg.msi.msi_handlers == 0)
3348				return (EINVAL);
3349			dinfo->cfg.msi.msi_handlers--;
3350			if (dinfo->cfg.msi.msi_handlers == 0)
3351				pci_disable_msi(child);
3352		} else {
3353			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3354			    ("No MSI or MSI-X interrupts allocated"));
3355			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3356			    ("MSI-X index too high"));
3357			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3358			if (mte->mte_handlers == 0)
3359				return (EINVAL);
3360			mte->mte_handlers--;
3361			if (mte->mte_handlers == 0)
3362				pci_mask_msix(child, rid - 1);
3363		}
3364	}
3365	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3366	if (rid > 0)
3367		KASSERT(error == 0,
3368		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3369	return (error);
3370}
3371
3372int
3373pci_print_child(device_t dev, device_t child)
3374{
3375	struct pci_devinfo *dinfo;
3376	struct resource_list *rl;
3377	int retval = 0;
3378
3379	dinfo = device_get_ivars(child);
3380	rl = &dinfo->resources;
3381
3382	retval += bus_print_child_header(dev, child);
3383
3384	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3385	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3386	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3387	if (device_get_flags(dev))
3388		retval += printf(" flags %#x", device_get_flags(dev));
3389
3390	retval += printf(" at device %d.%d", pci_get_slot(child),
3391	    pci_get_function(child));
3392
3393	retval += bus_print_child_footer(dev, child);
3394
3395	return (retval);
3396}
3397
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * report devices that no driver claimed.  A subclass of -1 marks the
 * generic entry for the whole class; a NULL desc terminates the table.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3489
3490void
3491pci_probe_nomatch(device_t dev, device_t child)
3492{
3493	int	i;
3494	char	*cp, *scp, *device;
3495
3496	/*
3497	 * Look for a listing for this device in a loaded device database.
3498	 */
3499	if ((device = pci_describe_device(child)) != NULL) {
3500		device_printf(dev, "<%s>", device);
3501		free(device, M_DEVBUF);
3502	} else {
3503		/*
3504		 * Scan the class/subclass descriptions for a general
3505		 * description.
3506		 */
3507		cp = "unknown";
3508		scp = NULL;
3509		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3510			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3511				if (pci_nomatch_tab[i].subclass == -1) {
3512					cp = pci_nomatch_tab[i].desc;
3513				} else if (pci_nomatch_tab[i].subclass ==
3514				    pci_get_subclass(child)) {
3515					scp = pci_nomatch_tab[i].desc;
3516				}
3517			}
3518		}
3519		device_printf(dev, "<%s%s%s>",
3520		    cp ? cp : "",
3521		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3522		    scp ? scp : "");
3523	}
3524	printf(" at device %d.%d (no driver attached)\n",
3525	    pci_get_slot(child), pci_get_function(child));
3526	pci_cfg_save(child, device_get_ivars(child), 1);
3527	return;
3528}
3529
3530/*
3531 * Parse the PCI device database, if loaded, and return a pointer to a
3532 * description of the device.
3533 *
3534 * The database is flat text formatted as follows:
3535 *
3536 * Any line not in a valid format is ignored.
3537 * Lines are terminated with newline '\n' characters.
3538 *
3539 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3540 * the vendor name.
3541 *
3542 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3543 * - devices cannot be listed without a corresponding VENDOR line.
3544 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3545 * another TAB, then the device name.
3546 */
3547
3548/*
3549 * Assuming (ptr) points to the beginning of a line in the database,
3550 * return the vendor or device and description of the next entry.
3551 * The value of (vendor) or (device) inappropriate for the entry type
3552 * is set to -1.  Returns nonzero at the end of the database.
3553 *
 * Note that this is not fully robust in the face of corrupt data;
3555 * we attempt to safeguard against this by spamming the end of the
3556 * database with a newline when we initialise.
3557 */
3558static int
3559pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3560{
3561	char	*cp = *ptr;
3562	int	left;
3563
3564	*device = -1;
3565	*vendor = -1;
3566	**desc = '\0';
3567	for (;;) {
3568		left = pci_vendordata_size - (cp - pci_vendordata);
3569		if (left <= 0) {
3570			*ptr = cp;
3571			return(1);
3572		}
3573
3574		/* vendor entry? */
3575		if (*cp != '\t' &&
3576		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3577			break;
3578		/* device entry? */
3579		if (*cp == '\t' &&
3580		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3581			break;
3582
3583		/* skip to next line */
3584		while (*cp != '\n' && left > 0) {
3585			cp++;
3586			left--;
3587		}
3588		if (*cp == '\n') {
3589			cp++;
3590			left--;
3591		}
3592	}
3593	/* skip to next line */
3594	while (*cp != '\n' && left > 0) {
3595		cp++;
3596		left--;
3597	}
3598	if (*cp == '\n' && left > 0)
3599		cp++;
3600	*ptr = cp;
3601	return(0);
3602}
3603
/*
 * Look up a "vendor, device" description string for this device in
 * the preloaded vendor database.  Returns a malloc(M_DEVBUF)ed string
 * that the caller must free, or NULL if no database is loaded, the
 * vendor is unknown, or allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the per-line description buffer size. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry matched. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Ran into the next vendor section: stop searching. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device under a known vendor: show the numeric ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3656
/*
 * Bus method implementing the pci_get_*() ivar accessors: copy the
 * requested field out of the child's cached config registers into
 * *result.  Returns ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined ID: device in the high 16 bits, vendor low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3739
/*
 * Bus method implementing the pci_set_*() ivar mutators.  Only the
 * interrupt pin is writable; the identification registers are
 * read-only (EINVAL) and unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3772
3773
3774#include "opt_ddb.h"
3775#ifdef DDB
3776#include <ddb/ddb.h>
3777#include <sys/cons.h>
3778
3779/*
 * List resources based on PCI map registers, used from within DDB.
3781 */
3782
/*
 * DDB "show pciregs" command: walk the global device queue and print
 * one summary line (selector, class, subsystem, chip ID, revision,
 * header type) per known PCI function.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "noneN" with a running count. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3822#endif /* DDB */
3823
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it: size the BAR (probing the hardware if it wasn't seen
 * before), sanity-check the requested type against the BAR type,
 * allocate a suitably sized and aligned range from the parent, record
 * it in the child's resource list as RLE_RESERVED, and program the
 * BAR with the assigned address.  Returns the reserved resource or
 * NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually got. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3929
3930
/*
 * Bus method for allocating a child's resource.  Grandchildren are
 * passed straight up to our parent; for direct children we lazily
 * route an interrupt (rid 0) or reserve the BAR backing a memory/port
 * rid before handing the request to the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3981
/*
 * Bus method for activating a child's resource.  After the generic
 * activation succeeds, enable the matching decode (I/O or memory) in
 * the child's command register, and for device ROM BARs additionally
 * set the ROM enable bit in the BAR itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4009
/*
 * Bus method for deactivating a child's resource.  After the generic
 * deactivation, clear the ROM enable bit for device ROM BARs so the
 * ROM stops decoding.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4030
/*
 * Detach and destroy a PCI child device: disable its memory and I/O
 * decoding, release every resource still held on its resource list
 * (complaining about any that are still active), and free the
 * new-bus device and its config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4070
/*
 * Bus method for deleting a resource from a direct child's resource
 * list.  Refuses to delete a resource that is still active or busy;
 * otherwise clears the backing BAR (so the device stops decoding),
 * unreserves the range and removes the list entry.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4113
4114struct resource_list *
4115pci_get_resource_list (device_t dev, device_t child)
4116{
4117	struct pci_devinfo *dinfo = device_get_ivars(child);
4118
4119	return (&dinfo->resources);
4120}
4121
4122uint32_t
4123pci_read_config_method(device_t dev, device_t child, int reg, int width)
4124{
4125	struct pci_devinfo *dinfo = device_get_ivars(child);
4126	pcicfgregs *cfg = &dinfo->cfg;
4127
4128	return (PCIB_READ_CONFIG(device_get_parent(dev),
4129	    cfg->bus, cfg->slot, cfg->func, reg, width));
4130}
4131
4132void
4133pci_write_config_method(device_t dev, device_t child, int reg,
4134    uint32_t val, int width)
4135{
4136	struct pci_devinfo *dinfo = device_get_ivars(child);
4137	pcicfgregs *cfg = &dinfo->cfg;
4138
4139	PCIB_WRITE_CONFIG(device_get_parent(dev),
4140	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4141}
4142
4143int
4144pci_child_location_str_method(device_t dev, device_t child, char *buf,
4145    size_t buflen)
4146{
4147
4148	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4149	    pci_get_function(child));
4150	return (0);
4151}
4152
4153int
4154pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4155    size_t buflen)
4156{
4157	struct pci_devinfo *dinfo;
4158	pcicfgregs *cfg;
4159
4160	dinfo = device_get_ivars(child);
4161	cfg = &dinfo->cfg;
4162	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4163	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4164	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4165	    cfg->progif);
4166	return (0);
4167}
4168
4169int
4170pci_assign_interrupt_method(device_t dev, device_t child)
4171{
4172	struct pci_devinfo *dinfo = device_get_ivars(child);
4173	pcicfgregs *cfg = &dinfo->cfg;
4174
4175	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4176	    cfg->intpin));
4177}
4178
4179static int
4180pci_modevent(module_t mod, int what, void *arg)
4181{
4182	static struct cdev *pci_cdev;
4183
4184	switch (what) {
4185	case MOD_LOAD:
4186		STAILQ_INIT(&pci_devq);
4187		pci_generation = 0;
4188		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4189		    "pci");
4190		pci_load_vendor_data();
4191		break;
4192
4193	case MOD_UNLOAD:
4194		destroy_dev(pci_cdev);
4195		break;
4196	}
4197
4198	return (0);
4199}
4200
/*
 * Restore a type 0 device's saved configuration registers (as captured
 * by pci_cfg_save()) after a power transition or resume.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs first, then the writable portion of the type 0 header. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4242
/*
 * Snapshot a type 0 device's writable configuration registers into
 * dinfo so pci_cfg_restore() can put them back later.  If setstate is
 * non-zero, optionally power the device down afterwards according to
 * the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Case values fall through deliberately: higher = more aggressive. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4322