pci.c revision 219740
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 219740 2011-03-18 14:06:12Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72static pci_addr_t	pci_mapbase(uint64_t mapreg);
73static const char	*pci_maptype(uint64_t mapreg);
74static int		pci_mapsize(uint64_t testval);
75static int		pci_maprange(uint64_t mapreg);
76static pci_addr_t	pci_rombase(uint64_t mapreg);
77static int		pci_romsize(uint64_t testval);
78static void		pci_fixancient(pcicfgregs *cfg);
79static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80
81static int		pci_porten(device_t dev);
82static int		pci_memen(device_t dev);
83static void		pci_assign_interrupt(device_t bus, device_t dev,
84			    int force_route);
85static int		pci_add_map(device_t bus, device_t dev, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg, uint32_t *data);
99#if 0
100static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114static int		pci_remap_intr_method(device_t bus, device_t dev,
115			    u_int irq);
116
/*
 * Kernel object method table for the PCI bus driver: device_t entry
 * points, generic bus methods, and the PCI-specific interface declared
 * in pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
169
/* Register the "pci" driver; it attaches as a child of any "pcib" bridge. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Raw vendor/device description data and its size (see pci_load_vendor_data). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178
179
/*
 * One entry of the device workaround table below, matched by the
 * combined vendor/device ID.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-specific argument (a register offset for
			   PCI_QUIRK_MAP_REG) */
	int	arg2;
};
189
/*
 * Known-broken or special-cased devices, looked up by devid.  The list
 * is terminated by the all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }
};
230
231/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every PCI device discovered by pci_read_device(). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped each time a device is added */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe / PCI-X capability is seen; see pci_read_extcap(). */
static int pcie_chipset, pcix_chipset;
240
241/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Each knob below is both a loader tunable and a sysctl under hw.pci. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Exported (non-static); presumably referenced from other PCI files. */
int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only for x86, per the description below. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
298
299/* Find a device_t by bus/slot/function in domain 0 */
300
301device_t
302pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
303{
304
305	return (pci_find_dbsf(0, bus, slot, func));
306}
307
308/* Find a device_t by domain/bus/slot/function */
309
310device_t
311pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
312{
313	struct pci_devinfo *dinfo;
314
315	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
316		if ((dinfo->cfg.domain == domain) &&
317		    (dinfo->cfg.bus == bus) &&
318		    (dinfo->cfg.slot == slot) &&
319		    (dinfo->cfg.func == func)) {
320			return (dinfo->cfg.dev);
321		}
322	}
323
324	return (NULL);
325}
326
327/* Find a device_t by vendor/device ID */
328
329device_t
330pci_find_device(uint16_t vendor, uint16_t device)
331{
332	struct pci_devinfo *dinfo;
333
334	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335		if ((dinfo->cfg.vendor == vendor) &&
336		    (dinfo->cfg.device == device)) {
337			return (dinfo->cfg.dev);
338		}
339	}
340
341	return (NULL);
342}
343
344static int
345pci_printf(pcicfgregs *cfg, const char *fmt, ...)
346{
347	va_list ap;
348	int retval;
349
350	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
351	    cfg->func);
352	va_start(ap, fmt);
353	retval += vprintf(fmt, ap);
354	va_end(ap);
355	return (retval);
356}
357
358/* return base address of memory or port map */
359
360static pci_addr_t
361pci_mapbase(uint64_t mapreg)
362{
363
364	if (PCI_BAR_MEM(mapreg))
365		return (mapreg & PCIM_BAR_MEM_BASE);
366	else
367		return (mapreg & PCIM_BAR_IO_BASE);
368}
369
370/* return map type of memory or port map */
371
372static const char *
373pci_maptype(uint64_t mapreg)
374{
375
376	if (PCI_BAR_IO(mapreg))
377		return ("I/O Port");
378	if (mapreg & PCIM_BAR_MEM_PREFETCH)
379		return ("Prefetchable Memory");
380	return ("Memory");
381}
382
383/* return log2 of map size decoded for memory or port map */
384
385static int
386pci_mapsize(uint64_t testval)
387{
388	int ln2size;
389
390	testval = pci_mapbase(testval);
391	ln2size = 0;
392	if (testval != 0) {
393		while ((testval & 1) == 0)
394		{
395			ln2size++;
396			testval >>= 1;
397		}
398	}
399	return (ln2size);
400}
401
402/* return base address of device ROM */
403
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Only the address bits of the ROM BAR are meaningful here. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
410
/* return log2 of map size decoded for device ROM */
412
413static int
414pci_romsize(uint64_t testval)
415{
416	int ln2size;
417
418	testval = pci_rombase(testval);
419	ln2size = 0;
420	if (testval != 0) {
421		while ((testval & 1) == 0)
422		{
423			ln2size++;
424			testval >>= 1;
425		}
426	}
427	return (ln2size);
428}
429
430/* return log2 of address range supported by map register */
431
432static int
433pci_maprange(uint64_t mapreg)
434{
435	int ln2range = 0;
436
437	if (PCI_BAR_IO(mapreg))
438		ln2range = 32;
439	else
440		switch (mapreg & PCIM_BAR_MEM_TYPE) {
441		case PCIM_BAR_MEM_32:
442			ln2range = 32;
443			break;
444		case PCIM_BAR_MEM_1MB:
445			ln2range = 20;
446			break;
447		case PCIM_BAR_MEM_64:
448			ln2range = 64;
449			break;
450		}
451	return (ln2range);
452}
453
454/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
455
456static void
457pci_fixancient(pcicfgregs *cfg)
458{
459	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
460		return;
461
462	/* PCI to PCI bridges use header type 1 */
463	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
464		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
465}
466
467/* extract header type specific config data */
468
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The subsystem IDs and the number of BARs live at different
	 * offsets depending on the header type; fill in the layout
	 * specific fields of *cfg.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:	/* type 0: ordinary function */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:	/* type 1: PCI-PCI bridge */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:	/* type 2: CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
490
491/* read configuration header into pcicfgregs structure */
/*
 * Probe config space at pci<d>:<b>:<s>:<f> and, if a device responds,
 * allocate a pci_devinfo of 'size' bytes (callers presumably pass at
 * least sizeof(struct pci_devinfo) so they can append their own state
 * -- TODO confirm against callers), fill in its config registers, link
 * it onto pci_devq, and return it.  Returns NULL when no device is
 * present at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device dword means no device present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): with M_WAITOK this check never fires. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Fields common to every header type. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only when one is advertised. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror identification data into the pciconf(8) view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
565
/*
 * Walk the traditional PCI capability list and record the location and
 * contents of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express) in
 * *cfg.
 *
 * Note: the REG/WREG macros defined here are deliberately left defined
 * for the VPD helpers below and are only #undef'd at the end of
 * pci_read_vpd().
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The supported message count is a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* The table and PBA each live in a BAR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Only the location is recorded here; the data is
			 * parsed lazily by pci_read_vpd(). */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
723
724/*
725 * PCI Vital Product Data
726 */
727
728#define	PCI_VPD_TIMEOUT		1000000
729
730static int
731pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
732{
733	int count = PCI_VPD_TIMEOUT;
734
735	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
736
737	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
738
739	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
740		if (--count < 0)
741			return (ENXIO);
742		DELAY(1);	/* limit looping */
743	}
744	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
745
746	return (0);
747}
748
#if 0
/*
 * Write one 4-byte aligned dword of VPD (currently unused, hence
 * compiled out).
 *
 * Loads the data register, writes the address with the flag bit
 * (0x8000) set, then polls until the hardware clears the flag to
 * signal completion.  Returns 0 on success or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	/* Fixed assertion message typo ("must by" -> "must be"). */
	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Poll for the flag to clear; bail out after the timeout. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
768
769#undef PCI_VPD_TIMEOUT
770
/*
 * Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read, shifted down as
					   bytes are consumed */
	int		bytesinval;	/* bytes of 'val' not yet returned */
	int		off;		/* next VPD address to read (dword
					   aligned) */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
779
780static int
781vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
782{
783	uint32_t reg;
784	uint8_t byte;
785
786	if (vrs->bytesinval == 0) {
787		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
788			return (ENXIO);
789		vrs->val = le32toh(reg);
790		vrs->off += 4;
791		byte = vrs->val & 0xff;
792		vrs->bytesinval = 3;
793	} else {
794		vrs->val = vrs->val >> 8;
795		byte = vrs->val & 0xff;
796		vrs->bytesinval--;
797	}
798
799	vrs->cksum += byte;
800	*data = byte;
801	return (0);
802}
803
804static void
805pci_read_vpd(device_t pcib, pcicfgregs *cfg)
806{
807	struct vpd_readstate vrs;
808	int state;
809	int name;
810	int remain;
811	int i;
812	int alloc, off;		/* alloc/off for RO/W arrays */
813	int cksumvalid;
814	int dflen;
815	uint8_t byte;
816	uint8_t byte2;
817
818	/* init vpd reader */
819	vrs.bytesinval = 0;
820	vrs.off = 0;
821	vrs.pcib = pcib;
822	vrs.cfg = cfg;
823	vrs.cksum = 0;
824
825	state = 0;
826	name = remain = i = 0;	/* shut up stupid gcc */
827	alloc = off = 0;	/* shut up stupid gcc */
828	dflen = 0;		/* shut up stupid gcc */
829	cksumvalid = -1;
830	while (state >= 0) {
831		if (vpd_nextbyte(&vrs, &byte)) {
832			state = -2;
833			break;
834		}
835#if 0
836		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
837		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
838		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
839#endif
840		switch (state) {
841		case 0:		/* item name */
842			if (byte & 0x80) {
843				if (vpd_nextbyte(&vrs, &byte2)) {
844					state = -2;
845					break;
846				}
847				remain = byte2;
848				if (vpd_nextbyte(&vrs, &byte2)) {
849					state = -2;
850					break;
851				}
852				remain |= byte2 << 8;
853				if (remain > (0x7f*4 - vrs.off)) {
854					state = -1;
855					printf(
856			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
857					    cfg->domain, cfg->bus, cfg->slot,
858					    cfg->func, remain);
859				}
860				name = byte & 0x7f;
861			} else {
862				remain = byte & 0x7;
863				name = (byte >> 3) & 0xf;
864			}
865			switch (name) {
866			case 0x2:	/* String */
867				cfg->vpd.vpd_ident = malloc(remain + 1,
868				    M_DEVBUF, M_WAITOK);
869				i = 0;
870				state = 1;
871				break;
872			case 0xf:	/* End */
873				state = -1;
874				break;
875			case 0x10:	/* VPD-R */
876				alloc = 8;
877				off = 0;
878				cfg->vpd.vpd_ros = malloc(alloc *
879				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
880				    M_WAITOK | M_ZERO);
881				state = 2;
882				break;
883			case 0x11:	/* VPD-W */
884				alloc = 8;
885				off = 0;
886				cfg->vpd.vpd_w = malloc(alloc *
887				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
888				    M_WAITOK | M_ZERO);
889				state = 5;
890				break;
891			default:	/* Invalid data, abort */
892				state = -1;
893				break;
894			}
895			break;
896
897		case 1:	/* Identifier String */
898			cfg->vpd.vpd_ident[i++] = byte;
899			remain--;
900			if (remain == 0)  {
901				cfg->vpd.vpd_ident[i] = '\0';
902				state = 0;
903			}
904			break;
905
906		case 2:	/* VPD-R Keyword Header */
907			if (off == alloc) {
908				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
909				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
910				    M_DEVBUF, M_WAITOK | M_ZERO);
911			}
912			cfg->vpd.vpd_ros[off].keyword[0] = byte;
913			if (vpd_nextbyte(&vrs, &byte2)) {
914				state = -2;
915				break;
916			}
917			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
918			if (vpd_nextbyte(&vrs, &byte2)) {
919				state = -2;
920				break;
921			}
922			dflen = byte2;
923			if (dflen == 0 &&
924			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
925			    2) == 0) {
926				/*
927				 * if this happens, we can't trust the rest
928				 * of the VPD.
929				 */
930				printf(
931				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
932				    cfg->domain, cfg->bus, cfg->slot,
933				    cfg->func, dflen);
934				cksumvalid = 0;
935				state = -1;
936				break;
937			} else if (dflen == 0) {
938				cfg->vpd.vpd_ros[off].value = malloc(1 *
939				    sizeof(*cfg->vpd.vpd_ros[off].value),
940				    M_DEVBUF, M_WAITOK);
941				cfg->vpd.vpd_ros[off].value[0] = '\x00';
942			} else
943				cfg->vpd.vpd_ros[off].value = malloc(
944				    (dflen + 1) *
945				    sizeof(*cfg->vpd.vpd_ros[off].value),
946				    M_DEVBUF, M_WAITOK);
947			remain -= 3;
948			i = 0;
949			/* keep in sync w/ state 3's transistions */
950			if (dflen == 0 && remain == 0)
951				state = 0;
952			else if (dflen == 0)
953				state = 2;
954			else
955				state = 3;
956			break;
957
958		case 3:	/* VPD-R Keyword Value */
959			cfg->vpd.vpd_ros[off].value[i++] = byte;
960			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
961			    "RV", 2) == 0 && cksumvalid == -1) {
962				if (vrs.cksum == 0)
963					cksumvalid = 1;
964				else {
965					if (bootverbose)
966						printf(
967				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
968						    cfg->domain, cfg->bus,
969						    cfg->slot, cfg->func,
970						    vrs.cksum);
971					cksumvalid = 0;
972					state = -1;
973					break;
974				}
975			}
976			dflen--;
977			remain--;
978			/* keep in sync w/ state 2's transistions */
979			if (dflen == 0)
980				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
981			if (dflen == 0 && remain == 0) {
982				cfg->vpd.vpd_rocnt = off;
983				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
984				    off * sizeof(*cfg->vpd.vpd_ros),
985				    M_DEVBUF, M_WAITOK | M_ZERO);
986				state = 0;
987			} else if (dflen == 0)
988				state = 2;
989			break;
990
991		case 4:
992			remain--;
993			if (remain == 0)
994				state = 0;
995			break;
996
997		case 5:	/* VPD-W Keyword Header */
998			if (off == alloc) {
999				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1000				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1001				    M_DEVBUF, M_WAITOK | M_ZERO);
1002			}
1003			cfg->vpd.vpd_w[off].keyword[0] = byte;
1004			if (vpd_nextbyte(&vrs, &byte2)) {
1005				state = -2;
1006				break;
1007			}
1008			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1009			if (vpd_nextbyte(&vrs, &byte2)) {
1010				state = -2;
1011				break;
1012			}
1013			cfg->vpd.vpd_w[off].len = dflen = byte2;
1014			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1015			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1016			    sizeof(*cfg->vpd.vpd_w[off].value),
1017			    M_DEVBUF, M_WAITOK);
1018			remain -= 3;
1019			i = 0;
1020			/* keep in sync w/ state 6's transistions */
1021			if (dflen == 0 && remain == 0)
1022				state = 0;
1023			else if (dflen == 0)
1024				state = 5;
1025			else
1026				state = 6;
1027			break;
1028
1029		case 6:	/* VPD-W Keyword Value */
1030			cfg->vpd.vpd_w[off].value[i++] = byte;
1031			dflen--;
1032			remain--;
1033			/* keep in sync w/ state 5's transistions */
1034			if (dflen == 0)
1035				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1036			if (dflen == 0 && remain == 0) {
1037				cfg->vpd.vpd_wcnt = off;
1038				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1039				    off * sizeof(*cfg->vpd.vpd_w),
1040				    M_DEVBUF, M_WAITOK | M_ZERO);
1041				state = 0;
1042			} else if (dflen == 0)
1043				state = 5;
1044			break;
1045
1046		default:
1047			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1048			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1049			    state);
1050			state = -1;
1051			break;
1052		}
1053	}
1054
1055	if (cksumvalid == 0 || state < -1) {
1056		/* read-only data bad, clean up */
1057		if (cfg->vpd.vpd_ros != NULL) {
1058			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1059				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1060			free(cfg->vpd.vpd_ros, M_DEVBUF);
1061			cfg->vpd.vpd_ros = NULL;
1062		}
1063	}
1064	if (state < -1) {
1065		/* I/O error, clean up */
1066		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1067		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1068		if (cfg->vpd.vpd_ident != NULL) {
1069			free(cfg->vpd.vpd_ident, M_DEVBUF);
1070			cfg->vpd.vpd_ident = NULL;
1071		}
1072		if (cfg->vpd.vpd_w != NULL) {
1073			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1074				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1075			free(cfg->vpd.vpd_w, M_DEVBUF);
1076			cfg->vpd.vpd_w = NULL;
1077		}
1078	}
1079	cfg->vpd.vpd_cached = 1;
1080#undef REG
1081#undef WREG
1082}
1083
1084int
1085pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1086{
1087	struct pci_devinfo *dinfo = device_get_ivars(child);
1088	pcicfgregs *cfg = &dinfo->cfg;
1089
1090	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1091		pci_read_vpd(device_get_parent(dev), cfg);
1092
1093	*identptr = cfg->vpd.vpd_ident;
1094
1095	if (*identptr == NULL)
1096		return (ENXIO);
1097
1098	return (0);
1099}
1100
1101int
1102pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1103	const char **vptr)
1104{
1105	struct pci_devinfo *dinfo = device_get_ivars(child);
1106	pcicfgregs *cfg = &dinfo->cfg;
1107	int i;
1108
1109	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1110		pci_read_vpd(device_get_parent(dev), cfg);
1111
1112	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1113		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1114		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1115			*vptr = cfg->vpd.vpd_ros[i].value;
1116		}
1117
1118	if (i != cfg->vpd.vpd_rocnt)
1119		return (0);
1120
1121	*vptr = NULL;
1122	return (ENXIO);
1123}
1124
1125/*
1126 * Find the requested extended capability and return the offset in
1127 * configuration space via the pointer provided. The function returns
1128 * 0 on success and error code otherwise.
1129 */
1130int
1131pci_find_extcap_method(device_t dev, device_t child, int capability,
1132    int *capreg)
1133{
1134	struct pci_devinfo *dinfo = device_get_ivars(child);
1135	pcicfgregs *cfg = &dinfo->cfg;
1136	u_int32_t status;
1137	u_int8_t ptr;
1138
1139	/*
1140	 * Check the CAP_LIST bit of the PCI status register first.
1141	 */
1142	status = pci_read_config(child, PCIR_STATUS, 2);
1143	if (!(status & PCIM_STATUS_CAPPRESENT))
1144		return (ENXIO);
1145
1146	/*
1147	 * Determine the start pointer of the capabilities list.
1148	 */
1149	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1150	case PCIM_HDRTYPE_NORMAL:
1151	case PCIM_HDRTYPE_BRIDGE:
1152		ptr = PCIR_CAP_PTR;
1153		break;
1154	case PCIM_HDRTYPE_CARDBUS:
1155		ptr = PCIR_CAP_PTR_2;
1156		break;
1157	default:
1158		/* XXX: panic? */
1159		return (ENXIO);		/* no extended capabilities support */
1160	}
1161	ptr = pci_read_config(child, ptr, 1);
1162
1163	/*
1164	 * Traverse the capabilities list.
1165	 */
1166	while (ptr != 0) {
1167		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1168			if (capreg != NULL)
1169				*capreg = ptr;
1170			return (0);
1171		}
1172		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1173	}
1174
1175	return (ENOENT);
1176}
1177
1178/*
1179 * Support for MSI-X message interrupts.
1180 */
/*
 * Program MSI-X table entry 'index' with the given message address and
 * data and hook up any required HyperTransport MSI mapping.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/*
	 * Each MSI-X table entry is 16 bytes: address low, address
	 * high, message data, vector control.
	 */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1197
1198void
1199pci_mask_msix(device_t dev, u_int index)
1200{
1201	struct pci_devinfo *dinfo = device_get_ivars(dev);
1202	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1203	uint32_t offset, val;
1204
1205	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1206	offset = msix->msix_table_offset + index * 16 + 12;
1207	val = bus_read_4(msix->msix_table_res, offset);
1208	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1209		val |= PCIM_MSIX_VCTRL_MASK;
1210		bus_write_4(msix->msix_table_res, offset, val);
1211	}
1212}
1213
1214void
1215pci_unmask_msix(device_t dev, u_int index)
1216{
1217	struct pci_devinfo *dinfo = device_get_ivars(dev);
1218	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1219	uint32_t offset, val;
1220
1221	KASSERT(msix->msix_table_len > index, ("bogus index"));
1222	offset = msix->msix_table_offset + index * 16 + 12;
1223	val = bus_read_4(msix->msix_table_res, offset);
1224	if (val & PCIM_MSIX_VCTRL_MASK) {
1225		val &= ~PCIM_MSIX_VCTRL_MASK;
1226		bus_write_4(msix->msix_table_res, offset, val);
1227	}
1228}
1229
1230int
1231pci_pending_msix(device_t dev, u_int index)
1232{
1233	struct pci_devinfo *dinfo = device_get_ivars(dev);
1234	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1235	uint32_t offset, bit;
1236
1237	KASSERT(msix->msix_table_len > index, ("bogus index"));
1238	offset = msix->msix_pba_offset + (index / 32) * 4;
1239	bit = 1 << index % 32;
1240	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1241}
1242
1243/*
1244 * Restore MSI-X registers and table during resume.  If MSI-X is
1245 * enabled then walk the virtual table to restore the actual MSI-X
1246 * table.
1247 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors[] is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/*
	 * Restore the saved control register last; this re-enables
	 * MSI-X if it was enabled before suspend.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1275
1276/*
1277 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1278 * returned in *count.  After this function returns, each message will be
1279 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1280 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BARs that
	 * hold the MSI-X table and the pending bit array.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table BAR, 'rle' still holds the
	 * table BAR's entry here, so this assignment is correct in
	 * both cases.
	 */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* A partial allocation is accepted; 'actual' may be < max. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially vector i+1 (1-based) is assigned to table slot i.
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1412
1413/*
1414 * By default, pci_alloc_msix() will assign the allocated IRQ
1415 * resources consecutively to the first N messages in the MSI-X table.
1416 * However, device drivers may want to use different layouts if they
1417 * either receive fewer messages than they asked for, or they wish to
1418 * populate the MSI-X table sparsely.  This method allows the driver
1419 * to specify what layout it wants.  It must be called after a
1420 * successful pci_alloc_msix() but before any of the associated
1421 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1422 *
1423 * The 'vectors' array contains 'count' message vectors.  The array
1424 * maps directly to the MSI-X table in that index 0 in the array
1425 * specifies the vector for the first message in the MSI-X table, etc.
1426 * The vector value in each array index can either be 0 to indicate
1427 * that no vector should be assigned to a message slot, or it can be a
1428 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1430 * vector (IRQ) to be used for the corresponding message.
1431 *
1432 * On successful return, each message with a non-zero vector will have
1433 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1434 * 1.  Additionally, if any of the IRQs allocated via the previous
1435 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1436 * will be freed back to the system automatically.
1437 *
1438 * For example, suppose a driver has a MSI-X table with 6 messages and
1439 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1440 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1441 * C.  After the call to pci_alloc_msix(), the device will be setup to
1442 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1444 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1445 * be freed back to the system.  This device will also have valid
1446 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1447 *
1448 * In any case, the SYS_RES_IRQ rid X will always map to the message
1449 * at MSI-X table index X - 1 and will only be valid if a vector is
1450 * assigned to that table entry.
1451 */
1452int
1453pci_remap_msix_method(device_t dev, device_t child, int count,
1454    const u_int *vectors)
1455{
1456	struct pci_devinfo *dinfo = device_get_ivars(child);
1457	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1458	struct resource_list_entry *rle;
1459	int i, irq, j, *used;
1460
1461	/*
1462	 * Have to have at least one message in the table but the
1463	 * table can't be bigger than the actual MSI-X table in the
1464	 * device.
1465	 */
1466	if (count == 0 || count > msix->msix_msgnum)
1467		return (EINVAL);
1468
1469	/* Sanity check the vectors. */
1470	for (i = 0; i < count; i++)
1471		if (vectors[i] > msix->msix_alloc)
1472			return (EINVAL);
1473
1474	/*
1475	 * Make sure there aren't any holes in the vectors to be used.
1476	 * It's a big pain to support it, and it doesn't really make
1477	 * sense anyway.  Also, at least one vector must be used.
1478	 */
1479	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1480	    M_ZERO);
1481	for (i = 0; i < count; i++)
1482		if (vectors[i] != 0)
1483			used[vectors[i] - 1] = 1;
1484	for (i = 0; i < msix->msix_alloc - 1; i++)
1485		if (used[i] == 0 && used[i + 1] == 1) {
1486			free(used, M_DEVBUF);
1487			return (EINVAL);
1488		}
1489	if (used[0] != 1) {
1490		free(used, M_DEVBUF);
1491		return (EINVAL);
1492	}
1493
1494	/* Make sure none of the resources are allocated. */
1495	for (i = 0; i < msix->msix_table_len; i++) {
1496		if (msix->msix_table[i].mte_vector == 0)
1497			continue;
1498		if (msix->msix_table[i].mte_handlers > 0)
1499			return (EBUSY);
1500		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1501		KASSERT(rle != NULL, ("missing resource"));
1502		if (rle->res != NULL)
1503			return (EBUSY);
1504	}
1505
1506	/* Free the existing resource list entries. */
1507	for (i = 0; i < msix->msix_table_len; i++) {
1508		if (msix->msix_table[i].mte_vector == 0)
1509			continue;
1510		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1511	}
1512
1513	/*
1514	 * Build the new virtual table keeping track of which vectors are
1515	 * used.
1516	 */
1517	free(msix->msix_table, M_DEVBUF);
1518	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1519	    M_DEVBUF, M_WAITOK | M_ZERO);
1520	for (i = 0; i < count; i++)
1521		msix->msix_table[i].mte_vector = vectors[i];
1522	msix->msix_table_len = count;
1523
1524	/* Free any unused IRQs and resize the vectors array if necessary. */
1525	j = msix->msix_alloc - 1;
1526	if (used[j] == 0) {
1527		struct msix_vector *vec;
1528
1529		while (used[j] == 0) {
1530			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1531			    msix->msix_vectors[j].mv_irq);
1532			j--;
1533		}
1534		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1535		    M_WAITOK);
1536		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1537		    (j + 1));
1538		free(msix->msix_vectors, M_DEVBUF);
1539		msix->msix_vectors = vec;
1540		msix->msix_alloc = j + 1;
1541	}
1542	free(used, M_DEVBUF);
1543
1544	/* Map the IRQs onto the rids. */
1545	for (i = 0; i < count; i++) {
1546		if (vectors[i] == 0)
1547			continue;
1548		irq = msix->msix_vectors[vectors[i]].mv_irq;
1549		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1550		    irq, 1);
1551	}
1552
1553	if (bootverbose) {
1554		device_printf(child, "Remapped MSI-X IRQs as: ");
1555		for (i = 0; i < count; i++) {
1556			if (i != 0)
1557				printf(", ");
1558			if (vectors[i] == 0)
1559				printf("---");
1560			else
1561				printf("%d",
1562				    msix->msix_vectors[vectors[i]].mv_irq);
1563		}
1564		printf("\n");
1565	}
1566
1567	return (0);
1568}
1569
/*
 * Release all MSI-X messages allocated to a child device: disable
 * MSI-X, tear down the virtual table, and hand the IRQs back to the
 * parent bridge.  Returns ENODEV if no MSI-X messages are allocated
 * and EBUSY if any message still has a handler or an allocated
 * SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1616
1617/*
1618 * Return the max supported MSI-X messages this device supports.
1619 * Basically, assuming the MD code can alloc messages, this function
1620 * should return the maximum value that pci_alloc_msix() can return.
1621 * Thus, it is subject to the tunables, etc.
1622 */
1623int
1624pci_msix_count_method(device_t dev, device_t child)
1625{
1626	struct pci_devinfo *dinfo = device_get_ivars(child);
1627	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1628
1629	if (pci_do_msix && msix->msix_location != 0)
1630		return (msix->msix_msgnum);
1631	return (0);
1632}
1633
1634/*
1635 * HyperTransport MSI mapping control
1636 */
1637void
1638pci_ht_map_msi(device_t dev, uint64_t addr)
1639{
1640	struct pci_devinfo *dinfo = device_get_ivars(dev);
1641	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1642
1643	if (!ht->ht_msimap)
1644		return;
1645
1646	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1647	    ht->ht_msiaddr >> 20 == addr >> 20) {
1648		/* Enable MSI -> HT mapping. */
1649		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1650		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1651		    ht->ht_msictrl, 2);
1652	}
1653
1654	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1655		/* Disable MSI -> HT mapping. */
1656		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1657		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1658		    ht->ht_msictrl, 2);
1659	}
1660}
1661
1662int
1663pci_get_max_read_req(device_t dev)
1664{
1665	int cap;
1666	uint16_t val;
1667
1668	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1669		return (0);
1670	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1671	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1672	val >>= 12;
1673	return (1 << (val + 7));
1674}
1675
1676int
1677pci_set_max_read_req(device_t dev, int size)
1678{
1679	int cap;
1680	uint16_t val;
1681
1682	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1683		return (0);
1684	if (size < 128)
1685		size = 128;
1686	if (size > 4096)
1687		size = 4096;
1688	size = (1 << (fls(size) - 1));
1689	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1690	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1691	val |= (fls(size) - 8) << 12;
1692	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1693	return (size);
1694}
1695
1696/*
1697 * Support for MSI message signalled interrupts.
1698 */
/*
 * Program the MSI capability's address/data registers and enable MSI
 * delivery for the device.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/*
	 * Write data and address values.  A 64-bit capable function
	 * stores the data register at a different offset because of
	 * the extra address-high dword.
	 */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1725
1726void
1727pci_disable_msi(device_t dev)
1728{
1729	struct pci_devinfo *dinfo = device_get_ivars(dev);
1730	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1731
1732	/* Disable MSI -> HT mapping. */
1733	pci_ht_map_msi(dev, 0);
1734
1735	/* Disable MSI in the control register. */
1736	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1737	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1738	    2);
1739}
1740
1741/*
1742 * Restore MSI registers during resume.  If MSI is enabled then
1743 * restore the data and address registers in addition to the control
1744 * register.
1745 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data registers. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control register, even when MSI is disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1771
1772static int
1773pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1774{
1775	struct pci_devinfo *dinfo = device_get_ivars(dev);
1776	pcicfgregs *cfg = &dinfo->cfg;
1777	struct resource_list_entry *rle;
1778	struct msix_table_entry *mte;
1779	struct msix_vector *mv;
1780	uint64_t addr;
1781	uint32_t data;
1782	int error, i, j;
1783
1784	/*
1785	 * Handle MSI first.  We try to find this IRQ among our list
1786	 * of MSI IRQs.  If we find it, we request updated address and
1787	 * data registers and apply the results.
1788	 */
1789	if (cfg->msi.msi_alloc > 0) {
1790
1791		/* If we don't have any active handlers, nothing to do. */
1792		if (cfg->msi.msi_handlers == 0)
1793			return (0);
1794		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1795			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1796			    i + 1);
1797			if (rle->start == irq) {
1798				error = PCIB_MAP_MSI(device_get_parent(bus),
1799				    dev, irq, &addr, &data);
1800				if (error)
1801					return (error);
1802				pci_disable_msi(dev);
1803				dinfo->cfg.msi.msi_addr = addr;
1804				dinfo->cfg.msi.msi_data = data;
1805				pci_enable_msi(dev, addr, data);
1806				return (0);
1807			}
1808		}
1809		return (ENOENT);
1810	}
1811
1812	/*
1813	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1814	 * we request the updated mapping info.  If that works, we go
1815	 * through all the slots that use this IRQ and update them.
1816	 */
1817	if (cfg->msix.msix_alloc > 0) {
1818		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1819			mv = &cfg->msix.msix_vectors[i];
1820			if (mv->mv_irq == irq) {
1821				error = PCIB_MAP_MSI(device_get_parent(bus),
1822				    dev, irq, &addr, &data);
1823				if (error)
1824					return (error);
1825				mv->mv_address = addr;
1826				mv->mv_data = data;
1827				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1828					mte = &cfg->msix.msix_table[j];
1829					if (mte->mte_vector != i + 1)
1830						continue;
1831					if (mte->mte_handlers == 0)
1832						continue;
1833					pci_mask_msix(dev, j);
1834					pci_enable_msix(dev, j, addr, data);
1835					pci_unmask_msix(dev, j);
1836				}
1837			}
1838		}
1839		return (ENOENT);
1840	}
1841
1842	return (ENOENT);
1843}
1844
1845/*
1846 * Returns true if the specified device is blacklisted because MSI
1847 * doesn't work.
1848 */
1849int
1850pci_msi_device_blacklisted(device_t dev)
1851{
1852	struct pci_quirk *q;
1853
1854	if (!pci_honor_msi_blacklist)
1855		return (0);
1856
1857	for (q = &pci_quirks[0]; q->devid; q++) {
1858		if (q->devid == pci_get_devid(dev) &&
1859		    q->type == PCI_QUIRK_DISABLE_MSI)
1860			return (1);
1861	}
1862	return (0);
1863}
1864
1865/*
1866 * Returns true if a specified chipset supports MSI when it is
1867 * emulated hardware in a virtual machine.
1868 */
1869static int
1870pci_msi_vm_chipset(device_t dev)
1871{
1872	struct pci_quirk *q;
1873
1874	for (q = &pci_quirks[0]; q->devid; q++) {
1875		if (q->devid == pci_get_devid(dev) &&
1876		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1877			return (1);
1878	}
1879	return (0);
1880}
1881
1882/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1884 * we just check for blacklisted chipsets as represented by the
1885 * host-PCI bridge at device 0:0:0.  In the future, it may become
1886 * necessary to check other system attributes, such as the kenv values
1887 * that give the motherboard manufacturer and model number.
1888 */
1889static int
1890pci_msi_blacklisted(void)
1891{
1892	device_t dev;
1893
1894	if (!pci_honor_msi_blacklist)
1895		return (0);
1896
1897	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1898	if (!(pcie_chipset || pcix_chipset)) {
1899		if (vm_guest != VM_GUEST_NO) {
1900			dev = pci_find_bsf(0, 0, 0);
1901			if (dev != NULL)
1902				return (pci_msi_vm_chipset(dev) == 0);
1903		}
1904		return (1);
1905	}
1906
1907	dev = pci_find_bsf(0, 0, 0);
1908	if (dev != NULL)
1909		return (pci_msi_device_blacklisted(dev));
1910	return (0);
1911}
1912
1913/*
1914 * Attempt to allocate *count MSI messages.  The actual number allocated is
1915 * returned in *count.  After this function returns, each message will be
1916 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1917 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the bridge for messages, halving the request on each
	 * failure until a single message either succeeds or fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * (bits 4-6) holds log2 of the number of enabled messages.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2036
2037/* Release the MSI messages associated with this device. */
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2085
2086/*
2087 * Return the max supported MSI messages this device supports.
2088 * Basically, assuming the MD code can alloc messages, this function
2089 * should return the maximum value that pci_alloc_msi() can return.
2090 * Thus, it is subject to the tunables, etc.
2091 */
2092int
2093pci_msi_count_method(device_t dev, device_t child)
2094{
2095	struct pci_devinfo *dinfo = device_get_ivars(child);
2096	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2097
2098	if (pci_do_msi && msi->msi_location != 0)
2099		return (msi->msi_msgnum);
2100	return (0);
2101}
2102
/* Free the pcicfgregs structure and all dependent data structures. */
2104
2105int
2106pci_freecfg(struct pci_devinfo *dinfo)
2107{
2108	struct devlist *devlist_head;
2109	int i;
2110
2111	devlist_head = &pci_devq;
2112
2113	if (dinfo->cfg.vpd.vpd_reg) {
2114		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2115		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2116			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2117		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2118		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2119			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2120		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2121	}
2122	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2123	free(dinfo, M_DEVBUF);
2124
2125	/* increment the generation count */
2126	pci_generation++;
2127
2128	/* we're losing one device */
2129	pci_numdevs--;
2130	return (0);
2131}
2132
2133/*
 * PCI power management
2135 */
2136int
2137pci_set_powerstate_method(device_t dev, device_t child, int state)
2138{
2139	struct pci_devinfo *dinfo = device_get_ivars(child);
2140	pcicfgregs *cfg = &dinfo->cfg;
2141	uint16_t status;
2142	int result, oldstate, highest, delay;
2143
2144	if (cfg->pp.pp_cap == 0)
2145		return (EOPNOTSUPP);
2146
2147	/*
2148	 * Optimize a no state change request away.  While it would be OK to
2149	 * write to the hardware in theory, some devices have shown odd
2150	 * behavior when going from D3 -> D3.
2151	 */
2152	oldstate = pci_get_powerstate(child);
2153	if (oldstate == state)
2154		return (0);
2155
2156	/*
2157	 * The PCI power management specification states that after a state
2158	 * transition between PCI power states, system software must
2159	 * guarantee a minimal delay before the function accesses the device.
2160	 * Compute the worst case delay that we need to guarantee before we
2161	 * access the device.  Many devices will be responsive much more
2162	 * quickly than this delay, but there are some that don't respond
2163	 * instantly to state changes.  Transitions to/from D3 state require
2164	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2165	 * is done below with DELAY rather than a sleeper function because
2166	 * this function can be called from contexts where we cannot sleep.
2167	 */
2168	highest = (oldstate > state) ? oldstate : state;
2169	if (highest == PCI_POWERSTATE_D3)
2170	    delay = 10000;
2171	else if (highest == PCI_POWERSTATE_D2)
2172	    delay = 200;
2173	else
2174	    delay = 0;
2175	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2176	    & ~PCIM_PSTAT_DMASK;
2177	result = 0;
2178	switch (state) {
2179	case PCI_POWERSTATE_D0:
2180		status |= PCIM_PSTAT_D0;
2181		break;
2182	case PCI_POWERSTATE_D1:
2183		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2184			return (EOPNOTSUPP);
2185		status |= PCIM_PSTAT_D1;
2186		break;
2187	case PCI_POWERSTATE_D2:
2188		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2189			return (EOPNOTSUPP);
2190		status |= PCIM_PSTAT_D2;
2191		break;
2192	case PCI_POWERSTATE_D3:
2193		status |= PCIM_PSTAT_D3;
2194		break;
2195	default:
2196		return (EINVAL);
2197	}
2198
2199	if (bootverbose)
2200		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2201		    state);
2202
2203	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2204	if (delay)
2205		DELAY(delay);
2206	return (0);
2207}
2208
2209int
2210pci_get_powerstate_method(device_t dev, device_t child)
2211{
2212	struct pci_devinfo *dinfo = device_get_ivars(child);
2213	pcicfgregs *cfg = &dinfo->cfg;
2214	uint16_t status;
2215	int result;
2216
2217	if (cfg->pp.pp_cap != 0) {
2218		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2219		switch (status & PCIM_PSTAT_DMASK) {
2220		case PCIM_PSTAT_D0:
2221			result = PCI_POWERSTATE_D0;
2222			break;
2223		case PCIM_PSTAT_D1:
2224			result = PCI_POWERSTATE_D1;
2225			break;
2226		case PCIM_PSTAT_D2:
2227			result = PCI_POWERSTATE_D2;
2228			break;
2229		case PCIM_PSTAT_D3:
2230			result = PCI_POWERSTATE_D3;
2231			break;
2232		default:
2233			result = PCI_POWERSTATE_UNKNOWN;
2234			break;
2235		}
2236	} else {
2237		/* No support, device is always at D0 */
2238		result = PCI_POWERSTATE_D0;
2239	}
2240	return (result);
2241}
2242
2243/*
2244 * Some convenience functions for PCI device drivers.
2245 */
2246
2247static __inline void
2248pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2249{
2250	uint16_t	command;
2251
2252	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2253	command |= bit;
2254	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2255}
2256
2257static __inline void
2258pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2259{
2260	uint16_t	command;
2261
2262	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2263	command &= ~bit;
2264	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2265}
2266
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Allow the device to initiate bus-master (DMA) cycles. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2273
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Prevent the device from initiating bus-master (DMA) cycles. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2280
2281int
2282pci_enable_io_method(device_t dev, device_t child, int space)
2283{
2284	uint16_t bit;
2285
2286	switch(space) {
2287	case SYS_RES_IOPORT:
2288		bit = PCIM_CMD_PORTEN;
2289		break;
2290	case SYS_RES_MEMORY:
2291		bit = PCIM_CMD_MEMEN;
2292		break;
2293	default:
2294		return (EINVAL);
2295	}
2296	pci_set_command_bit(dev, child, bit);
2297	return (0);
2298}
2299
2300int
2301pci_disable_io_method(device_t dev, device_t child, int space)
2302{
2303	uint16_t bit;
2304
2305	switch(space) {
2306	case SYS_RES_IOPORT:
2307		bit = PCIM_CMD_PORTEN;
2308		break;
2309	case SYS_RES_MEMORY:
2310		bit = PCIM_CMD_MEMEN;
2311		break;
2312	default:
2313		return (EINVAL);
2314	}
2315	pci_clear_command_bit(dev, child, bit);
2316	return (0);
2317}
2318
2319/*
2320 * New style pci driver.  Parent device is either a pci-host-bridge or a
2321 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2322 */
2323
/*
 * Dump the interesting fields of a newly-found device's config header
 * (IDs, location, class, timers, capabilities) when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* Timer fields are also shown converted to nanoseconds. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based: 1 prints as INTA ('a'). */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2380
2381static int
2382pci_porten(device_t dev)
2383{
2384	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2385}
2386
2387static int
2388pci_memen(device_t dev)
2389{
2390	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2391}
2392
/*
 * Probe a BAR: return its current value in *mapp and the sizing value
 * (read back after writing all 1's) in *testvalp.  The original BAR
 * contents and the command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2454
2455static void
2456pci_write_bar(device_t dev, int reg, pci_addr_t base)
2457{
2458	pci_addr_t map;
2459	int ln2range;
2460
2461	map = pci_read_config(dev, reg, 4);
2462
2463	/* The device ROM BAR is always 32-bits. */
2464	if (reg == PCIR_BIOS)
2465		return;
2466	ln2range = pci_maprange(map);
2467	pci_write_config(dev, reg, base, 4);
2468	if (ln2range == 64)
2469		pci_write_config(dev, reg + 4, base >> 32, 4);
2470}
2471
2472/*
2473 * Add a resource based on a pci map register. Return 1 if the map
2474 * register is a 32bit map register or 2 if it is a 64bit register.
2475 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/* Size the BAR and classify it as memory or I/O port space. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the decoded size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	/* A 64-bit BAR consumes two 32-bit BAR registers. */
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4
	 * (mapsize is the log2 of the size).
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject BARs that don't fit in this architecture's bus space. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* An unprogrammed BAR gets a wildcard range for the parent to fill. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, reg, start);
	return (barlen);
}
2606
2607/*
2608 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
2610 * same addresses that old ISA hardware did. This dictates that we use
2611 * those addresses and ignore the BAR's if we cannot set PCI native
2612 * addressing mode.
2613 */
2614static void
2615pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2616    uint32_t prefetchmask)
2617{
2618	struct resource *r;
2619	int rid, type, progif;
2620#if 0
2621	/* if this device supports PCI native addressing use it */
2622	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2623	if ((progif & 0x8a) == 0x8a) {
2624		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2625		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2626			printf("Trying ATA native PCI addressing mode\n");
2627			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2628		}
2629	}
2630#endif
2631	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2632	type = SYS_RES_IOPORT;
2633	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2634		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2635		    prefetchmask & (1 << 0));
2636		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2637		    prefetchmask & (1 << 1));
2638	} else {
2639		rid = PCIR_BAR(0);
2640		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2641		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2642		    0x1f7, 8, 0);
2643		rid = PCIR_BAR(1);
2644		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2645		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2646		    0x3f6, 1, 0);
2647	}
2648	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2649		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2650		    prefetchmask & (1 << 2));
2651		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2652		    prefetchmask & (1 << 3));
2653	} else {
2654		rid = PCIR_BAR(2);
2655		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2656		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2657		    0x177, 8, 0);
2658		rid = PCIR_BAR(3);
2659		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2660		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2661		    0x376, 1, 0);
2662	}
2663	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2664	    prefetchmask & (1 << 4));
2665	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2666	    prefetchmask & (1 << 5));
2667}
2668
/*
 * Determine the IRQ for a device's INTx pin and register it as the
 * rid 0 SYS_RES_IRQ resource.  A "hw.pci<D>.<B>.<S>.INT<p>.irq"
 * tunable takes precedence; otherwise the intline register or
 * PCI_ASSIGN_INTERRUPT() on the parent bus supplies the value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Values outside 1..254 are rejected (255 conventionally = none). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2716
2717/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM owns the controller; request an ownership change. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100ms (100 x 1ms) for SMM to release it. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force the controller to reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2753
2754/* Perform early UHCI takeover from SMM. */
2755static void
2756uhci_early_takeover(device_t self)
2757{
2758	struct resource *res;
2759	int rid;
2760
2761	/*
2762	 * Set the PIRQD enable bit and switch off all the others. We don't
2763	 * want legacy support to interfere with us XXX Does this also mean
2764	 * that the BIOS won't touch the keyboard anymore if it is connected
2765	 * to the ports of the root hub?
2766	 */
2767	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2768
2769	/* Disable interrupts */
2770	rid = PCI_UHCI_BASE_REG;
2771	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2772	if (res != NULL) {
2773		bus_write_2(res, UHCI_INTR, 0);
2774		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2775	}
2776}
2777
2778/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list in config space looking for the legacy
	 * support capability and its BIOS-owned semaphore.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS doesn't hold this controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore to request ownership. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms (100 x 1ms) for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2833
/*
 * Size and reserve a new device's BARs, add quirked resources, route
 * its INTx interrupt, and take over USB host controllers from SMM
 * where applicable.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices need special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR registers were used. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from SMM before a driver loads. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2888
/*
 * Scan every slot and function on the given bus and add a child
 * device for each function found.  dinfo_size lets bus subclasses
 * allocate an enlarged pci_devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* Brief pause before the first config access to the slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with an invalid/absent header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2921
2922void
2923pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2924{
2925	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2926	device_set_ivars(dinfo->cfg.dev, dinfo);
2927	resource_list_init(&dinfo->resources);
2928	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2929	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2930	pci_print_verbose(dinfo);
2931	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2932}
2933
/* Generic PCI bus probe; always matches at generic priority. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2943
2944static int
2945pci_attach(device_t dev)
2946{
2947	int busno, domain;
2948
2949	/*
2950	 * Since there can be multiple independantly numbered PCI
2951	 * busses on systems with multiple PCI domains, we can't use
2952	 * the unit number to decide which bus we are probing. We ask
2953	 * the parent pcib what our domain and bus numbers are.
2954	 */
2955	domain = pcib_get_domain(dev);
2956	busno = pcib_get_bus(dev);
2957	if (bootverbose)
2958		device_printf(dev, "domain=%d, physical bus=%d\n",
2959		    domain, busno);
2960	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2961	return (bus_generic_attach(dev));
2962}
2963
2964static void
2965pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2966    int state)
2967{
2968	device_t child, pcib;
2969	struct pci_devinfo *dinfo;
2970	int dstate, i;
2971
2972	/*
2973	 * Set the device to the given state.  If the firmware suggests
2974	 * a different power state, use it instead.  If power management
2975	 * is not present, the firmware is responsible for managing
2976	 * device power.  Skip children who aren't attached since they
2977	 * are handled separately.
2978	 */
2979	pcib = device_get_parent(dev);
2980	for (i = 0; i < numdevs; i++) {
2981		child = devlist[i];
2982		dinfo = device_get_ivars(child);
2983		dstate = state;
2984		if (device_is_attached(child) &&
2985		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2986			pci_set_powerstate(child, dstate);
2987	}
2988}
2989
2990int
2991pci_suspend(device_t dev)
2992{
2993	device_t child, *devlist;
2994	struct pci_devinfo *dinfo;
2995	int error, i, numdevs;
2996
2997	/*
2998	 * Save the PCI configuration space for each child and set the
2999	 * device in the appropriate power state for this sleep state.
3000	 */
3001	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3002		return (error);
3003	for (i = 0; i < numdevs; i++) {
3004		child = devlist[i];
3005		dinfo = device_get_ivars(child);
3006		pci_cfg_save(child, dinfo, 0);
3007	}
3008
3009	/* Suspend devices before potentially powering them down. */
3010	error = bus_generic_suspend(dev);
3011	if (error) {
3012		free(devlist, M_TEMP);
3013		return (error);
3014	}
3015	if (pci_do_power_suspend)
3016		pci_set_power_children(dev, devlist, numdevs,
3017		    PCI_POWERSTATE_D3);
3018	free(devlist, M_TEMP);
3019	return (0);
3020}
3021
3022int
3023pci_resume(device_t dev)
3024{
3025	device_t child, *devlist;
3026	struct pci_devinfo *dinfo;
3027	int error, i, numdevs;
3028
3029	/*
3030	 * Set each child to D0 and restore its PCI configuration space.
3031	 */
3032	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3033		return (error);
3034	if (pci_do_power_resume)
3035		pci_set_power_children(dev, devlist, numdevs,
3036		    PCI_POWERSTATE_D0);
3037
3038	/* Now the device is powered up, restore its config space. */
3039	for (i = 0; i < numdevs; i++) {
3040		child = devlist[i];
3041		dinfo = device_get_ivars(child);
3042
3043		pci_cfg_restore(child, dinfo);
3044		if (!device_is_attached(child))
3045			pci_cfg_save(child, dinfo, 1);
3046	}
3047
3048	/*
3049	 * Resume critical devices first, then everything else later.
3050	 */
3051	for (i = 0; i < numdevs; i++) {
3052		child = devlist[i];
3053		switch (pci_get_class(child)) {
3054		case PCIC_DISPLAY:
3055		case PCIC_MEMORY:
3056		case PCIC_BRIDGE:
3057		case PCIC_BASEPERIPH:
3058			DEVICE_RESUME(child);
3059			break;
3060		}
3061	}
3062	for (i = 0; i < numdevs; i++) {
3063		child = devlist[i];
3064		switch (pci_get_class(child)) {
3065		case PCIC_DISPLAY:
3066		case PCIC_MEMORY:
3067		case PCIC_BRIDGE:
3068		case PCIC_BASEPERIPH:
3069			break;
3070		default:
3071			DEVICE_RESUME(child);
3072		}
3073	}
3074	free(devlist, M_TEMP);
3075	return (0);
3076}
3077
/*
 * Locate the PCI vendor/device description database, if the boot loader
 * preloaded one (a module of type "pci_vendor_data"), and publish its
 * address/size through the pci_vendordata globals for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this stores one byte past the
			 * reported size; presumably the preload region is
			 * padded so this is safe -- confirm against the
			 * loader's preload memory layout.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3097
3098void
3099pci_driver_added(device_t dev, driver_t *driver)
3100{
3101	int numdevs;
3102	device_t *devlist;
3103	device_t child;
3104	struct pci_devinfo *dinfo;
3105	int i;
3106
3107	if (bootverbose)
3108		device_printf(dev, "driver added\n");
3109	DEVICE_IDENTIFY(driver, dev);
3110	if (device_get_children(dev, &devlist, &numdevs) != 0)
3111		return;
3112	for (i = 0; i < numdevs; i++) {
3113		child = devlist[i];
3114		if (device_get_state(child) != DS_NOTPRESENT)
3115			continue;
3116		dinfo = device_get_ivars(child);
3117		pci_print_verbose(dinfo);
3118		if (bootverbose)
3119			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3120		pci_cfg_restore(child, dinfo);
3121		if (device_probe_and_attach(child) != 0)
3122			pci_cfg_save(child, dinfo, 1);
3123	}
3124	free(devlist, M_TEMP);
3125}
3126
/*
 * Bus method: set up an interrupt handler for a child device.
 *
 * For a legacy INTx interrupt (rid 0) this just re-enables INTx decoding.
 * For MSI/MSI-X (rid > 0) the address/data pair is lazily mapped through
 * the parent bridge on the first handler attach, the message is enabled
 * (and, for MSI-X, unmasked) on first use, and INTx decoding is disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first handler attach. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in config space on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3218
3219int
3220pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3221    void *cookie)
3222{
3223	struct msix_table_entry *mte;
3224	struct resource_list_entry *rle;
3225	struct pci_devinfo *dinfo;
3226	int error, rid;
3227
3228	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3229		return (EINVAL);
3230
3231	/* If this isn't a direct child, just bail out */
3232	if (device_get_parent(child) != dev)
3233		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3234
3235	rid = rman_get_rid(irq);
3236	if (rid == 0) {
3237		/* Mask INTx */
3238		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3239	} else {
3240		/*
3241		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3242		 * decrement the appropriate handlers count and mask the
3243		 * MSI-X message, or disable MSI messages if the count
3244		 * drops to 0.
3245		 */
3246		dinfo = device_get_ivars(child);
3247		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3248		if (rle->res != irq)
3249			return (EINVAL);
3250		if (dinfo->cfg.msi.msi_alloc > 0) {
3251			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3252			    ("MSI-X index too high"));
3253			if (dinfo->cfg.msi.msi_handlers == 0)
3254				return (EINVAL);
3255			dinfo->cfg.msi.msi_handlers--;
3256			if (dinfo->cfg.msi.msi_handlers == 0)
3257				pci_disable_msi(child);
3258		} else {
3259			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3260			    ("No MSI or MSI-X interrupts allocated"));
3261			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3262			    ("MSI-X index too high"));
3263			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3264			if (mte->mte_handlers == 0)
3265				return (EINVAL);
3266			mte->mte_handlers--;
3267			if (mte->mte_handlers == 0)
3268				pci_mask_msix(child, rid - 1);
3269		}
3270	}
3271	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3272	if (rid > 0)
3273		KASSERT(error == 0,
3274		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3275	return (error);
3276}
3277
/*
 * Bus method: print the one-line boot-time description of a child,
 * including the I/O port, memory and IRQ resources it uses.
 * Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the flags of the bus device (dev),
	 * not of the child being described -- confirm whether the child's
	 * flags were intended.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3303
/*
 * Table of generic class/subclass descriptions used by pci_probe_nomatch()
 * when no driver attaches and the device is not in the vendor database.
 * A subclass of -1 marks the generic entry for the whole class; the table
 * is terminated by a NULL desc.
 */
static struct
{
	int	class;		/* PCI base class (PCIC_*) */
	int	subclass;	/* PCI subclass (PCIS_*), or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3395
3396void
3397pci_probe_nomatch(device_t dev, device_t child)
3398{
3399	int	i;
3400	char	*cp, *scp, *device;
3401
3402	/*
3403	 * Look for a listing for this device in a loaded device database.
3404	 */
3405	if ((device = pci_describe_device(child)) != NULL) {
3406		device_printf(dev, "<%s>", device);
3407		free(device, M_DEVBUF);
3408	} else {
3409		/*
3410		 * Scan the class/subclass descriptions for a general
3411		 * description.
3412		 */
3413		cp = "unknown";
3414		scp = NULL;
3415		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3416			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3417				if (pci_nomatch_tab[i].subclass == -1) {
3418					cp = pci_nomatch_tab[i].desc;
3419				} else if (pci_nomatch_tab[i].subclass ==
3420				    pci_get_subclass(child)) {
3421					scp = pci_nomatch_tab[i].desc;
3422				}
3423			}
3424		}
3425		device_printf(dev, "<%s%s%s>",
3426		    cp ? cp : "",
3427		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3428		    scp ? scp : "");
3429	}
3430	printf(" at device %d.%d (no driver attached)\n",
3431	    pci_get_slot(child), pci_get_function(child));
3432	pci_cfg_save(child, device_get_ivars(child), 1);
3433	return;
3434}
3435
3436/*
3437 * Parse the PCI device database, if loaded, and return a pointer to a
3438 * description of the device.
3439 *
3440 * The database is flat text formatted as follows:
3441 *
3442 * Any line not in a valid format is ignored.
3443 * Lines are terminated with newline '\n' characters.
3444 *
3445 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3446 * the vendor name.
3447 *
3448 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3449 * - devices cannot be listed without a corresponding VENDOR line.
3450 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3451 * another TAB, then the device name.
3452 */
3453
3454/*
3455 * Assuming (ptr) points to the beginning of a line in the database,
3456 * return the vendor or device and description of the next entry.
3457 * The value of (vendor) or (device) inappropriate for the entry type
3458 * is set to -1.  Returns nonzero at the end of the database.
3459 *
3460 * Note that this is slightly unrobust in the face of corrupt data;
3461 * we attempt to safeguard against this by spamming the end of the
3462 * database with a newline when we initialise.
3463 */
3464static int
3465pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3466{
3467	char	*cp = *ptr;
3468	int	left;
3469
3470	*device = -1;
3471	*vendor = -1;
3472	**desc = '\0';
3473	for (;;) {
3474		left = pci_vendordata_size - (cp - pci_vendordata);
3475		if (left <= 0) {
3476			*ptr = cp;
3477			return(1);
3478		}
3479
3480		/* vendor entry? */
3481		if (*cp != '\t' &&
3482		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3483			break;
3484		/* device entry? */
3485		if (*cp == '\t' &&
3486		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3487			break;
3488
3489		/* skip to next line */
3490		while (*cp != '\n' && left > 0) {
3491			cp++;
3492			left--;
3493		}
3494		if (*cp == '\n') {
3495			cp++;
3496			left--;
3497		}
3498	}
3499	/* skip to next line */
3500	while (*cp != '\n' && left > 0) {
3501		cp++;
3502		left--;
3503	}
3504	if (*cp == '\n' && left > 0)
3505		cp++;
3506	*ptr = cp;
3507	return(0);
3508}
3509
3510static char *
3511pci_describe_device(device_t dev)
3512{
3513	int	vendor, device;
3514	char	*desc, *vp, *dp, *line;
3515
3516	desc = vp = dp = NULL;
3517
3518	/*
3519	 * If we have no vendor data, we can't do anything.
3520	 */
3521	if (pci_vendordata == NULL)
3522		goto out;
3523
3524	/*
3525	 * Scan the vendor data looking for this device
3526	 */
3527	line = pci_vendordata;
3528	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3529		goto out;
3530	for (;;) {
3531		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3532			goto out;
3533		if (vendor == pci_get_vendor(dev))
3534			break;
3535	}
3536	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3537		goto out;
3538	for (;;) {
3539		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3540			*dp = 0;
3541			break;
3542		}
3543		if (vendor != -1) {
3544			*dp = 0;
3545			break;
3546		}
3547		if (device == pci_get_device(dev))
3548			break;
3549	}
3550	if (dp[0] == '\0')
3551		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3552	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3553	    NULL)
3554		sprintf(desc, "%s, %s", vp, dp);
3555 out:
3556	if (vp != NULL)
3557		free(vp, M_DEVBUF);
3558	if (dp != NULL)
3559		free(dp, M_DEVBUF);
3560	return(desc);
3561}
3562
/*
 * Bus method: read an instance variable of a child device.  All values
 * come from the cached config-space registers in the child's pci_devinfo.
 * Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR (not supported here),
 * or ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor ID. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3645
3646int
3647pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3648{
3649	struct pci_devinfo *dinfo;
3650
3651	dinfo = device_get_ivars(child);
3652
3653	switch (which) {
3654	case PCI_IVAR_INTPIN:
3655		dinfo->cfg.intpin = value;
3656		return (0);
3657	case PCI_IVAR_ETHADDR:
3658	case PCI_IVAR_SUBVENDOR:
3659	case PCI_IVAR_SUBDEVICE:
3660	case PCI_IVAR_VENDOR:
3661	case PCI_IVAR_DEVICE:
3662	case PCI_IVAR_DEVID:
3663	case PCI_IVAR_CLASS:
3664	case PCI_IVAR_SUBCLASS:
3665	case PCI_IVAR_PROGIF:
3666	case PCI_IVAR_REVID:
3667	case PCI_IVAR_IRQ:
3668	case PCI_IVAR_DOMAIN:
3669	case PCI_IVAR_BUS:
3670	case PCI_IVAR_SLOT:
3671	case PCI_IVAR_FUNCTION:
3672		return (EINVAL);	/* disallow for now */
3673
3674	default:
3675		return (ENOENT);
3676	}
3677}
3678
3679
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

/*
 * DDB "show pciregs" command: walk the global PCI device queue and print
 * one line per device with its location, class, subsystem and device IDs.
 * Unnamed devices are labeled "none" with a running counter as the unit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
3729
/*
 * Lazily reserve the resource backing a BAR the first time a child asks
 * for it: size the BAR by probing it, sanity-check the requested type
 * against what the BAR decodes, allocate a suitably sized and aligned
 * range from our parent, record it in the child's resource list as
 * RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/*
	 * Determine the size of the BAR and ignore BARs with a size
	 * of 0.  Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	if (mapsize == 0)
		goto out;

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3825
3826
/*
 * Bus method: allocate a resource for a child.  Requests from non-direct
 * children are passed straight up.  For our own children, an IRQ may be
 * lazily routed, and port/memory BARs are lazily reserved via
 * pci_reserve_map() before the request is satisfied from the child's
 * resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the (possibly just reserved) resource-list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3877
3878int
3879pci_activate_resource(device_t dev, device_t child, int type, int rid,
3880    struct resource *r)
3881{
3882	int error;
3883
3884	error = bus_generic_activate_resource(dev, child, type, rid, r);
3885	if (error)
3886		return (error);
3887
3888	/* Enable decoding in the command register when activating BARs. */
3889	if (device_get_parent(child) == dev) {
3890		/* Device ROMs need their decoding explicitly enabled. */
3891		if (rid == PCIR_BIOS)
3892			pci_write_config(child, rid, rman_get_start(r) |
3893			    PCIM_BIOS_ENABLE, 4);
3894		switch (type) {
3895		case SYS_RES_IOPORT:
3896		case SYS_RES_MEMORY:
3897			error = PCI_ENABLE_IO(dev, child, type);
3898			break;
3899		}
3900	}
3901	return (error);
3902}
3903
3904int
3905pci_deactivate_resource(device_t dev, device_t child, int type,
3906    int rid, struct resource *r)
3907{
3908	int error;
3909
3910	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3911	if (error)
3912		return (error);
3913
3914	/* Disable decoding for device ROMs. */
3915	if (rid == PCIR_BIOS)
3916		pci_write_config(child, rid, rman_get_start(r), 4);
3917	return (0);
3918}
3919
/*
 * Detach and destroy a child device, releasing every resource recorded
 * in its resource list.  Port/memory decoding is disabled first so the
 * device cannot touch ranges that are about to be freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource should not still be active or busy
			 * after detach; complain and force-release it
			 * before unreserving.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3959
/*
 * Bus method: delete a resource-list entry for a direct child.  If the
 * entry holds a reserved resource, it is unreserved first; BARs are
 * cleared so the device stops decoding the freed range.  Entries whose
 * resource is still active or busy are left alone with a warning.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4002
4003struct resource_list *
4004pci_get_resource_list (device_t dev, device_t child)
4005{
4006	struct pci_devinfo *dinfo = device_get_ivars(child);
4007
4008	return (&dinfo->resources);
4009}
4010
4011uint32_t
4012pci_read_config_method(device_t dev, device_t child, int reg, int width)
4013{
4014	struct pci_devinfo *dinfo = device_get_ivars(child);
4015	pcicfgregs *cfg = &dinfo->cfg;
4016
4017	return (PCIB_READ_CONFIG(device_get_parent(dev),
4018	    cfg->bus, cfg->slot, cfg->func, reg, width));
4019}
4020
4021void
4022pci_write_config_method(device_t dev, device_t child, int reg,
4023    uint32_t val, int width)
4024{
4025	struct pci_devinfo *dinfo = device_get_ivars(child);
4026	pcicfgregs *cfg = &dinfo->cfg;
4027
4028	PCIB_WRITE_CONFIG(device_get_parent(dev),
4029	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4030}
4031
4032int
4033pci_child_location_str_method(device_t dev, device_t child, char *buf,
4034    size_t buflen)
4035{
4036
4037	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4038	    pci_get_function(child));
4039	return (0);
4040}
4041
4042int
4043pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4044    size_t buflen)
4045{
4046	struct pci_devinfo *dinfo;
4047	pcicfgregs *cfg;
4048
4049	dinfo = device_get_ivars(child);
4050	cfg = &dinfo->cfg;
4051	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4052	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4053	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4054	    cfg->progif);
4055	return (0);
4056}
4057
4058int
4059pci_assign_interrupt_method(device_t dev, device_t child)
4060{
4061	struct pci_devinfo *dinfo = device_get_ivars(child);
4062	pcicfgregs *cfg = &dinfo->cfg;
4063
4064	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4065	    cfg->intpin));
4066}
4067
4068static int
4069pci_modevent(module_t mod, int what, void *arg)
4070{
4071	static struct cdev *pci_cdev;
4072
4073	switch (what) {
4074	case MOD_LOAD:
4075		STAILQ_INIT(&pci_devq);
4076		pci_generation = 0;
4077		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4078		    "pci");
4079		pci_load_vendor_data();
4080		break;
4081
4082	case MOD_UNLOAD:
4083		destroy_dev(pci_cdev);
4084		break;
4085	}
4086
4087	return (0);
4088}
4089
/*
 * Restore a device's saved configuration space (BARs, ROM, command
 * register, interrupt routing and timing registers) after a power
 * transition, then re-program any MSI/MSI-X state.  The device is
 * returned to D0 first, since lower power states reset these registers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	/*
	 * NOTE(review): progif/revid are typically read-only registers;
	 * these writes are presumably harmless no-ops on such hardware --
	 * confirm if a device is sensitive to them.
	 */
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4134
4135void
4136pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4137{
4138	int i;
4139	uint32_t cls;
4140	int ps;
4141
4142	/*
4143	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4144	 * we know need special treatment.  Type 2 devices are cardbus bridges
4145	 * which also require special treatment.  Other types are unknown, and
4146	 * we err on the side of safety by ignoring them.  Powering down
4147	 * bridges should not be undertaken lightly.
4148	 */
4149	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4150		return;
4151	for (i = 0; i < dinfo->cfg.nummaps; i++)
4152		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4153	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4154
4155	/*
4156	 * Some drivers apparently write to these registers w/o updating our
4157	 * cached copy.  No harm happens if we update the copy, so do so here
4158	 * so we can restore them.  The COMMAND register is modified by the
4159	 * bus w/o updating the cache.  This should represent the normally
4160	 * writable portion of the 'defined' part of type 0 headers.  In
4161	 * theory we also need to save/restore the PCI capability structures
4162	 * we know about, but apart from power we don't know any that are
4163	 * writable.
4164	 */
4165	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4166	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4167	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4168	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4169	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4170	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4171	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4172	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4173	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4174	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4175	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4176	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4177	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4178	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4179	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4180
4181	/*
4182	 * don't set the state for display devices, base peripherals and
4183	 * memory devices since bad things happen when they are powered down.
4184	 * We should (a) have drivers that can easily detach and (b) use
4185	 * generic drivers for these devices so that some device actually
4186	 * attaches.  We need to make sure that when we implement (a) we don't
4187	 * power the device down on a reattach.
4188	 */
4189	cls = pci_get_class(dev);
4190	if (!setstate)
4191		return;
4192	switch (pci_do_power_nodriver)
4193	{
4194		case 0:		/* NO powerdown at all */
4195			return;
4196		case 1:		/* Conservative about what to power down */
4197			if (cls == PCIC_STORAGE)
4198				return;
4199			/*FALLTHROUGH*/
4200		case 2:		/* Agressive about what to power down */
4201			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4202			    cls == PCIC_BASEPERIPH)
4203				return;
4204			/*FALLTHROUGH*/
4205		case 3:		/* Power down everything */
4206			break;
4207	}
4208	/*
4209	 * PCI spec says we can only go into D3 state from D0 state.
4210	 * Transition from D[12] into D0 before going to D3 state.
4211	 */
4212	ps = pci_get_powerstate(dev);
4213	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4214		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4215	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4216		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4217}
4218