/* pci.c revision 214349 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 214349 2010-10-25 15:51:43Z nwhitehorn $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72static pci_addr_t	pci_mapbase(uint64_t mapreg);
73static const char	*pci_maptype(uint64_t mapreg);
74static int		pci_mapsize(uint64_t testval);
75static int		pci_maprange(uint64_t mapreg);
76static pci_addr_t	pci_rombase(uint64_t mapreg);
77static int		pci_romsize(uint64_t testval);
78static void		pci_fixancient(pcicfgregs *cfg);
79static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80
81static int		pci_porten(device_t dev);
82static int		pci_memen(device_t dev);
83static void		pci_assign_interrupt(device_t bus, device_t dev,
84			    int force_route);
85static int		pci_add_map(device_t bus, device_t dev, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg, uint32_t *data);
99#if 0
100static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114static int		pci_remap_intr_method(device_t bus, device_t dev,
115			    u_int irq);
116
/*
 * Method table wiring the PCI bus driver into newbus: generic device
 * lifecycle methods, the bus interface used by child drivers for
 * resources and interrupts, and the PCI-specific kobj interface
 * (config space access, power states, VPD, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
169
/* Declare the pci driver class and attach it to pcib (PCI bridge) parents. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description data (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
178
179
/*
 * Table-driven quirk handling: an entry matches a device by its combined
 * device/vendor ID word and names the workaround to apply.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-specific argument (map register offset) */
	int	arg2;	/* quirk-specific argument (unused by current entries) */
};
189
/* Quirk table; scanned linearly, terminated by a zero devid entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* terminator */
};
230
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions (filled by pci_read_device). */
struct devlist pci_devq;
/* Bumped on every list change so userland snapshots can detect staleness. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set when any PCI-express / PCI-X capability is seen during enumeration. */
static int pcie_chipset, pcix_chipset;
240
/* sysctl vars */
/* Each TUNABLE_INT below mirrors a loader tunable of the same hw.pci.* name. */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults to on only on i386/amd64. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
298
/*
 * Find a device_t by bus/slot/function in domain 0.
 * Convenience wrapper around pci_find_dbsf().
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
307
308/* Find a device_t by domain/bus/slot/function */
309
310device_t
311pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
312{
313	struct pci_devinfo *dinfo;
314
315	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
316		if ((dinfo->cfg.domain == domain) &&
317		    (dinfo->cfg.bus == bus) &&
318		    (dinfo->cfg.slot == slot) &&
319		    (dinfo->cfg.func == func)) {
320			return (dinfo->cfg.dev);
321		}
322	}
323
324	return (NULL);
325}
326
327/* Find a device_t by vendor/device ID */
328
329device_t
330pci_find_device(uint16_t vendor, uint16_t device)
331{
332	struct pci_devinfo *dinfo;
333
334	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335		if ((dinfo->cfg.vendor == vendor) &&
336		    (dinfo->cfg.device == device)) {
337			return (dinfo->cfg.dev);
338		}
339	}
340
341	return (NULL);
342}
343
344static int
345pci_printf(pcicfgregs *cfg, const char *fmt, ...)
346{
347	va_list ap;
348	int retval;
349
350	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
351	    cfg->func);
352	va_start(ap, fmt);
353	retval += vprintf(fmt, ap);
354	va_end(ap);
355	return (retval);
356}
357
358/* return base address of memory or port map */
359
360static pci_addr_t
361pci_mapbase(uint64_t mapreg)
362{
363
364	if (PCI_BAR_MEM(mapreg))
365		return (mapreg & PCIM_BAR_MEM_BASE);
366	else
367		return (mapreg & PCIM_BAR_IO_BASE);
368}
369
370/* return map type of memory or port map */
371
372static const char *
373pci_maptype(uint64_t mapreg)
374{
375
376	if (PCI_BAR_IO(mapreg))
377		return ("I/O Port");
378	if (mapreg & PCIM_BAR_MEM_PREFETCH)
379		return ("Prefetchable Memory");
380	return ("Memory");
381}
382
383/* return log2 of map size decoded for memory or port map */
384
385static int
386pci_mapsize(uint64_t testval)
387{
388	int ln2size;
389
390	testval = pci_mapbase(testval);
391	ln2size = 0;
392	if (testval != 0) {
393		while ((testval & 1) == 0)
394		{
395			ln2size++;
396			testval >>= 1;
397		}
398	}
399	return (ln2size);
400}
401
402/* return base address of device ROM */
403
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Keep only the address bits of the expansion ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
410
/* return log2 of map size decoded for device ROM */
412
413static int
414pci_romsize(uint64_t testval)
415{
416	int ln2size;
417
418	testval = pci_rombase(testval);
419	ln2size = 0;
420	if (testval != 0) {
421		while ((testval & 1) == 0)
422		{
423			ln2size++;
424			testval >>= 1;
425		}
426	}
427	return (ln2size);
428}
429
430/* return log2 of address range supported by map register */
431
432static int
433pci_maprange(uint64_t mapreg)
434{
435	int ln2range = 0;
436
437	if (PCI_BAR_IO(mapreg))
438		ln2range = 32;
439	else
440		switch (mapreg & PCIM_BAR_MEM_TYPE) {
441		case PCIM_BAR_MEM_32:
442			ln2range = 32;
443			break;
444		case PCIM_BAR_MEM_1MB:
445			ln2range = 20;
446			break;
447		case PCIM_BAR_MEM_64:
448			ln2range = 64;
449			break;
450		}
451	return (ln2range);
452}
453
454/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
455
456static void
457pci_fixancient(pcicfgregs *cfg)
458{
459	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
460		return;
461
462	/* PCI to PCI bridges use header type 1 */
463	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
464		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
465}
466
/*
 * Extract header-type specific config data: the subvendor/subdevice IDs
 * (read from the header-type specific offsets where defined) and the
 * number of BARs this header type implements.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges have no subvendor registers in the base header. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
490
/*
 * Read the configuration header of function d:b:s:f into a freshly
 * allocated pci_devinfo (of 'size' bytes, so callers may embed it in a
 * larger structure), link it onto the global device list, and return
 * it.  Returns NULL when no function is present at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no function present here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* Defensive; M_WAITOK allocations do not return NULL. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Cache capability locations if the device advertises any. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the data into the userland-visible pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
565
/*
 * Walk the device's capability list and record the location/contents of
 * the capabilities this driver cares about: power management, HT MSI
 * mapping, MSI, MSI-X, VPD, subvendor IDs, PCI-X and PCI-express.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type dependent offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Multiple-message capable field encodes log2(count). */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG deliberately remain defined for the VPD helpers below. */
}
701
702/*
703 * PCI Vital Product Data
704 */
705
706#define	PCI_VPD_TIMEOUT		1000000
707
/*
 * Read one 4-byte-aligned 32-bit word of VPD at offset 'reg' into
 * *data.  Returns 0 on success or ENXIO if the device does not signal
 * completion within PCI_VPD_TIMEOUT polls.  Uses the REG/WREG macros
 * still in scope from pci_read_extcap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Write the address; the device sets bit 15 when the data is ready. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
726
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD at offset 'reg'
 * (currently unused, hence compiled out).  Bit 15 of the address
 * register clears when the device has consumed the data.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
746
747#undef PCI_VPD_TIMEOUT
748
/* Cursor state for sequentially reading a device's VPD byte stream. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read from VPD */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* byte offset of next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes consumed */
};
757
/*
 * Return the next byte of the VPD stream in *data, fetching a fresh
 * 32-bit word through pci_read_vpd_reg() whenever the buffered word in
 * *vrs is exhausted.  Every byte delivered is folded into vrs->cksum.
 * Returns 0 on success or ENXIO when the register read times out.
 */
static int
vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
{
	uint32_t reg;
	uint8_t byte;

	if (vrs->bytesinval == 0) {
		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
			return (ENXIO);
		/* VPD data is little-endian on the wire. */
		vrs->val = le32toh(reg);
		vrs->off += 4;
		byte = vrs->val & 0xff;
		vrs->bytesinval = 3;
	} else {
		vrs->val = vrs->val >> 8;
		byte = vrs->val & 0xff;
		vrs->bytesinval--;
	}

	vrs->cksum += byte;
	*data = byte;
	return (0);
}
781
/*
 * Parse the device's VPD (Vital Product Data) and cache the identifier
 * string plus the read-only (VPD-R) and read-write (VPD-W) keyword
 * lists in cfg->vpd.  Implemented as a byte-at-a-time state machine;
 * a negative state terminates the loop: -1 means normal or handled
 * end-of-data, -2 means an I/O error reading the VPD registers.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 distinguishes large from small resource tags. */
			if (byte & 0x80) {
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword carries the checksum byte; the
			 * running sum of everything up to and including it
			 * must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * Skip an item body.  NOTE(review): no visible
			 * transition sets state 4; looks dead here.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember where the writable field starts in VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark cached even on failure so we do not retry on every query. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1061
1062int
1063pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1064{
1065	struct pci_devinfo *dinfo = device_get_ivars(child);
1066	pcicfgregs *cfg = &dinfo->cfg;
1067
1068	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1069		pci_read_vpd(device_get_parent(dev), cfg);
1070
1071	*identptr = cfg->vpd.vpd_ident;
1072
1073	if (*identptr == NULL)
1074		return (ENXIO);
1075
1076	return (0);
1077}
1078
1079int
1080pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1081	const char **vptr)
1082{
1083	struct pci_devinfo *dinfo = device_get_ivars(child);
1084	pcicfgregs *cfg = &dinfo->cfg;
1085	int i;
1086
1087	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1088		pci_read_vpd(device_get_parent(dev), cfg);
1089
1090	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1091		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1092		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1093			*vptr = cfg->vpd.vpd_ros[i].value;
1094		}
1095
1096	if (i != cfg->vpd.vpd_rocnt)
1097		return (0);
1098
1099	*vptr = NULL;
1100	return (ENXIO);
1101}
1102
1103/*
1104 * Find the requested extended capability and return the offset in
1105 * configuration space via the pointer provided. The function returns
1106 * 0 on success and error code otherwise.
1107 */
1108int
1109pci_find_extcap_method(device_t dev, device_t child, int capability,
1110    int *capreg)
1111{
1112	struct pci_devinfo *dinfo = device_get_ivars(child);
1113	pcicfgregs *cfg = &dinfo->cfg;
1114	u_int32_t status;
1115	u_int8_t ptr;
1116
1117	/*
1118	 * Check the CAP_LIST bit of the PCI status register first.
1119	 */
1120	status = pci_read_config(child, PCIR_STATUS, 2);
1121	if (!(status & PCIM_STATUS_CAPPRESENT))
1122		return (ENXIO);
1123
1124	/*
1125	 * Determine the start pointer of the capabilities list.
1126	 */
1127	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1128	case PCIM_HDRTYPE_NORMAL:
1129	case PCIM_HDRTYPE_BRIDGE:
1130		ptr = PCIR_CAP_PTR;
1131		break;
1132	case PCIM_HDRTYPE_CARDBUS:
1133		ptr = PCIR_CAP_PTR_2;
1134		break;
1135	default:
1136		/* XXX: panic? */
1137		return (ENXIO);		/* no extended capabilities support */
1138	}
1139	ptr = pci_read_config(child, ptr, 1);
1140
1141	/*
1142	 * Traverse the capabilities list.
1143	 */
1144	while (ptr != 0) {
1145		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1146			if (capreg != NULL)
1147				*capreg = ptr;
1148			return (0);
1149		}
1150		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1151	}
1152
1153	return (ENOENT);
1154}
1155
1156/*
1157 * Support for MSI-X message interrupts.
1158 */
1159void
1160pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1161{
1162	struct pci_devinfo *dinfo = device_get_ivars(dev);
1163	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1164	uint32_t offset;
1165
1166	KASSERT(msix->msix_table_len > index, ("bogus index"));
1167	offset = msix->msix_table_offset + index * 16;
1168	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1169	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1170	bus_write_4(msix->msix_table_res, offset + 8, data);
1171
1172	/* Enable MSI -> HT mapping. */
1173	pci_ht_map_msi(dev, address);
1174}
1175
1176void
1177pci_mask_msix(device_t dev, u_int index)
1178{
1179	struct pci_devinfo *dinfo = device_get_ivars(dev);
1180	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1181	uint32_t offset, val;
1182
1183	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1184	offset = msix->msix_table_offset + index * 16 + 12;
1185	val = bus_read_4(msix->msix_table_res, offset);
1186	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1187		val |= PCIM_MSIX_VCTRL_MASK;
1188		bus_write_4(msix->msix_table_res, offset, val);
1189	}
1190}
1191
1192void
1193pci_unmask_msix(device_t dev, u_int index)
1194{
1195	struct pci_devinfo *dinfo = device_get_ivars(dev);
1196	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1197	uint32_t offset, val;
1198
1199	KASSERT(msix->msix_table_len > index, ("bogus index"));
1200	offset = msix->msix_table_offset + index * 16 + 12;
1201	val = bus_read_4(msix->msix_table_res, offset);
1202	if (val & PCIM_MSIX_VCTRL_MASK) {
1203		val &= ~PCIM_MSIX_VCTRL_MASK;
1204		bus_write_4(msix->msix_table_res, offset, val);
1205	}
1206}
1207
1208int
1209pci_pending_msix(device_t dev, u_int index)
1210{
1211	struct pci_devinfo *dinfo = device_get_ivars(dev);
1212	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1213	uint32_t offset, bit;
1214
1215	KASSERT(msix->msix_table_len > index, ("bogus index"));
1216	offset = msix->msix_pba_offset + (index / 32) * 4;
1217	bit = 1 << index % 32;
1218	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1219}
1220
1221/*
1222 * Restore MSI-X registers and table during resume.  If MSI-X is
1223 * enabled then walk the virtual table to restore the actual MSI-X
1224 * table.
1225 */
1226static void
1227pci_resume_msix(device_t dev)
1228{
1229	struct pci_devinfo *dinfo = device_get_ivars(dev);
1230	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1231	struct msix_table_entry *mte;
1232	struct msix_vector *mv;
1233	int i;
1234
1235	if (msix->msix_alloc > 0) {
1236		/* First, mask all vectors. */
1237		for (i = 0; i < msix->msix_msgnum; i++)
1238			pci_mask_msix(dev, i);
1239
1240		/* Second, program any messages with at least one handler. */
1241		for (i = 0; i < msix->msix_table_len; i++) {
1242			mte = &msix->msix_table[i];
1243			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1244				continue;
1245			mv = &msix->msix_vectors[mte->mte_vector - 1];
1246			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1247			pci_unmask_msix(dev, i);
1248		}
1249	}
1250	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1251	    msix->msix_ctrl, 2);
1252}
1253
1254/*
1255 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1256 * returned in *count.  After this function returns, each message will be
1257 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1258 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* 'rle' is the PBA BAR, or the table BAR when they are shared. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Keep whatever we managed to allocate before any failure. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is a 1-based index into msix_vectors[]. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1390
1391/*
1392 * By default, pci_alloc_msix() will assign the allocated IRQ
1393 * resources consecutively to the first N messages in the MSI-X table.
1394 * However, device drivers may want to use different layouts if they
1395 * either receive fewer messages than they asked for, or they wish to
1396 * populate the MSI-X table sparsely.  This method allows the driver
1397 * to specify what layout it wants.  It must be called after a
1398 * successful pci_alloc_msix() but before any of the associated
1399 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1400 *
1401 * The 'vectors' array contains 'count' message vectors.  The array
1402 * maps directly to the MSI-X table in that index 0 in the array
1403 * specifies the vector for the first message in the MSI-X table, etc.
1404 * The vector value in each array index can either be 0 to indicate
1405 * that no vector should be assigned to a message slot, or it can be a
1406 * number from 1 to N (where N is the count returned from a
1407 * succcessful call to pci_alloc_msix()) to indicate which message
1408 * vector (IRQ) to be used for the corresponding message.
1409 *
1410 * On successful return, each message with a non-zero vector will have
1411 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1412 * 1.  Additionally, if any of the IRQs allocated via the previous
1413 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1414 * will be freed back to the system automatically.
1415 *
1416 * For example, suppose a driver has a MSI-X table with 6 messages and
1417 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1418 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1419 * C.  After the call to pci_alloc_msix(), the device will be setup to
1420 * have an MSI-X table of ABC--- (where - means no vector assigned).
1421 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1422 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1423 * be freed back to the system.  This device will also have valid
1424 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1425 *
1426 * In any case, the SYS_RES_IRQ rid X will always map to the message
1427 * at MSI-X table index X - 1 and will only be valid if a vector is
1428 * assigned to that table entry.
1429 */
1430int
1431pci_remap_msix_method(device_t dev, device_t child, int count,
1432    const u_int *vectors)
1433{
1434	struct pci_devinfo *dinfo = device_get_ivars(child);
1435	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1436	struct resource_list_entry *rle;
1437	int i, irq, j, *used;
1438
1439	/*
1440	 * Have to have at least one message in the table but the
1441	 * table can't be bigger than the actual MSI-X table in the
1442	 * device.
1443	 */
1444	if (count == 0 || count > msix->msix_msgnum)
1445		return (EINVAL);
1446
1447	/* Sanity check the vectors. */
1448	for (i = 0; i < count; i++)
1449		if (vectors[i] > msix->msix_alloc)
1450			return (EINVAL);
1451
1452	/*
1453	 * Make sure there aren't any holes in the vectors to be used.
1454	 * It's a big pain to support it, and it doesn't really make
1455	 * sense anyway.  Also, at least one vector must be used.
1456	 */
1457	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1458	    M_ZERO);
1459	for (i = 0; i < count; i++)
1460		if (vectors[i] != 0)
1461			used[vectors[i] - 1] = 1;
1462	for (i = 0; i < msix->msix_alloc - 1; i++)
1463		if (used[i] == 0 && used[i + 1] == 1) {
1464			free(used, M_DEVBUF);
1465			return (EINVAL);
1466		}
1467	if (used[0] != 1) {
1468		free(used, M_DEVBUF);
1469		return (EINVAL);
1470	}
1471
1472	/* Make sure none of the resources are allocated. */
1473	for (i = 0; i < msix->msix_table_len; i++) {
1474		if (msix->msix_table[i].mte_vector == 0)
1475			continue;
1476		if (msix->msix_table[i].mte_handlers > 0)
1477			return (EBUSY);
1478		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1479		KASSERT(rle != NULL, ("missing resource"));
1480		if (rle->res != NULL)
1481			return (EBUSY);
1482	}
1483
1484	/* Free the existing resource list entries. */
1485	for (i = 0; i < msix->msix_table_len; i++) {
1486		if (msix->msix_table[i].mte_vector == 0)
1487			continue;
1488		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1489	}
1490
1491	/*
1492	 * Build the new virtual table keeping track of which vectors are
1493	 * used.
1494	 */
1495	free(msix->msix_table, M_DEVBUF);
1496	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1497	    M_DEVBUF, M_WAITOK | M_ZERO);
1498	for (i = 0; i < count; i++)
1499		msix->msix_table[i].mte_vector = vectors[i];
1500	msix->msix_table_len = count;
1501
1502	/* Free any unused IRQs and resize the vectors array if necessary. */
1503	j = msix->msix_alloc - 1;
1504	if (used[j] == 0) {
1505		struct msix_vector *vec;
1506
1507		while (used[j] == 0) {
1508			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1509			    msix->msix_vectors[j].mv_irq);
1510			j--;
1511		}
1512		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1513		    M_WAITOK);
1514		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1515		    (j + 1));
1516		free(msix->msix_vectors, M_DEVBUF);
1517		msix->msix_vectors = vec;
1518		msix->msix_alloc = j + 1;
1519	}
1520	free(used, M_DEVBUF);
1521
1522	/* Map the IRQs onto the rids. */
1523	for (i = 0; i < count; i++) {
1524		if (vectors[i] == 0)
1525			continue;
1526		irq = msix->msix_vectors[vectors[i]].mv_irq;
1527		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1528		    irq, 1);
1529	}
1530
1531	if (bootverbose) {
1532		device_printf(child, "Remapped MSI-X IRQs as: ");
1533		for (i = 0; i < count; i++) {
1534			if (i != 0)
1535				printf(", ");
1536			if (vectors[i] == 0)
1537				printf("---");
1538			else
1539				printf("%d",
1540				    msix->msix_vectors[vectors[i]].mv_irq);
1541		}
1542		printf("\n");
1543	}
1544
1545	return (0);
1546}
1547
/*
 * Release all MSI-X messages allocated to 'child'.  Returns ENODEV if
 * no MSI-X messages are allocated, EBUSY if any message still has an
 * interrupt handler or an allocated SYS_RES_IRQ resource, and 0 on
 * success.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1594
1595/*
1596 * Return the max supported MSI-X messages this device supports.
1597 * Basically, assuming the MD code can alloc messages, this function
1598 * should return the maximum value that pci_alloc_msix() can return.
1599 * Thus, it is subject to the tunables, etc.
1600 */
1601int
1602pci_msix_count_method(device_t dev, device_t child)
1603{
1604	struct pci_devinfo *dinfo = device_get_ivars(child);
1605	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1606
1607	if (pci_do_msix && msix->msix_location != 0)
1608		return (msix->msix_msgnum);
1609	return (0);
1610}
1611
1612/*
1613 * HyperTransport MSI mapping control
1614 */
1615void
1616pci_ht_map_msi(device_t dev, uint64_t addr)
1617{
1618	struct pci_devinfo *dinfo = device_get_ivars(dev);
1619	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1620
1621	if (!ht->ht_msimap)
1622		return;
1623
1624	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1625	    ht->ht_msiaddr >> 20 == addr >> 20) {
1626		/* Enable MSI -> HT mapping. */
1627		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1628		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1629		    ht->ht_msictrl, 2);
1630	}
1631
1632	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1633		/* Disable MSI -> HT mapping. */
1634		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1635		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1636		    ht->ht_msictrl, 2);
1637	}
1638}
1639
1640int
1641pci_get_max_read_req(device_t dev)
1642{
1643	int cap;
1644	uint16_t val;
1645
1646	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1647		return (0);
1648	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1649	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1650	val >>= 12;
1651	return (1 << (val + 7));
1652}
1653
1654int
1655pci_set_max_read_req(device_t dev, int size)
1656{
1657	int cap;
1658	uint16_t val;
1659
1660	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1661		return (0);
1662	if (size < 128)
1663		size = 128;
1664	if (size > 4096)
1665		size = 4096;
1666	size = (1 << (fls(size) - 1));
1667	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1668	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1669	val |= (fls(size) - 8) << 12;
1670	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1671	return (size);
1672}
1673
1674/*
1675 * Support for MSI message signalled interrupts.
1676 */
1677void
1678pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1679{
1680	struct pci_devinfo *dinfo = device_get_ivars(dev);
1681	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1682
1683	/* Write data and address values. */
1684	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1685	    address & 0xffffffff, 4);
1686	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1687		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1688		    address >> 32, 4);
1689		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1690		    data, 2);
1691	} else
1692		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1693		    2);
1694
1695	/* Enable MSI in the control register. */
1696	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1697	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1698	    2);
1699
1700	/* Enable MSI -> HT mapping. */
1701	pci_ht_map_msi(dev, address);
1702}
1703
1704void
1705pci_disable_msi(device_t dev)
1706{
1707	struct pci_devinfo *dinfo = device_get_ivars(dev);
1708	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1709
1710	/* Disable MSI -> HT mapping. */
1711	pci_ht_map_msi(dev, 0);
1712
1713	/* Disable MSI in the control register. */
1714	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1715	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1716	    2);
1717}
1718
1719/*
1720 * Restore MSI registers during resume.  If MSI is enabled then
1721 * restore the data and address registers in addition to the control
1722 * register.
1723 */
1724static void
1725pci_resume_msi(device_t dev)
1726{
1727	struct pci_devinfo *dinfo = device_get_ivars(dev);
1728	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1729	uint64_t address;
1730	uint16_t data;
1731
1732	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1733		address = msi->msi_addr;
1734		data = msi->msi_data;
1735		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1736		    address & 0xffffffff, 4);
1737		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1738			pci_write_config(dev, msi->msi_location +
1739			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1740			pci_write_config(dev, msi->msi_location +
1741			    PCIR_MSI_DATA_64BIT, data, 2);
1742		} else
1743			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1744			    data, 2);
1745	}
1746	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1747	    2);
1748}
1749
1750static int
1751pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1752{
1753	struct pci_devinfo *dinfo = device_get_ivars(dev);
1754	pcicfgregs *cfg = &dinfo->cfg;
1755	struct resource_list_entry *rle;
1756	struct msix_table_entry *mte;
1757	struct msix_vector *mv;
1758	uint64_t addr;
1759	uint32_t data;
1760	int error, i, j;
1761
1762	/*
1763	 * Handle MSI first.  We try to find this IRQ among our list
1764	 * of MSI IRQs.  If we find it, we request updated address and
1765	 * data registers and apply the results.
1766	 */
1767	if (cfg->msi.msi_alloc > 0) {
1768
1769		/* If we don't have any active handlers, nothing to do. */
1770		if (cfg->msi.msi_handlers == 0)
1771			return (0);
1772		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1773			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1774			    i + 1);
1775			if (rle->start == irq) {
1776				error = PCIB_MAP_MSI(device_get_parent(bus),
1777				    dev, irq, &addr, &data);
1778				if (error)
1779					return (error);
1780				pci_disable_msi(dev);
1781				dinfo->cfg.msi.msi_addr = addr;
1782				dinfo->cfg.msi.msi_data = data;
1783				pci_enable_msi(dev, addr, data);
1784				return (0);
1785			}
1786		}
1787		return (ENOENT);
1788	}
1789
1790	/*
1791	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1792	 * we request the updated mapping info.  If that works, we go
1793	 * through all the slots that use this IRQ and update them.
1794	 */
1795	if (cfg->msix.msix_alloc > 0) {
1796		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1797			mv = &cfg->msix.msix_vectors[i];
1798			if (mv->mv_irq == irq) {
1799				error = PCIB_MAP_MSI(device_get_parent(bus),
1800				    dev, irq, &addr, &data);
1801				if (error)
1802					return (error);
1803				mv->mv_address = addr;
1804				mv->mv_data = data;
1805				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1806					mte = &cfg->msix.msix_table[j];
1807					if (mte->mte_vector != i + 1)
1808						continue;
1809					if (mte->mte_handlers == 0)
1810						continue;
1811					pci_mask_msix(dev, j);
1812					pci_enable_msix(dev, j, addr, data);
1813					pci_unmask_msix(dev, j);
1814				}
1815			}
1816		}
1817		return (ENOENT);
1818	}
1819
1820	return (ENOENT);
1821}
1822
1823/*
1824 * Returns true if the specified device is blacklisted because MSI
1825 * doesn't work.
1826 */
1827int
1828pci_msi_device_blacklisted(device_t dev)
1829{
1830	struct pci_quirk *q;
1831
1832	if (!pci_honor_msi_blacklist)
1833		return (0);
1834
1835	for (q = &pci_quirks[0]; q->devid; q++) {
1836		if (q->devid == pci_get_devid(dev) &&
1837		    q->type == PCI_QUIRK_DISABLE_MSI)
1838			return (1);
1839	}
1840	return (0);
1841}
1842
1843/*
1844 * Returns true if a specified chipset supports MSI when it is
1845 * emulated hardware in a virtual machine.
1846 */
1847static int
1848pci_msi_vm_chipset(device_t dev)
1849{
1850	struct pci_quirk *q;
1851
1852	for (q = &pci_quirks[0]; q->devid; q++) {
1853		if (q->devid == pci_get_devid(dev) &&
1854		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1855			return (1);
1856	}
1857	return (0);
1858}
1859
1860/*
1861 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1862 * we just check for blacklisted chipsets as represented by the
1863 * host-PCI bridge at device 0:0:0.  In the future, it may become
1864 * necessary to check other system attributes, such as the kenv values
1865 * that give the motherboard manufacturer and model number.
1866 */
1867static int
1868pci_msi_blacklisted(void)
1869{
1870	device_t dev;
1871
1872	if (!pci_honor_msi_blacklist)
1873		return (0);
1874
1875	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1876	if (!(pcie_chipset || pcix_chipset)) {
1877		if (vm_guest != VM_GUEST_NO) {
1878			dev = pci_find_bsf(0, 0, 0);
1879			if (dev != NULL)
1880				return (pci_msi_vm_chipset(dev) == 0);
1881		}
1882		return (1);
1883	}
1884
1885	dev = pci_find_bsf(0, 0, 0);
1886	if (dev != NULL)
1887		return (pci_msi_device_blacklisted(dev));
1888	return (0);
1889}
1890
1891/*
1892 * Attempt to allocate *count MSI messages.  The actual number allocated is
1893 * returned in *count.  After this function returns, each message will be
1894 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1895 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2.  Halving keeps the count a power of 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field encodes the message count as log2(actual). */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2014
2015/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the release call below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2063
2064/*
2065 * Return the max supported MSI messages this device supports.
2066 * Basically, assuming the MD code can alloc messages, this function
2067 * should return the maximum value that pci_alloc_msi() can return.
2068 * Thus, it is subject to the tunables, etc.
2069 */
2070int
2071pci_msi_count_method(device_t dev, device_t child)
2072{
2073	struct pci_devinfo *dinfo = device_get_ivars(child);
2074	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2075
2076	if (pci_do_msi && msi->msi_location != 0)
2077		return (msi->msi_msgnum);
2078	return (0);
2079}
2080
2081/* free pcicfgregs structure and all depending data structures */
2082
2083int
2084pci_freecfg(struct pci_devinfo *dinfo)
2085{
2086	struct devlist *devlist_head;
2087	int i;
2088
2089	devlist_head = &pci_devq;
2090
2091	if (dinfo->cfg.vpd.vpd_reg) {
2092		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2093		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2094			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2095		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2096		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2097			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2098		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2099	}
2100	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2101	free(dinfo, M_DEVBUF);
2102
2103	/* increment the generation count */
2104	pci_generation++;
2105
2106	/* we're losing one device */
2107	pci_numdevs--;
2108	return (0);
2109}
2110
2111/*
2112 * PCI power manangement
2113 */
2114int
2115pci_set_powerstate_method(device_t dev, device_t child, int state)
2116{
2117	struct pci_devinfo *dinfo = device_get_ivars(child);
2118	pcicfgregs *cfg = &dinfo->cfg;
2119	uint16_t status;
2120	int result, oldstate, highest, delay;
2121
2122	if (cfg->pp.pp_cap == 0)
2123		return (EOPNOTSUPP);
2124
2125	/*
2126	 * Optimize a no state change request away.  While it would be OK to
2127	 * write to the hardware in theory, some devices have shown odd
2128	 * behavior when going from D3 -> D3.
2129	 */
2130	oldstate = pci_get_powerstate(child);
2131	if (oldstate == state)
2132		return (0);
2133
2134	/*
2135	 * The PCI power management specification states that after a state
2136	 * transition between PCI power states, system software must
2137	 * guarantee a minimal delay before the function accesses the device.
2138	 * Compute the worst case delay that we need to guarantee before we
2139	 * access the device.  Many devices will be responsive much more
2140	 * quickly than this delay, but there are some that don't respond
2141	 * instantly to state changes.  Transitions to/from D3 state require
2142	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2143	 * is done below with DELAY rather than a sleeper function because
2144	 * this function can be called from contexts where we cannot sleep.
2145	 */
2146	highest = (oldstate > state) ? oldstate : state;
2147	if (highest == PCI_POWERSTATE_D3)
2148	    delay = 10000;
2149	else if (highest == PCI_POWERSTATE_D2)
2150	    delay = 200;
2151	else
2152	    delay = 0;
2153	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2154	    & ~PCIM_PSTAT_DMASK;
2155	result = 0;
2156	switch (state) {
2157	case PCI_POWERSTATE_D0:
2158		status |= PCIM_PSTAT_D0;
2159		break;
2160	case PCI_POWERSTATE_D1:
2161		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2162			return (EOPNOTSUPP);
2163		status |= PCIM_PSTAT_D1;
2164		break;
2165	case PCI_POWERSTATE_D2:
2166		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2167			return (EOPNOTSUPP);
2168		status |= PCIM_PSTAT_D2;
2169		break;
2170	case PCI_POWERSTATE_D3:
2171		status |= PCIM_PSTAT_D3;
2172		break;
2173	default:
2174		return (EINVAL);
2175	}
2176
2177	if (bootverbose)
2178		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2179		    state);
2180
2181	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2182	if (delay)
2183		DELAY(delay);
2184	return (0);
2185}
2186
2187int
2188pci_get_powerstate_method(device_t dev, device_t child)
2189{
2190	struct pci_devinfo *dinfo = device_get_ivars(child);
2191	pcicfgregs *cfg = &dinfo->cfg;
2192	uint16_t status;
2193	int result;
2194
2195	if (cfg->pp.pp_cap != 0) {
2196		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2197		switch (status & PCIM_PSTAT_DMASK) {
2198		case PCIM_PSTAT_D0:
2199			result = PCI_POWERSTATE_D0;
2200			break;
2201		case PCIM_PSTAT_D1:
2202			result = PCI_POWERSTATE_D1;
2203			break;
2204		case PCIM_PSTAT_D2:
2205			result = PCI_POWERSTATE_D2;
2206			break;
2207		case PCIM_PSTAT_D3:
2208			result = PCI_POWERSTATE_D3;
2209			break;
2210		default:
2211			result = PCI_POWERSTATE_UNKNOWN;
2212			break;
2213		}
2214	} else {
2215		/* No support, device is always at D0 */
2216		result = PCI_POWERSTATE_D0;
2217	}
2218	return (result);
2219}
2220
2221/*
2222 * Some convenience functions for PCI device drivers.
2223 */
2224
2225static __inline void
2226pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2227{
2228	uint16_t	command;
2229
2230	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2231	command |= bit;
2232	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2233}
2234
2235static __inline void
2236pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2237{
2238	uint16_t	command;
2239
2240	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2241	command &= ~bit;
2242	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2243}
2244
/*
 * Enable bus mastering for a child device by setting the bus master
 * enable bit in its PCI command register.  Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2251
/*
 * Disable bus mastering for a child device by clearing the bus master
 * enable bit in its PCI command register.  Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2258
2259int
2260pci_enable_io_method(device_t dev, device_t child, int space)
2261{
2262	uint16_t bit;
2263
2264	switch(space) {
2265	case SYS_RES_IOPORT:
2266		bit = PCIM_CMD_PORTEN;
2267		break;
2268	case SYS_RES_MEMORY:
2269		bit = PCIM_CMD_MEMEN;
2270		break;
2271	default:
2272		return (EINVAL);
2273	}
2274	pci_set_command_bit(dev, child, bit);
2275	return (0);
2276}
2277
2278int
2279pci_disable_io_method(device_t dev, device_t child, int space)
2280{
2281	uint16_t bit;
2282
2283	switch(space) {
2284	case SYS_RES_IOPORT:
2285		bit = PCIM_CMD_PORTEN;
2286		break;
2287	case SYS_RES_MEMORY:
2288		bit = PCIM_CMD_MEMEN;
2289		break;
2290	default:
2291		return (EINVAL);
2292	}
2293	pci_clear_command_bit(dev, child, bit);
2294	return (0);
2295}
2296
2297/*
2298 * New style pci driver.  Parent device is either a pci-host-bridge or a
2299 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2300 */
2301
/*
 * Dump the interesting fields of a device's parsed config header when
 * booting verbose: IDs, location, class, command/status, timing,
 * interrupt routing, and the power-management, MSI, and MSI-X
 * capabilities that were found during probe.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read live status to report the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2358
2359static int
2360pci_porten(device_t dev)
2361{
2362	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2363}
2364
2365static int
2366pci_memen(device_t dev)
2367{
2368	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2369}
2370
/*
 * Read a BAR's current value into *mapp and its sizing probe value
 * (the register read back after writing all 1's) into *testvalp.
 * 64-bit BARs are handled as a pair of 32-bit registers.  The BAR is
 * restored to its original value before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	if (reg == PCIR_BIOS) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding that was disabled above. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2432
2433static void
2434pci_write_bar(device_t dev, int reg, pci_addr_t base)
2435{
2436	pci_addr_t map;
2437	int ln2range;
2438
2439	map = pci_read_config(dev, reg, 4);
2440
2441	/* The device ROM BAR is always 32-bits. */
2442	if (reg == PCIR_BIOS)
2443		return;
2444	ln2range = pci_maprange(map);
2445	pci_write_config(dev, reg, base, 4);
2446	if (ln2range == 64)
2447		pci_write_config(dev, reg + 4, base >> 32, 4);
2448}
2449
2450/*
2451 * Add a resource based on a pci map register. Return 1 if the map
2452 * register is a 32bit map register or 2 if it is a 64bit register.
2453 */
2454static int
2455pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2456    int force, int prefetch)
2457{
2458	pci_addr_t base, map, testval;
2459	pci_addr_t start, end, count;
2460	int barlen, basezero, maprange, mapsize, type;
2461	uint16_t cmd;
2462	struct resource *res;
2463
2464	pci_read_bar(dev, reg, &map, &testval);
2465	if (PCI_BAR_MEM(map)) {
2466		type = SYS_RES_MEMORY;
2467		if (map & PCIM_BAR_MEM_PREFETCH)
2468			prefetch = 1;
2469	} else
2470		type = SYS_RES_IOPORT;
2471	mapsize = pci_mapsize(testval);
2472	base = pci_mapbase(map);
2473#ifdef __PCI_BAR_ZERO_VALID
2474	basezero = 0;
2475#else
2476	basezero = base == 0;
2477#endif
2478	maprange = pci_maprange(map);
2479	barlen = maprange == 64 ? 2 : 1;
2480
2481	/*
2482	 * For I/O registers, if bottom bit is set, and the next bit up
2483	 * isn't clear, we know we have a BAR that doesn't conform to the
2484	 * spec, so ignore it.  Also, sanity check the size of the data
2485	 * areas to the type of memory involved.  Memory must be at least
2486	 * 16 bytes in size, while I/O ranges must be at least 4.
2487	 */
2488	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2489		return (barlen);
2490	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2491	    (type == SYS_RES_IOPORT && mapsize < 2))
2492		return (barlen);
2493
2494	if (bootverbose) {
2495		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2496		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2497		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2498			printf(", port disabled\n");
2499		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2500			printf(", memory disabled\n");
2501		else
2502			printf(", enabled\n");
2503	}
2504
2505	/*
2506	 * If base is 0, then we have problems if this architecture does
2507	 * not allow that.  It is best to ignore such entries for the
2508	 * moment.  These will be allocated later if the driver specifically
2509	 * requests them.  However, some removable busses look better when
2510	 * all resources are allocated, so allow '0' to be overriden.
2511	 *
2512	 * Similarly treat maps whose values is the same as the test value
2513	 * read back.  These maps have had all f's written to them by the
2514	 * BIOS in an attempt to disable the resources.
2515	 */
2516	if (!force && (basezero || map == testval))
2517		return (barlen);
2518	if ((u_long)base != base) {
2519		device_printf(bus,
2520		    "pci%d:%d:%d:%d bar %#x too many address bits",
2521		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2522		    pci_get_function(dev), reg);
2523		return (barlen);
2524	}
2525
2526	/*
2527	 * This code theoretically does the right thing, but has
2528	 * undesirable side effects in some cases where peripherals
2529	 * respond oddly to having these bits enabled.  Let the user
2530	 * be able to turn them off (since pci_enable_io_modes is 1 by
2531	 * default).
2532	 */
2533	if (pci_enable_io_modes) {
2534		/* Turn on resources that have been left off by a lazy BIOS */
2535		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2536			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2537			cmd |= PCIM_CMD_PORTEN;
2538			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2539		}
2540		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2541			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2542			cmd |= PCIM_CMD_MEMEN;
2543			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2544		}
2545	} else {
2546		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2547			return (barlen);
2548		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2549			return (barlen);
2550	}
2551
2552	count = 1 << mapsize;
2553	if (basezero || base == pci_mapbase(testval)) {
2554		start = 0;	/* Let the parent decide. */
2555		end = ~0ULL;
2556	} else {
2557		start = base;
2558		end = base + (1 << mapsize) - 1;
2559	}
2560	resource_list_add(rl, type, reg, start, end, count);
2561
2562	/*
2563	 * Try to allocate the resource for this BAR from our parent
2564	 * so that this resource range is already reserved.  The
2565	 * driver for this device will later inherit this resource in
2566	 * pci_alloc_resource().
2567	 */
2568	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2569	    prefetch ? RF_PREFETCHABLE : 0);
2570	if (res == NULL) {
2571		/*
2572		 * If the allocation fails, clear the BAR and delete
2573		 * the resource list entry to force
2574		 * pci_alloc_resource() to allocate resources from the
2575		 * parent.
2576		 */
2577		resource_list_delete(rl, type, reg);
2578		start = 0;
2579	} else
2580		start = rman_get_start(res);
2581	pci_write_bar(dev, reg, start);
2582	return (barlen);
2583}
2584
2585/*
2586 * For ATA devices we need to decide early what addressing mode to use.
2587 * Legacy demands that the primary and secondary ATA ports sits on the
2588 * same addresses that old ISA hardware did. This dictates that we use
2589 * those addresses and ignore the BAR's if we cannot set PCI native
2590 * addressing mode.
2591 */
2592static void
2593pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2594    uint32_t prefetchmask)
2595{
2596	struct resource *r;
2597	int rid, type, progif;
2598#if 0
2599	/* if this device supports PCI native addressing use it */
2600	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2601	if ((progif & 0x8a) == 0x8a) {
2602		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2603		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2604			printf("Trying ATA native PCI addressing mode\n");
2605			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2606		}
2607	}
2608#endif
2609	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2610	type = SYS_RES_IOPORT;
2611	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2612		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2613		    prefetchmask & (1 << 0));
2614		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2615		    prefetchmask & (1 << 1));
2616	} else {
2617		rid = PCIR_BAR(0);
2618		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2619		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2620		    0x1f7, 8, 0);
2621		rid = PCIR_BAR(1);
2622		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2623		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2624		    0x3f6, 1, 0);
2625	}
2626	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2627		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2628		    prefetchmask & (1 << 2));
2629		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2630		    prefetchmask & (1 << 3));
2631	} else {
2632		rid = PCIR_BAR(2);
2633		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2634		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2635		    0x177, 8, 0);
2636		rid = PCIR_BAR(3);
2637		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2638		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2639		    0x376, 1, 0);
2640	}
2641	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2642	    prefetchmask & (1 << 4));
2643	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2644	    prefetchmask & (1 << 5));
2645}
2646
/*
 * Determine the legacy INTx IRQ for a device and record it as the
 * rid 0 SYS_RES_IRQ resource.  The IRQ comes from, in order of
 * preference: a user tunable, the parent bus's interrupt routing
 * (always tried first when force_route is set), or the device's
 * intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2694
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's register BAR; bail if that fails. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request ownership and poll up to ~100ms for the handoff. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never released the controller; force a reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2731
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Best effort: silently skip if the I/O BAR can't be mapped. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2755
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register BAR; bail if that fails. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list looking for the legacy-support entry
	 * and its BIOS-owned semaphore.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own the controller; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore and poll up to ~100ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2811
/*
 * Populate a device's resource list from its BARs, apply per-device
 * quirks, assign the legacy interrupt, and perform early USB
 * controller takeover from SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 so 64-bit BARs skip a slot. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from SMM/BIOS before drivers attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2866
/*
 * Scan every slot/function on the given bus and add a child device
 * for each function that responds with a sane header type.
 * dinfo_size lets subclassed busses allocate a larger devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): brief settle delay before the config read;
		 * presumably works around slow-to-respond devices. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type (no device). */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices get all eight functions probed. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2899
/*
 * Attach a probed device to the bus: create the newbus child, hook up
 * its ivars and resource list, snapshot/restore its config registers,
 * and reserve its BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2911
/* Generic probe for the PCI bus driver itself. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2921
/*
 * Attach the PCI bus: discover our domain and bus number from the
 * parent bridge, enumerate all children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2941
/*
 * Helper for suspend/resume: walk a list of children and move each
 * attached one into the given power state (or the state the firmware
 * suggests instead).
 */
static void
pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
    int state)
{
	device_t child, pcib;
	struct pci_devinfo *dinfo;
	int dstate, i;

	/*
	 * Set the device to the given state.  If the firmware suggests
	 * a different power state, use it instead.  If power management
	 * is not present, the firmware is responsible for managing
	 * device power.  Skip children who aren't attached since they
	 * are handled separately.
	 */
	pcib = device_get_parent(dev);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		dstate = state;
		if (device_is_attached(child) &&
		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
			pci_set_powerstate(child, dstate);
	}
}
2967
/*
 * Suspend the PCI bus: save each child's config space, suspend the
 * children, then (if enabled) power them all down to D3.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
2999
/*
 * Resume the PCI bus: power each child back to D0 (if enabled),
 * restore its saved config space, and resume the children.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Devices with no driver get re-saved in their D0 state. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3028
3029static void
3030pci_load_vendor_data(void)
3031{
3032	caddr_t vendordata, info;
3033
3034	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3035		info = preload_search_info(vendordata, MODINFO_ADDR);
3036		pci_vendordata = *(char **)info;
3037		info = preload_search_info(vendordata, MODINFO_SIZE);
3038		pci_vendordata_size = *(size_t *)info;
3039		/* terminate the database */
3040		pci_vendordata[pci_vendordata_size] = '\n';
3041	}
3042}
3043
/*
 * Called when a new PCI driver is loaded: re-run identify, then try
 * to probe and attach every child that has no driver yet, restoring
 * its config space first and re-saving it if attach fails.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only consider children without an attached driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		/* If no driver claims it, park the device again. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3072
/*
 * Set up an interrupt handler on behalf of a child device.
 *
 * After the generic setup succeeds, perform PCI-specific bookkeeping for
 * direct children: enable INTx decoding for the legacy interrupt (rid 0),
 * or program/enable the MSI or MSI-X message for message interrupts and
 * bump the per-message handler count.  On a mapping failure the freshly
 * installed handler is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the capability on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the MSI-X vector on first handler setup. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the table entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * NB: the success path falls through to the "bad" label
		 * with error == 0, so the teardown below only runs after
		 * a PCIB_MAP_MSI() failure.
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3164
3165int
3166pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3167    void *cookie)
3168{
3169	struct msix_table_entry *mte;
3170	struct resource_list_entry *rle;
3171	struct pci_devinfo *dinfo;
3172	int error, rid;
3173
3174	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3175		return (EINVAL);
3176
3177	/* If this isn't a direct child, just bail out */
3178	if (device_get_parent(child) != dev)
3179		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3180
3181	rid = rman_get_rid(irq);
3182	if (rid == 0) {
3183		/* Mask INTx */
3184		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3185	} else {
3186		/*
3187		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3188		 * decrement the appropriate handlers count and mask the
3189		 * MSI-X message, or disable MSI messages if the count
3190		 * drops to 0.
3191		 */
3192		dinfo = device_get_ivars(child);
3193		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3194		if (rle->res != irq)
3195			return (EINVAL);
3196		if (dinfo->cfg.msi.msi_alloc > 0) {
3197			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3198			    ("MSI-X index too high"));
3199			if (dinfo->cfg.msi.msi_handlers == 0)
3200				return (EINVAL);
3201			dinfo->cfg.msi.msi_handlers--;
3202			if (dinfo->cfg.msi.msi_handlers == 0)
3203				pci_disable_msi(child);
3204		} else {
3205			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3206			    ("No MSI or MSI-X interrupts allocated"));
3207			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3208			    ("MSI-X index too high"));
3209			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3210			if (mte->mte_handlers == 0)
3211				return (EINVAL);
3212			mte->mte_handlers--;
3213			if (mte->mte_handlers == 0)
3214				pci_mask_msix(child, rid - 1);
3215		}
3216	}
3217	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3218	if (rid > 0)
3219		KASSERT(error == 0,
3220		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3221	return (error);
3222}
3223
3224int
3225pci_print_child(device_t dev, device_t child)
3226{
3227	struct pci_devinfo *dinfo;
3228	struct resource_list *rl;
3229	int retval = 0;
3230
3231	dinfo = device_get_ivars(child);
3232	rl = &dinfo->resources;
3233
3234	retval += bus_print_child_header(dev, child);
3235
3236	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3237	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3238	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3239	if (device_get_flags(dev))
3240		retval += printf(" flags %#x", device_get_flags(dev));
3241
3242	retval += printf(" at device %d.%d", pci_get_slot(child),
3243	    pci_get_function(child));
3244
3245	retval += bus_print_child_footer(dev, child);
3246
3247	return (retval);
3248}
3249
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when no
 * driver attaches and the vendor database has no entry for the device.
 * A subclass of -1 is the catch-all description for the whole class; the
 * table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3341
3342void
3343pci_probe_nomatch(device_t dev, device_t child)
3344{
3345	int	i;
3346	char	*cp, *scp, *device;
3347
3348	/*
3349	 * Look for a listing for this device in a loaded device database.
3350	 */
3351	if ((device = pci_describe_device(child)) != NULL) {
3352		device_printf(dev, "<%s>", device);
3353		free(device, M_DEVBUF);
3354	} else {
3355		/*
3356		 * Scan the class/subclass descriptions for a general
3357		 * description.
3358		 */
3359		cp = "unknown";
3360		scp = NULL;
3361		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3362			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3363				if (pci_nomatch_tab[i].subclass == -1) {
3364					cp = pci_nomatch_tab[i].desc;
3365				} else if (pci_nomatch_tab[i].subclass ==
3366				    pci_get_subclass(child)) {
3367					scp = pci_nomatch_tab[i].desc;
3368				}
3369			}
3370		}
3371		device_printf(dev, "<%s%s%s>",
3372		    cp ? cp : "",
3373		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3374		    scp ? scp : "");
3375	}
3376	printf(" at device %d.%d (no driver attached)\n",
3377	    pci_get_slot(child), pci_get_function(child));
3378	pci_cfg_save(child, device_get_ivars(child), 1);
3379	return;
3380}
3381
3382/*
3383 * Parse the PCI device database, if loaded, and return a pointer to a
3384 * description of the device.
3385 *
3386 * The database is flat text formatted as follows:
3387 *
3388 * Any line not in a valid format is ignored.
3389 * Lines are terminated with newline '\n' characters.
3390 *
3391 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3392 * the vendor name.
3393 *
3394 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3395 * - devices cannot be listed without a corresponding VENDOR line.
3396 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3397 * another TAB, then the device name.
3398 */
3399
3400/*
3401 * Assuming (ptr) points to the beginning of a line in the database,
3402 * return the vendor or device and description of the next entry.
3403 * The value of (vendor) or (device) inappropriate for the entry type
3404 * is set to -1.  Returns nonzero at the end of the database.
3405 *
3406 * Note that this is slightly unrobust in the face of corrupt data;
3407 * we attempt to safeguard against this by spamming the end of the
3408 * database with a newline when we initialise.
3409 */
/*
 * Parse one entry from the vendor database starting at *ptr.
 *
 * On a match, *vendor or *device receives the hex code (the other stays
 * -1) and *desc (an 80-byte caller-supplied buffer; see the %80 scan
 * width) receives the description; *ptr is advanced past the line and 0
 * is returned.  Returns 1 when the end of the database is reached.
 *
 * NB: the reads of *cp after the skip loops may touch one byte past the
 * nominal size; that byte is the '\n' sentinel written by
 * pci_load_vendor_data().
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3455
/*
 * Build a "vendor, device" description string for a device from the
 * loaded vendor database.  Returns a malloc'd string (M_DEVBUF) that the
 * caller must free, or NULL if no database is loaded, the vendor is
 * unknown, or allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the %80 scan width in pci_describe_parse_line(). */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: no device entry found. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device id when no description matched. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator plus the NUL terminator. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3508
/*
 * Bus method: return the value of an instance variable for a child
 * device, read from its cached config-space registers.  Returns 0 on
 * success, EINVAL for PCI_IVAR_ETHADDR (not supported here), or ENOENT
 * for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	/* Identification registers. */
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor id, as used by driver match tables. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	/* Class code registers. */
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	/* Interrupt routing. */
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	/* Bus address of the function. */
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	/* Miscellaneous header registers. */
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3591
3592int
3593pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3594{
3595	struct pci_devinfo *dinfo;
3596
3597	dinfo = device_get_ivars(child);
3598
3599	switch (which) {
3600	case PCI_IVAR_INTPIN:
3601		dinfo->cfg.intpin = value;
3602		return (0);
3603	case PCI_IVAR_ETHADDR:
3604	case PCI_IVAR_SUBVENDOR:
3605	case PCI_IVAR_SUBDEVICE:
3606	case PCI_IVAR_VENDOR:
3607	case PCI_IVAR_DEVICE:
3608	case PCI_IVAR_DEVID:
3609	case PCI_IVAR_CLASS:
3610	case PCI_IVAR_SUBCLASS:
3611	case PCI_IVAR_PROGIF:
3612	case PCI_IVAR_REVID:
3613	case PCI_IVAR_IRQ:
3614	case PCI_IVAR_DOMAIN:
3615	case PCI_IVAR_BUS:
3616	case PCI_IVAR_SLOT:
3617	case PCI_IVAR_FUNCTION:
3618		return (EINVAL);	/* disallow for now */
3619
3620	default:
3621		return (ENOENT);
3622	}
3623}
3624
3625
3626#include "opt_ddb.h"
3627#ifdef DDB
3628#include <ddb/ddb.h>
3629#include <sys/cons.h>
3630
3631/*
3632 * List resources based on pci map registers, used for within ddb
3633 */
3634
/*
 * DDB "show pciregs" command: walk the global device queue and print one
 * line per PCI function with its attached driver name (or "none<N>") and
 * key config registers.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * NB: none_count++ is evaluated only when no driver name is
		 * available, so unnamed devices get sequential "none" units.
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3674#endif /* DDB */
3675
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it: size the BAR by probing it, allocate a suitably sized and
 * aligned range from our parent, record it on the child's resource list
 * as RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved (inactive) resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/*
	 * Determine the size of the BAR and ignore BARs with a size
	 * of 0.  Device ROM BARs use a different mask value.
	 */
	if (*rid == PCIR_BIOS)
		mapsize = pci_romsize(testval);
	else
		mapsize = pci_mapsize(testval);
	if (mapsize == 0)
		goto out;

	/* Reject requests whose type does not match what the BAR decodes. */
	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	/* Mark the entry reserved so it survives release/realloc cycles. */
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3771
3772
3773struct resource *
3774pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3775		   u_long start, u_long end, u_long count, u_int flags)
3776{
3777	struct pci_devinfo *dinfo = device_get_ivars(child);
3778	struct resource_list *rl = &dinfo->resources;
3779	struct resource_list_entry *rle;
3780	struct resource *res;
3781	pcicfgregs *cfg = &dinfo->cfg;
3782
3783	if (device_get_parent(child) != dev)
3784		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3785		    type, rid, start, end, count, flags));
3786
3787	/*
3788	 * Perform lazy resource allocation
3789	 */
3790	switch (type) {
3791	case SYS_RES_IRQ:
3792		/*
3793		 * Can't alloc legacy interrupt once MSI messages have
3794		 * been allocated.
3795		 */
3796		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3797		    cfg->msix.msix_alloc > 0))
3798			return (NULL);
3799
3800		/*
3801		 * If the child device doesn't have an interrupt
3802		 * routed and is deserving of an interrupt, try to
3803		 * assign it one.
3804		 */
3805		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3806		    (cfg->intpin != 0))
3807			pci_assign_interrupt(dev, child, 0);
3808		break;
3809	case SYS_RES_IOPORT:
3810	case SYS_RES_MEMORY:
3811		/* Reserve resources for this BAR if needed. */
3812		rle = resource_list_find(rl, type, *rid);
3813		if (rle == NULL) {
3814			res = pci_reserve_map(dev, child, type, rid, start, end,
3815			    count, flags);
3816			if (res == NULL)
3817				return (NULL);
3818		}
3819	}
3820	return (resource_list_alloc(rl, dev, child, type, rid,
3821	    start, end, count, flags));
3822}
3823
3824int
3825pci_activate_resource(device_t dev, device_t child, int type, int rid,
3826    struct resource *r)
3827{
3828	int error;
3829
3830	error = bus_generic_activate_resource(dev, child, type, rid, r);
3831	if (error)
3832		return (error);
3833
3834	/* Enable decoding in the command register when activating BARs. */
3835	if (device_get_parent(child) == dev) {
3836		/* Device ROMs need their decoding explicitly enabled. */
3837		if (rid == PCIR_BIOS)
3838			pci_write_config(child, rid, rman_get_start(r) |
3839			    PCIM_BIOS_ENABLE, 4);
3840		switch (type) {
3841		case SYS_RES_IOPORT:
3842		case SYS_RES_MEMORY:
3843			error = PCI_ENABLE_IO(dev, child, type);
3844			break;
3845		}
3846	}
3847	return (error);
3848}
3849
3850int
3851pci_deactivate_resource(device_t dev, device_t child, int type,
3852    int rid, struct resource *r)
3853{
3854	int error;
3855
3856	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3857	if (error)
3858		return (error);
3859
3860	/* Disable decoding for device ROMs. */
3861	if (rid == PCIR_BIOS)
3862		pci_write_config(child, rid, rman_get_start(r), 4);
3863	return (0);
3864}
3865
/*
 * Detach and destroy a child device: detach its driver, disable its
 * memory and I/O decoding, release/unreserve every resource on its
 * resource list, and finally delete the devclass entry and free the
 * per-device config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource should no longer be active or busy at
			 * this point; complain and force-release if it is.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
3905
/*
 * Bus method: delete a resource-list entry of a direct child.  If the
 * entry is still reserved, the backing resource is unreserved first (and
 * the BAR cleared so the device stops decoding); entries that are active
 * or busy are left alone with a warning.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children keep their resource list with us. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3948
3949struct resource_list *
3950pci_get_resource_list (device_t dev, device_t child)
3951{
3952	struct pci_devinfo *dinfo = device_get_ivars(child);
3953
3954	return (&dinfo->resources);
3955}
3956
3957uint32_t
3958pci_read_config_method(device_t dev, device_t child, int reg, int width)
3959{
3960	struct pci_devinfo *dinfo = device_get_ivars(child);
3961	pcicfgregs *cfg = &dinfo->cfg;
3962
3963	return (PCIB_READ_CONFIG(device_get_parent(dev),
3964	    cfg->bus, cfg->slot, cfg->func, reg, width));
3965}
3966
3967void
3968pci_write_config_method(device_t dev, device_t child, int reg,
3969    uint32_t val, int width)
3970{
3971	struct pci_devinfo *dinfo = device_get_ivars(child);
3972	pcicfgregs *cfg = &dinfo->cfg;
3973
3974	PCIB_WRITE_CONFIG(device_get_parent(dev),
3975	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3976}
3977
3978int
3979pci_child_location_str_method(device_t dev, device_t child, char *buf,
3980    size_t buflen)
3981{
3982
3983	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3984	    pci_get_function(child));
3985	return (0);
3986}
3987
3988int
3989pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3990    size_t buflen)
3991{
3992	struct pci_devinfo *dinfo;
3993	pcicfgregs *cfg;
3994
3995	dinfo = device_get_ivars(child);
3996	cfg = &dinfo->cfg;
3997	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3998	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3999	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4000	    cfg->progif);
4001	return (0);
4002}
4003
4004int
4005pci_assign_interrupt_method(device_t dev, device_t child)
4006{
4007	struct pci_devinfo *dinfo = device_get_ivars(child);
4008	pcicfgregs *cfg = &dinfo->cfg;
4009
4010	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4011	    cfg->intpin));
4012}
4013
4014static int
4015pci_modevent(module_t mod, int what, void *arg)
4016{
4017	static struct cdev *pci_cdev;
4018
4019	switch (what) {
4020	case MOD_LOAD:
4021		STAILQ_INIT(&pci_devq);
4022		pci_generation = 0;
4023		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4024		    "pci");
4025		pci_load_vendor_data();
4026		break;
4027
4028	case MOD_UNLOAD:
4029		destroy_dev(pci_cdev);
4030		break;
4031	}
4032
4033	return (0);
4034}
4035
/*
 * Restore a device's config space from the copy cached in dinfo,
 * powering it up to D0 first.  Used on resume and before reprobing.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs first, then the remaining type-0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4080
4081void
4082pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4083{
4084	int i;
4085	uint32_t cls;
4086	int ps;
4087
4088	/*
4089	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4090	 * we know need special treatment.  Type 2 devices are cardbus bridges
4091	 * which also require special treatment.  Other types are unknown, and
4092	 * we err on the side of safety by ignoring them.  Powering down
4093	 * bridges should not be undertaken lightly.
4094	 */
4095	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4096		return;
4097	for (i = 0; i < dinfo->cfg.nummaps; i++)
4098		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4099	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4100
4101	/*
4102	 * Some drivers apparently write to these registers w/o updating our
4103	 * cached copy.  No harm happens if we update the copy, so do so here
4104	 * so we can restore them.  The COMMAND register is modified by the
4105	 * bus w/o updating the cache.  This should represent the normally
4106	 * writable portion of the 'defined' part of type 0 headers.  In
4107	 * theory we also need to save/restore the PCI capability structures
4108	 * we know about, but apart from power we don't know any that are
4109	 * writable.
4110	 */
4111	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4112	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4113	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4114	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4115	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4116	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4117	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4118	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4119	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4120	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4121	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4122	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4123	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4124	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4125	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4126
4127	/*
4128	 * don't set the state for display devices, base peripherals and
4129	 * memory devices since bad things happen when they are powered down.
4130	 * We should (a) have drivers that can easily detach and (b) use
4131	 * generic drivers for these devices so that some device actually
4132	 * attaches.  We need to make sure that when we implement (a) we don't
4133	 * power the device down on a reattach.
4134	 */
4135	cls = pci_get_class(dev);
4136	if (!setstate)
4137		return;
4138	switch (pci_do_power_nodriver)
4139	{
4140		case 0:		/* NO powerdown at all */
4141			return;
4142		case 1:		/* Conservative about what to power down */
4143			if (cls == PCIC_STORAGE)
4144				return;
4145			/*FALLTHROUGH*/
4146		case 2:		/* Agressive about what to power down */
4147			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4148			    cls == PCIC_BASEPERIPH)
4149				return;
4150			/*FALLTHROUGH*/
4151		case 3:		/* Power down everything */
4152			break;
4153	}
4154	/*
4155	 * PCI spec says we can only go into D3 state from D0 state.
4156	 * Transition from D[12] into D0 before going to D3 state.
4157	 */
4158	ps = pci_get_powerstate(dev);
4159	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4160		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4161	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4162		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4163}
4164