/* pci.c revision 199814 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 199814 2009-11-25 20:50:43Z thompsa $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#ifdef __HAVE_ACPI
73#include <contrib/dev/acpica/include/acpi.h>
74#include "acpi_if.h"
75#else
76#define	ACPI_PWR_FOR_SLEEP(x, y, z)
77#endif
78
/*
 * Forward declarations for the file-local helpers defined below; see each
 * definition for details.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
119
/* Method table wiring the PCI bus driver into the newbus framework. */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
171
/* Register the "pci" driver class and attach it beneath pcib instances. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Buffer holding the PCI vendor/device description data and its length;
 * presumably populated by pci_load_vendor_data() (defined elsewhere in
 * this file).
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
180
181
/*
 * One device-specific quirk, keyed on the combined 32-bit device/vendor
 * ID register value (device ID in the high 16 bits).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
190
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* table terminator */
};
225
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of all discovered PCI devices */
uint32_t pci_generation;	/* bumped each time pci_devq gains an entry */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCIe/PCI-X capability is seen (see pci_read_extcap()). */
static int pcie_chipset, pcix_chipset;
235
/* sysctl vars */
/* Loader tunables / sysctls controlling global PCI bus behavior. */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only where a BIOS may own the controller. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
287
288/* Find a device_t by bus/slot/function in domain 0 */
289
/*
 * Convenience wrapper around pci_find_dbsf() for the common case of
 * PCI domain 0.  Returns the device_t or NULL if no match exists.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
296
297/* Find a device_t by domain/bus/slot/function */
298
299device_t
300pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
301{
302	struct pci_devinfo *dinfo;
303
304	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
305		if ((dinfo->cfg.domain == domain) &&
306		    (dinfo->cfg.bus == bus) &&
307		    (dinfo->cfg.slot == slot) &&
308		    (dinfo->cfg.func == func)) {
309			return (dinfo->cfg.dev);
310		}
311	}
312
313	return (NULL);
314}
315
316/* Find a device_t by vendor/device ID */
317
318device_t
319pci_find_device(uint16_t vendor, uint16_t device)
320{
321	struct pci_devinfo *dinfo;
322
323	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
324		if ((dinfo->cfg.vendor == vendor) &&
325		    (dinfo->cfg.device == device)) {
326			return (dinfo->cfg.dev);
327		}
328	}
329
330	return (NULL);
331}
332
333static int
334pci_printf(pcicfgregs *cfg, const char *fmt, ...)
335{
336	va_list ap;
337	int retval;
338
339	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
340	    cfg->func);
341	va_start(ap, fmt);
342	retval += vprintf(fmt, ap);
343	va_end(ap);
344	return (retval);
345}
346
347/* return base address of memory or port map */
348
349static pci_addr_t
350pci_mapbase(uint64_t mapreg)
351{
352
353	if (PCI_BAR_MEM(mapreg))
354		return (mapreg & PCIM_BAR_MEM_BASE);
355	else
356		return (mapreg & PCIM_BAR_IO_BASE);
357}
358
359/* return map type of memory or port map */
360
361static const char *
362pci_maptype(uint64_t mapreg)
363{
364
365	if (PCI_BAR_IO(mapreg))
366		return ("I/O Port");
367	if (mapreg & PCIM_BAR_MEM_PREFETCH)
368		return ("Prefetchable Memory");
369	return ("Memory");
370}
371
372/* return log2 of map size decoded for memory or port map */
373
374static int
375pci_mapsize(uint64_t testval)
376{
377	int ln2size;
378
379	testval = pci_mapbase(testval);
380	ln2size = 0;
381	if (testval != 0) {
382		while ((testval & 1) == 0)
383		{
384			ln2size++;
385			testval >>= 1;
386		}
387	}
388	return (ln2size);
389}
390
391/* return log2 of address range supported by map register */
392
393static int
394pci_maprange(uint64_t mapreg)
395{
396	int ln2range = 0;
397
398	if (PCI_BAR_IO(mapreg))
399		ln2range = 32;
400	else
401		switch (mapreg & PCIM_BAR_MEM_TYPE) {
402		case PCIM_BAR_MEM_32:
403			ln2range = 32;
404			break;
405		case PCIM_BAR_MEM_1MB:
406			ln2range = 20;
407			break;
408		case PCIM_BAR_MEM_64:
409			ln2range = 64;
410			break;
411		}
412	return (ln2range);
413}
414
415/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
416
417static void
418pci_fixancient(pcicfgregs *cfg)
419{
420	if (cfg->hdrtype != 0)
421		return;
422
423	/* PCI to PCI bridges use header type 1 */
424	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
425		cfg->hdrtype = 1;
426}
427
428/* extract header type specific config data */
429
430static void
431pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
432{
433#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
434	switch (cfg->hdrtype) {
435	case 0:
436		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
437		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
438		cfg->nummaps	    = PCI_MAXMAPS_0;
439		break;
440	case 1:
441		cfg->nummaps	    = PCI_MAXMAPS_1;
442		break;
443	case 2:
444		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
445		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
446		cfg->nummaps	    = PCI_MAXMAPS_2;
447		break;
448	}
449#undef REG
450}
451
/* read configuration header into pcicfgregs structure */
/*
 * Probe the function at (d, b, s, f) and, if a device responds there,
 * allocate a pci_devinfo of 'size' bytes (callers may pass a larger size
 * to embed pci_devinfo in a bigger structure), fill in its config-space
 * snapshot, and append it to the global pci_devq list.  Returns the new
 * entry, or NULL if no device is present or allocation fails.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* Reading all-ones from DEVVENDOR means nothing decodes this slot. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config snapshot into the pciio conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
526
/*
 * Walk the device's PCI capability list and record the capabilities this
 * driver cares about (power management, HyperTransport MSI mapping, MSI,
 * MSI-X, VPD, subvendor, PCI-X, PCI-express) into 'cfg'.  Callers check
 * PCIM_STATUS_CAPPRESENT before invoking (see pci_read_device()).
 *
 * Note: the REG/WREG macros defined here deliberately remain defined for
 * the VPD helper functions below; they are #undef'd at the end of
 * pci_read_vpd().
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
660
661/*
662 * PCI Vital Product Data
663 */
664
665#define	PCI_VPD_TIMEOUT		1000000
666
/*
 * Read one 32-bit word of VPD at byte offset 'reg' into *data via the
 * VPD capability registers (REG/WREG macros from pci_read_extcap()).
 * Polls up to PCI_VPD_TIMEOUT microseconds for the completion flag
 * (bit 15 of the VPD address register); returns 0 on success or ENXIO
 * on timeout.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Write the address with flag clear to start a read cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* The device sets bit 15 when the data register is valid. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
685
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg'.  Mirror image of
 * pci_read_vpd_reg(): the data is written first, then the address with
 * flag (bit 15) set; the device clears the flag when the write completes.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
705
706#undef PCI_VPD_TIMEOUT
707
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read, LE-decoded */
	int		bytesinval;	/* unread bytes remaining in 'val' */
	int		off;		/* next 32-bit-aligned VPD offset */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
716
717static int
718vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
719{
720	uint32_t reg;
721	uint8_t byte;
722
723	if (vrs->bytesinval == 0) {
724		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
725			return (ENXIO);
726		vrs->val = le32toh(reg);
727		vrs->off += 4;
728		byte = vrs->val & 0xff;
729		vrs->bytesinval = 3;
730	} else {
731		vrs->val = vrs->val >> 8;
732		byte = vrs->val & 0xff;
733		vrs->bytesinval--;
734	}
735
736	vrs->cksum += byte;
737	*data = byte;
738	return (0);
739}
740
/*
 * Parse the device's Vital Product Data into cfg->vpd.  Implemented as a
 * state machine driven one byte at a time by vpd_nextbyte():
 *
 *   state 0  - resource item header (small or large form)
 *   state 1  - identifier string bytes
 *   state 2  - VPD-R keyword header (keyword + length)
 *   state 3  - VPD-R keyword value bytes (checksum verified at "RV")
 *   state 4  - consume bytes of an unhandled item
 *   state 5  - VPD-W keyword header
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal/abnormal termination, state -2 - read error
 *
 * On checksum failure the read-only array is freed; on I/O error all
 * partially-built results are freed.  vpd_cached is set regardless so
 * the parse is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length must fit in the 32KB VPD space. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" byte completes the checksum: sum must be 0. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip remaining bytes of an unhandled item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1020
1021int
1022pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1023{
1024	struct pci_devinfo *dinfo = device_get_ivars(child);
1025	pcicfgregs *cfg = &dinfo->cfg;
1026
1027	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1028		pci_read_vpd(device_get_parent(dev), cfg);
1029
1030	*identptr = cfg->vpd.vpd_ident;
1031
1032	if (*identptr == NULL)
1033		return (ENXIO);
1034
1035	return (0);
1036}
1037
1038int
1039pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1040	const char **vptr)
1041{
1042	struct pci_devinfo *dinfo = device_get_ivars(child);
1043	pcicfgregs *cfg = &dinfo->cfg;
1044	int i;
1045
1046	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1047		pci_read_vpd(device_get_parent(dev), cfg);
1048
1049	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1050		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1051		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1052			*vptr = cfg->vpd.vpd_ros[i].value;
1053		}
1054
1055	if (i != cfg->vpd.vpd_rocnt)
1056		return (0);
1057
1058	*vptr = NULL;
1059	return (ENXIO);
1060}
1061
1062/*
1063 * Find the requested extended capability and return the offset in
1064 * configuration space via the pointer provided. The function returns
1065 * 0 on success and error code otherwise.
1066 */
1067int
1068pci_find_extcap_method(device_t dev, device_t child, int capability,
1069    int *capreg)
1070{
1071	struct pci_devinfo *dinfo = device_get_ivars(child);
1072	pcicfgregs *cfg = &dinfo->cfg;
1073	u_int32_t status;
1074	u_int8_t ptr;
1075
1076	/*
1077	 * Check the CAP_LIST bit of the PCI status register first.
1078	 */
1079	status = pci_read_config(child, PCIR_STATUS, 2);
1080	if (!(status & PCIM_STATUS_CAPPRESENT))
1081		return (ENXIO);
1082
1083	/*
1084	 * Determine the start pointer of the capabilities list.
1085	 */
1086	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1087	case 0:
1088	case 1:
1089		ptr = PCIR_CAP_PTR;
1090		break;
1091	case 2:
1092		ptr = PCIR_CAP_PTR_2;
1093		break;
1094	default:
1095		/* XXX: panic? */
1096		return (ENXIO);		/* no extended capabilities support */
1097	}
1098	ptr = pci_read_config(child, ptr, 1);
1099
1100	/*
1101	 * Traverse the capabilities list.
1102	 */
1103	while (ptr != 0) {
1104		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1105			if (capreg != NULL)
1106				*capreg = ptr;
1107			return (0);
1108		}
1109		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1110	}
1111
1112	return (ENOENT);
1113}
1114
1115/*
1116 * Support for MSI-X message interrupts.
1117 */
1118void
1119pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1120{
1121	struct pci_devinfo *dinfo = device_get_ivars(dev);
1122	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1123	uint32_t offset;
1124
1125	KASSERT(msix->msix_table_len > index, ("bogus index"));
1126	offset = msix->msix_table_offset + index * 16;
1127	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1128	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1129	bus_write_4(msix->msix_table_res, offset + 8, data);
1130
1131	/* Enable MSI -> HT mapping. */
1132	pci_ht_map_msi(dev, address);
1133}
1134
1135void
1136pci_mask_msix(device_t dev, u_int index)
1137{
1138	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140	uint32_t offset, val;
1141
1142	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1143	offset = msix->msix_table_offset + index * 16 + 12;
1144	val = bus_read_4(msix->msix_table_res, offset);
1145	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1146		val |= PCIM_MSIX_VCTRL_MASK;
1147		bus_write_4(msix->msix_table_res, offset, val);
1148	}
1149}
1150
1151void
1152pci_unmask_msix(device_t dev, u_int index)
1153{
1154	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156	uint32_t offset, val;
1157
1158	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159	offset = msix->msix_table_offset + index * 16 + 12;
1160	val = bus_read_4(msix->msix_table_res, offset);
1161	if (val & PCIM_MSIX_VCTRL_MASK) {
1162		val &= ~PCIM_MSIX_VCTRL_MASK;
1163		bus_write_4(msix->msix_table_res, offset, val);
1164	}
1165}
1166
1167int
1168pci_pending_msix(device_t dev, u_int index)
1169{
1170	struct pci_devinfo *dinfo = device_get_ivars(dev);
1171	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1172	uint32_t offset, bit;
1173
1174	KASSERT(msix->msix_table_len > index, ("bogus index"));
1175	offset = msix->msix_pba_offset + (index / 32) * 4;
1176	bit = 1 << index % 32;
1177	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1178}
1179
1180/*
1181 * Restore MSI-X registers and table during resume.  If MSI-X is
1182 * enabled then walk the virtual table to restore the actual MSI-X
1183 * table.
1184 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved message control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1212
1213/*
1214 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1215 * returned in *count.  After this function returns, each message will be
1216 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1217 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already activated the memory BAR(s) holding the MSI-X
	 * table and the pending bit array (PBA).
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table's BAR, rle still points at it. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more vectors than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		/* MSI-X IRQ resources are exposed to the driver at rid 1+. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Partial allocations are accepted; 'actual' is what we got. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 means "no vector assigned". */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1349
1350/*
1351 * By default, pci_alloc_msix() will assign the allocated IRQ
1352 * resources consecutively to the first N messages in the MSI-X table.
1353 * However, device drivers may want to use different layouts if they
1354 * either receive fewer messages than they asked for, or they wish to
1355 * populate the MSI-X table sparsely.  This method allows the driver
1356 * to specify what layout it wants.  It must be called after a
1357 * successful pci_alloc_msix() but before any of the associated
1358 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1359 *
1360 * The 'vectors' array contains 'count' message vectors.  The array
1361 * maps directly to the MSI-X table in that index 0 in the array
1362 * specifies the vector for the first message in the MSI-X table, etc.
1363 * The vector value in each array index can either be 0 to indicate
1364 * that no vector should be assigned to a message slot, or it can be a
1365 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1367 * vector (IRQ) to be used for the corresponding message.
1368 *
1369 * On successful return, each message with a non-zero vector will have
1370 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1371 * 1.  Additionally, if any of the IRQs allocated via the previous
1372 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1373 * will be freed back to the system automatically.
1374 *
1375 * For example, suppose a driver has a MSI-X table with 6 messages and
1376 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1377 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1378 * C.  After the call to pci_alloc_msix(), the device will be setup to
1379 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1381 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1382 * be freed back to the system.  This device will also have valid
1383 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1384 *
1385 * In any case, the SYS_RES_IRQ rid X will always map to the message
1386 * at MSI-X table index X - 1 and will only be valid if a vector is
1387 * assigned to that table entry.
1388 */
1389int
1390pci_remap_msix_method(device_t dev, device_t child, int count,
1391    const u_int *vectors)
1392{
1393	struct pci_devinfo *dinfo = device_get_ivars(child);
1394	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395	struct resource_list_entry *rle;
1396	int i, irq, j, *used;
1397
1398	/*
1399	 * Have to have at least one message in the table but the
1400	 * table can't be bigger than the actual MSI-X table in the
1401	 * device.
1402	 */
1403	if (count == 0 || count > msix->msix_msgnum)
1404		return (EINVAL);
1405
1406	/* Sanity check the vectors. */
1407	for (i = 0; i < count; i++)
1408		if (vectors[i] > msix->msix_alloc)
1409			return (EINVAL);
1410
1411	/*
1412	 * Make sure there aren't any holes in the vectors to be used.
1413	 * It's a big pain to support it, and it doesn't really make
1414	 * sense anyway.  Also, at least one vector must be used.
1415	 */
1416	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1417	    M_ZERO);
1418	for (i = 0; i < count; i++)
1419		if (vectors[i] != 0)
1420			used[vectors[i] - 1] = 1;
1421	for (i = 0; i < msix->msix_alloc - 1; i++)
1422		if (used[i] == 0 && used[i + 1] == 1) {
1423			free(used, M_DEVBUF);
1424			return (EINVAL);
1425		}
1426	if (used[0] != 1) {
1427		free(used, M_DEVBUF);
1428		return (EINVAL);
1429	}
1430
1431	/* Make sure none of the resources are allocated. */
1432	for (i = 0; i < msix->msix_table_len; i++) {
1433		if (msix->msix_table[i].mte_vector == 0)
1434			continue;
1435		if (msix->msix_table[i].mte_handlers > 0)
1436			return (EBUSY);
1437		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1438		KASSERT(rle != NULL, ("missing resource"));
1439		if (rle->res != NULL)
1440			return (EBUSY);
1441	}
1442
1443	/* Free the existing resource list entries. */
1444	for (i = 0; i < msix->msix_table_len; i++) {
1445		if (msix->msix_table[i].mte_vector == 0)
1446			continue;
1447		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1448	}
1449
1450	/*
1451	 * Build the new virtual table keeping track of which vectors are
1452	 * used.
1453	 */
1454	free(msix->msix_table, M_DEVBUF);
1455	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1456	    M_DEVBUF, M_WAITOK | M_ZERO);
1457	for (i = 0; i < count; i++)
1458		msix->msix_table[i].mte_vector = vectors[i];
1459	msix->msix_table_len = count;
1460
1461	/* Free any unused IRQs and resize the vectors array if necessary. */
1462	j = msix->msix_alloc - 1;
1463	if (used[j] == 0) {
1464		struct msix_vector *vec;
1465
1466		while (used[j] == 0) {
1467			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1468			    msix->msix_vectors[j].mv_irq);
1469			j--;
1470		}
1471		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1472		    M_WAITOK);
1473		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1474		    (j + 1));
1475		free(msix->msix_vectors, M_DEVBUF);
1476		msix->msix_vectors = vec;
1477		msix->msix_alloc = j + 1;
1478	}
1479	free(used, M_DEVBUF);
1480
1481	/* Map the IRQs onto the rids. */
1482	for (i = 0; i < count; i++) {
1483		if (vectors[i] == 0)
1484			continue;
1485		irq = msix->msix_vectors[vectors[i]].mv_irq;
1486		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1487		    irq, 1);
1488	}
1489
1490	if (bootverbose) {
1491		device_printf(child, "Remapped MSI-X IRQs as: ");
1492		for (i = 0; i < count; i++) {
1493			if (i != 0)
1494				printf(", ");
1495			if (vectors[i] == 0)
1496				printf("---");
1497			else
1498				printf("%d",
1499				    msix->msix_vectors[vectors[i]].mv_irq);
1500		}
1501		printf("\n");
1502	}
1503
1504	return (0);
1505}
1506
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  A message is
	 * still busy if it has interrupt handlers attached or if its
	 * SYS_RES_IRQ resource is still held by the driver.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1553
1554/*
1555 * Return the max supported MSI-X messages this device supports.
1556 * Basically, assuming the MD code can alloc messages, this function
1557 * should return the maximum value that pci_alloc_msix() can return.
1558 * Thus, it is subject to the tunables, etc.
1559 */
1560int
1561pci_msix_count_method(device_t dev, device_t child)
1562{
1563	struct pci_devinfo *dinfo = device_get_ivars(child);
1564	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1565
1566	if (pci_do_msix && msix->msix_location != 0)
1567		return (msix->msix_msgnum);
1568	return (0);
1569}
1570
1571/*
1572 * HyperTransport MSI mapping control
1573 */
1574void
1575pci_ht_map_msi(device_t dev, uint64_t addr)
1576{
1577	struct pci_devinfo *dinfo = device_get_ivars(dev);
1578	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1579
1580	if (!ht->ht_msimap)
1581		return;
1582
1583	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1584	    ht->ht_msiaddr >> 20 == addr >> 20) {
1585		/* Enable MSI -> HT mapping. */
1586		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1587		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1588		    ht->ht_msictrl, 2);
1589	}
1590
1591	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1592		/* Disable MSI -> HT mapping. */
1593		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1594		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1595		    ht->ht_msictrl, 2);
1596	}
1597}
1598
1599/*
1600 * Support for MSI message signalled interrupts.
1601 */
1602void
1603pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1604{
1605	struct pci_devinfo *dinfo = device_get_ivars(dev);
1606	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1607
1608	/* Write data and address values. */
1609	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1610	    address & 0xffffffff, 4);
1611	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1612		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1613		    address >> 32, 4);
1614		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1615		    data, 2);
1616	} else
1617		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1618		    2);
1619
1620	/* Enable MSI in the control register. */
1621	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1622	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1623	    2);
1624
1625	/* Enable MSI -> HT mapping. */
1626	pci_ht_map_msi(dev, address);
1627}
1628
1629void
1630pci_disable_msi(device_t dev)
1631{
1632	struct pci_devinfo *dinfo = device_get_ivars(dev);
1633	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1634
1635	/* Disable MSI -> HT mapping. */
1636	pci_ht_map_msi(dev, 0);
1637
1638	/* Disable MSI in the control register. */
1639	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1640	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1641	    2);
1642}
1643
1644/*
1645 * Restore MSI registers during resume.  If MSI is enabled then
1646 * restore the data and address registers in addition to the control
1647 * register.
1648 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Reprogram the saved address and data registers. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register's offset depends on the address width. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the control register (written even if disabled). */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1674
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			/* MSI IRQ resources live at rids 1..msi_alloc. */
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable while reprogramming the device. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every active slot using vector i+1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	/* Neither MSI nor MSI-X is allocated on this device. */
	return (ENOENT);
}
1750
1751/*
1752 * Returns true if the specified device is blacklisted because MSI
1753 * doesn't work.
1754 */
1755int
1756pci_msi_device_blacklisted(device_t dev)
1757{
1758	struct pci_quirk *q;
1759
1760	if (!pci_honor_msi_blacklist)
1761		return (0);
1762
1763	for (q = &pci_quirks[0]; q->devid; q++) {
1764		if (q->devid == pci_get_devid(dev) &&
1765		    q->type == PCI_QUIRK_DISABLE_MSI)
1766			return (1);
1767	}
1768	return (0);
1769}
1770
1771/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1773 * we just check for blacklisted chipsets as represented by the
1774 * host-PCI bridge at device 0:0:0.  In the future, it may become
1775 * necessary to check other system attributes, such as the kenv values
1776 * that give the motherboard manufacturer and model number.
1777 */
1778static int
1779pci_msi_blacklisted(void)
1780{
1781	device_t dev;
1782
1783	if (!pci_honor_msi_blacklist)
1784		return (0);
1785
1786	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1787	if (!(pcie_chipset || pcix_chipset))
1788		return (1);
1789
1790	dev = pci_find_bsf(0, 0, 0);
1791	if (dev != NULL)
1792		return (pci_msi_device_blacklisted(dev));
1793	return (0);
1794}
1795
1796/*
1797 * Attempt to allocate *count MSI messages.  The actual number allocated is
1798 * returned in *count.  After this function returns, each message will be
1799 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1800 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (irqs[] is sized for 32). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2 (still a power of 2). */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * holds log2 of the message count; ffs(actual) - 1 computes
	 * that since actual is a power of 2.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1919
1920/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated, collecting
	 * the IRQ numbers into irqs[] as we go.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count and clear the saved address/data. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1968
1969/*
1970 * Return the max supported MSI messages this device supports.
1971 * Basically, assuming the MD code can alloc messages, this function
1972 * should return the maximum value that pci_alloc_msi() can return.
1973 * Thus, it is subject to the tunables, etc.
1974 */
1975int
1976pci_msi_count_method(device_t dev, device_t child)
1977{
1978	struct pci_devinfo *dinfo = device_get_ivars(child);
1979	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1980
1981	if (pci_do_msi && msi->msi_location != 0)
1982		return (msi->msi_msgnum);
1983	return (0);
1984}
1985
1986/* free pcicfgregs structure and all depending data structures */
1987
1988int
1989pci_freecfg(struct pci_devinfo *dinfo)
1990{
1991	struct devlist *devlist_head;
1992	int i;
1993
1994	devlist_head = &pci_devq;
1995
1996	if (dinfo->cfg.vpd.vpd_reg) {
1997		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1998		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1999			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2000		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2001		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2002			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2003		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2004	}
2005	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2006	free(dinfo, M_DEVBUF);
2007
2008	/* increment the generation count */
2009	pci_generation++;
2010
2011	/* we're losing one device */
2012	pci_numdevs--;
2013	return (0);
2014}
2015
2016/*
 * PCI power management
2018 */
2019int
2020pci_set_powerstate_method(device_t dev, device_t child, int state)
2021{
2022	struct pci_devinfo *dinfo = device_get_ivars(child);
2023	pcicfgregs *cfg = &dinfo->cfg;
2024	uint16_t status;
2025	int result, oldstate, highest, delay;
2026
2027	if (cfg->pp.pp_cap == 0)
2028		return (EOPNOTSUPP);
2029
2030	/*
2031	 * Optimize a no state change request away.  While it would be OK to
2032	 * write to the hardware in theory, some devices have shown odd
2033	 * behavior when going from D3 -> D3.
2034	 */
2035	oldstate = pci_get_powerstate(child);
2036	if (oldstate == state)
2037		return (0);
2038
2039	/*
2040	 * The PCI power management specification states that after a state
2041	 * transition between PCI power states, system software must
2042	 * guarantee a minimal delay before the function accesses the device.
2043	 * Compute the worst case delay that we need to guarantee before we
2044	 * access the device.  Many devices will be responsive much more
2045	 * quickly than this delay, but there are some that don't respond
2046	 * instantly to state changes.  Transitions to/from D3 state require
2047	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2048	 * is done below with DELAY rather than a sleeper function because
2049	 * this function can be called from contexts where we cannot sleep.
2050	 */
2051	highest = (oldstate > state) ? oldstate : state;
2052	if (highest == PCI_POWERSTATE_D3)
2053	    delay = 10000;
2054	else if (highest == PCI_POWERSTATE_D2)
2055	    delay = 200;
2056	else
2057	    delay = 0;
2058	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2059	    & ~PCIM_PSTAT_DMASK;
2060	result = 0;
2061	switch (state) {
2062	case PCI_POWERSTATE_D0:
2063		status |= PCIM_PSTAT_D0;
2064		break;
2065	case PCI_POWERSTATE_D1:
2066		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2067			return (EOPNOTSUPP);
2068		status |= PCIM_PSTAT_D1;
2069		break;
2070	case PCI_POWERSTATE_D2:
2071		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2072			return (EOPNOTSUPP);
2073		status |= PCIM_PSTAT_D2;
2074		break;
2075	case PCI_POWERSTATE_D3:
2076		status |= PCIM_PSTAT_D3;
2077		break;
2078	default:
2079		return (EINVAL);
2080	}
2081
2082	if (bootverbose)
2083		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2084		    state);
2085
2086	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2087	if (delay)
2088		DELAY(delay);
2089	return (0);
2090}
2091
2092int
2093pci_get_powerstate_method(device_t dev, device_t child)
2094{
2095	struct pci_devinfo *dinfo = device_get_ivars(child);
2096	pcicfgregs *cfg = &dinfo->cfg;
2097	uint16_t status;
2098	int result;
2099
2100	if (cfg->pp.pp_cap != 0) {
2101		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2102		switch (status & PCIM_PSTAT_DMASK) {
2103		case PCIM_PSTAT_D0:
2104			result = PCI_POWERSTATE_D0;
2105			break;
2106		case PCIM_PSTAT_D1:
2107			result = PCI_POWERSTATE_D1;
2108			break;
2109		case PCIM_PSTAT_D2:
2110			result = PCI_POWERSTATE_D2;
2111			break;
2112		case PCIM_PSTAT_D3:
2113			result = PCI_POWERSTATE_D3;
2114			break;
2115		default:
2116			result = PCI_POWERSTATE_UNKNOWN;
2117			break;
2118		}
2119	} else {
2120		/* No support, device is always at D0 */
2121		result = PCI_POWERSTATE_D0;
2122	}
2123	return (result);
2124}
2125
2126/*
2127 * Some convenience functions for PCI device drivers.
2128 */
2129
2130static __inline void
2131pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2132{
2133	uint16_t	command;
2134
2135	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2136	command |= bit;
2137	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2138}
2139
2140static __inline void
2141pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2142{
2143	uint16_t	command;
2144
2145	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2146	command &= ~bit;
2147	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2148}
2149
2150int
2151pci_enable_busmaster_method(device_t dev, device_t child)
2152{
2153	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2154	return (0);
2155}
2156
2157int
2158pci_disable_busmaster_method(device_t dev, device_t child)
2159{
2160	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2161	return (0);
2162}
2163
2164int
2165pci_enable_io_method(device_t dev, device_t child, int space)
2166{
2167	uint16_t bit;
2168
2169	switch(space) {
2170	case SYS_RES_IOPORT:
2171		bit = PCIM_CMD_PORTEN;
2172		break;
2173	case SYS_RES_MEMORY:
2174		bit = PCIM_CMD_MEMEN;
2175		break;
2176	default:
2177		return (EINVAL);
2178	}
2179	pci_set_command_bit(dev, child, bit);
2180	return (0);
2181}
2182
2183int
2184pci_disable_io_method(device_t dev, device_t child, int space)
2185{
2186	uint16_t bit;
2187
2188	switch(space) {
2189	case SYS_RES_IOPORT:
2190		bit = PCIM_CMD_PORTEN;
2191		break;
2192	case SYS_RES_MEMORY:
2193		bit = PCIM_CMD_MEMEN;
2194		break;
2195	default:
2196		return (EINVAL);
2197	}
2198	pci_clear_command_bit(dev, child, bit);
2199	return (0);
2200}
2201
2202/*
2203 * New style pci driver.  Parent device is either a pci-host-bridge or a
2204 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2205 */
2206
/*
 * Dump a device's decoded config header (identity, location, class,
 * command/status, latency/timing, interrupt routing) plus any power
 * management, MSI and MSI-X capabilities to the console.  Only prints
 * when booting verbose; otherwise a no-op.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* lattimer is in units of 30ns; mingnt/maxlat in 250ns. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTA..INTD ('a'..'d'). */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state from the PM status reg. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2263
2264static int
2265pci_porten(device_t dev)
2266{
2267	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2268}
2269
2270static int
2271pci_memen(device_t dev)
2272{
2273	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2274}
2275
/*
 * Read a BAR's current value and size it by the classic all-ones probe.
 * On return *mapp holds the (possibly 64-bit) programmed BAR value and
 * *testvalp holds the value read back after writing all 1's, whose low
 * sticky-zero bits encode the BAR's size.  The BAR is restored and
 * decoding re-enabled before returning; the statement order below is
 * deliberate and must not be rearranged.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR spans two consecutive 32-bit registers. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2322
2323static void
2324pci_write_bar(device_t dev, int reg, pci_addr_t base)
2325{
2326	pci_addr_t map;
2327	int ln2range;
2328
2329	map = pci_read_config(dev, reg, 4);
2330	ln2range = pci_maprange(map);
2331	pci_write_config(dev, reg, base, 4);
2332	if (ln2range == 64)
2333		pci_write_config(dev, reg + 4, base >> 32, 4);
2334}
2335
2336/*
2337 * Add a resource based on a pci map register. Return 1 if the map
2338 * register is a 32bit map register or 2 if it is a 64bit register.
2339 */
2340static int
2341pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2342    int force, int prefetch)
2343{
2344	pci_addr_t base, map, testval;
2345	pci_addr_t start, end, count;
2346	int barlen, basezero, maprange, mapsize, type;
2347	uint16_t cmd;
2348	struct resource *res;
2349
2350	pci_read_bar(dev, reg, &map, &testval);
2351	if (PCI_BAR_MEM(map)) {
2352		type = SYS_RES_MEMORY;
2353		if (map & PCIM_BAR_MEM_PREFETCH)
2354			prefetch = 1;
2355	} else
2356		type = SYS_RES_IOPORT;
2357	mapsize = pci_mapsize(testval);
2358	base = pci_mapbase(map);
2359#ifdef __PCI_BAR_ZERO_VALID
2360	basezero = 0;
2361#else
2362	basezero = base == 0;
2363#endif
2364	maprange = pci_maprange(map);
2365	barlen = maprange == 64 ? 2 : 1;
2366
2367	/*
2368	 * For I/O registers, if bottom bit is set, and the next bit up
2369	 * isn't clear, we know we have a BAR that doesn't conform to the
2370	 * spec, so ignore it.  Also, sanity check the size of the data
2371	 * areas to the type of memory involved.  Memory must be at least
2372	 * 16 bytes in size, while I/O ranges must be at least 4.
2373	 */
2374	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2375		return (barlen);
2376	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2377	    (type == SYS_RES_IOPORT && mapsize < 2))
2378		return (barlen);
2379
2380	if (bootverbose) {
2381		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2382		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2383		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2384			printf(", port disabled\n");
2385		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2386			printf(", memory disabled\n");
2387		else
2388			printf(", enabled\n");
2389	}
2390
2391	/*
2392	 * If base is 0, then we have problems if this architecture does
2393	 * not allow that.  It is best to ignore such entries for the
2394	 * moment.  These will be allocated later if the driver specifically
2395	 * requests them.  However, some removable busses look better when
2396	 * all resources are allocated, so allow '0' to be overriden.
2397	 *
2398	 * Similarly treat maps whose values is the same as the test value
2399	 * read back.  These maps have had all f's written to them by the
2400	 * BIOS in an attempt to disable the resources.
2401	 */
2402	if (!force && (basezero || map == testval))
2403		return (barlen);
2404	if ((u_long)base != base) {
2405		device_printf(bus,
2406		    "pci%d:%d:%d:%d bar %#x too many address bits",
2407		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2408		    pci_get_function(dev), reg);
2409		return (barlen);
2410	}
2411
2412	/*
2413	 * This code theoretically does the right thing, but has
2414	 * undesirable side effects in some cases where peripherals
2415	 * respond oddly to having these bits enabled.  Let the user
2416	 * be able to turn them off (since pci_enable_io_modes is 1 by
2417	 * default).
2418	 */
2419	if (pci_enable_io_modes) {
2420		/* Turn on resources that have been left off by a lazy BIOS */
2421		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2422			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2423			cmd |= PCIM_CMD_PORTEN;
2424			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2425		}
2426		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2427			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2428			cmd |= PCIM_CMD_MEMEN;
2429			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2430		}
2431	} else {
2432		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2433			return (barlen);
2434		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2435			return (barlen);
2436	}
2437
2438	count = 1 << mapsize;
2439	if (basezero || base == pci_mapbase(testval)) {
2440		start = 0;	/* Let the parent decide. */
2441		end = ~0ULL;
2442	} else {
2443		start = base;
2444		end = base + (1 << mapsize) - 1;
2445	}
2446	resource_list_add(rl, type, reg, start, end, count);
2447
2448	/*
2449	 * Try to allocate the resource for this BAR from our parent
2450	 * so that this resource range is already reserved.  The
2451	 * driver for this device will later inherit this resource in
2452	 * pci_alloc_resource().
2453	 */
2454	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2455	    prefetch ? RF_PREFETCHABLE : 0);
2456	if (res == NULL) {
2457		/*
2458		 * If the allocation fails, clear the BAR and delete
2459		 * the resource list entry to force
2460		 * pci_alloc_resource() to allocate resources from the
2461		 * parent.
2462		 */
2463		resource_list_delete(rl, type, reg);
2464		start = 0;
2465	} else {
2466		start = rman_get_start(res);
2467		rman_set_device(res, bus);
2468	}
2469	pci_write_bar(dev, reg, start);
2470	return (barlen);
2471}
2472
2473/*
2474 * For ATA devices we need to decide early what addressing mode to use.
2475 * Legacy demands that the primary and secondary ATA ports sits on the
2476 * same addresses that old ISA hardware did. This dictates that we use
2477 * those addresses and ignore the BAR's if we cannot set PCI native
2478 * addressing mode.
2479 */
2480static void
2481pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2482    uint32_t prefetchmask)
2483{
2484	struct resource *r;
2485	int rid, type, progif;
2486#if 0
2487	/* if this device supports PCI native addressing use it */
2488	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2489	if ((progif & 0x8a) == 0x8a) {
2490		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2491		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2492			printf("Trying ATA native PCI addressing mode\n");
2493			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2494		}
2495	}
2496#endif
2497	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2498	type = SYS_RES_IOPORT;
2499	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2500		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2501		    prefetchmask & (1 << 0));
2502		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2503		    prefetchmask & (1 << 1));
2504	} else {
2505		rid = PCIR_BAR(0);
2506		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2507		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2508		    8, 0);
2509		rman_set_device(r, bus);
2510		rid = PCIR_BAR(1);
2511		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2512		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2513		    1, 0);
2514		rman_set_device(r, bus);
2515	}
2516	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2517		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2518		    prefetchmask & (1 << 2));
2519		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2520		    prefetchmask & (1 << 3));
2521	} else {
2522		rid = PCIR_BAR(2);
2523		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2524		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2525		    8, 0);
2526		rman_set_device(r, bus);
2527		rid = PCIR_BAR(3);
2528		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2529		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2530		    1, 0);
2531		rman_set_device(r, bus);
2532	}
2533	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2534	    prefetchmask & (1 << 4));
2535	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2536	    prefetchmask & (1 << 5));
2537}
2538
/*
 * Determine the legacy INTx IRQ for a device and record it as the rid 0
 * SYS_RES_IRQ resource.  The IRQ is taken, in order of preference, from
 * a user tunable, the parent bus's interrupt routing, or the device's
 * intline register.  If force_route is set, routing via the parent is
 * attempted even when intline already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2586
2587/* Perform early OHCI takeover from SMM. */
/*
 * Perform early OHCI takeover from SMM.  If the BIOS/SMM owns the
 * controller (OHCI_IR set), request an ownership change and poll for up
 * to ~100ms; if SMM never releases it, reset the controller.  Interrupts
 * are disabled before returning so the BIOS cannot interfere later.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Ask SMM to hand over ownership. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for SMM to drop OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2623
2624/* Perform early UHCI takeover from SMM. */
2625static void
2626uhci_early_takeover(device_t self)
2627{
2628	struct resource *res;
2629	int rid;
2630
2631	/*
2632	 * Set the PIRQD enable bit and switch off all the others. We don't
2633	 * want legacy support to interfere with us XXX Does this also mean
2634	 * that the BIOS won't touch the keyboard anymore if it is connected
2635	 * to the ports of the root hub?
2636	 */
2637	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2638
2639	/* Disable interrupts */
2640	rid = PCI_UHCI_BASE_REG;
2641	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2642	if (res != NULL) {
2643		bus_write_2(res, UHCI_INTR, 0);
2644		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2645	}
2646}
2647
2648/* Perform early EHCI takeover from SMM. */
/*
 * Perform early EHCI takeover from SMM.  Walk the extended capability
 * list looking for the legacy-support (USBLEGSUP) capability; for each
 * one where the BIOS semaphore is held, set the OS semaphore, poll up
 * to ~100ms for the BIOS to release, and disable controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own the controller; nothing to do. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to release. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2703
/*
 * Populate a child device's resource list: scan its BARs (with special
 * handling for legacy-mode ATA controllers), add any quirk-defined map
 * registers, route its INTx interrupt, and perform early takeover of
 * USB host controllers from SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 (64-bit BARs use two slots). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the BIOS/SMM if requested. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2758
/*
 * Enumerate all slots/functions on a bus and add a child device for
 * each function that responds with a sane header type.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/*
		 * NOTE: f = 0 here is not dead code -- the REG() macro
		 * below reads function f's config space, so this makes
		 * the header-type probe target function 0.
		 */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots whose header type is invalid/absent. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions 0..PCI_FUNCMAX. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2791
/*
 * Create a newbus child for a probed PCI function and set it up:
 * attach the devinfo as ivars, initialize its resource list, snapshot
 * its config space, restore/normalize it, and reserve its resources.
 * The save-then-restore ordering populates the saved-register fields
 * before pci_cfg_restore() writes them back.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2803
2804static int
2805pci_probe(device_t dev)
2806{
2807
2808	device_set_desc(dev, "PCI bus");
2809
2810	/* Allow other subclasses to override this driver. */
2811	return (BUS_PROBE_GENERIC);
2812}
2813
2814static int
2815pci_attach(device_t dev)
2816{
2817	int busno, domain;
2818
2819	/*
2820	 * Since there can be multiple independantly numbered PCI
2821	 * busses on systems with multiple PCI domains, we can't use
2822	 * the unit number to decide which bus we are probing. We ask
2823	 * the parent pcib what our domain and bus numbers are.
2824	 */
2825	domain = pcib_get_domain(dev);
2826	busno = pcib_get_bus(dev);
2827	if (bootverbose)
2828		device_printf(dev, "domain=%d, physical bus=%d\n",
2829		    domain, busno);
2830	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2831	return (bus_generic_attach(dev));
2832}
2833
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (when ACPI-driven power management is enabled) place
 * attached type 0 children into a low-power state for sleep.
 * Returns 0 or an errno from child enumeration/suspend.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 target. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2882
/*
 * Bus resume method: power each child back to D0 (when ACPI-driven
 * power management is enabled), restore its saved config space, then
 * resume the children.  Returns 0 or an errno.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2918
2919static void
2920pci_load_vendor_data(void)
2921{
2922	caddr_t vendordata, info;
2923
2924	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2925		info = preload_search_info(vendordata, MODINFO_ADDR);
2926		pci_vendordata = *(char **)info;
2927		info = preload_search_info(vendordata, MODINFO_SIZE);
2928		pci_vendordata_size = *(size_t *)info;
2929		/* terminate the database */
2930		pci_vendordata[pci_vendordata_size] = '\n';
2931	}
2932}
2933
2934void
2935pci_driver_added(device_t dev, driver_t *driver)
2936{
2937	int numdevs;
2938	device_t *devlist;
2939	device_t child;
2940	struct pci_devinfo *dinfo;
2941	int i;
2942
2943	if (bootverbose)
2944		device_printf(dev, "driver added\n");
2945	DEVICE_IDENTIFY(driver, dev);
2946	if (device_get_children(dev, &devlist, &numdevs) != 0)
2947		return;
2948	for (i = 0; i < numdevs; i++) {
2949		child = devlist[i];
2950		if (device_get_state(child) != DS_NOTPRESENT)
2951			continue;
2952		dinfo = device_get_ivars(child);
2953		pci_print_verbose(dinfo);
2954		if (bootverbose)
2955			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
2956		pci_cfg_restore(child, dinfo);
2957		if (device_probe_and_attach(child) != 0)
2958			pci_cfg_save(child, dinfo, 1);
2959	}
2960	free(devlist, M_TEMP);
2961}
2962
/*
 * Bus setup_intr method.  After the generic setup succeeds, configure
 * interrupt delivery for direct children: rid 0 means legacy INTx (so
 * un-mask INTx in the command register); any other rid is an MSI or
 * MSI-X message, which is mapped via the parent bridge, programmed into
 * the device on first use, and reference-counted per handler.  On a
 * mapping failure the just-installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* First handler for MSI: map and program it. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Otherwise it must be MSI-X; rid indexes the table. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is only nonzero here via the goto paths above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3054
3055int
3056pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3057    void *cookie)
3058{
3059	struct msix_table_entry *mte;
3060	struct resource_list_entry *rle;
3061	struct pci_devinfo *dinfo;
3062	int error, rid;
3063
3064	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3065		return (EINVAL);
3066
3067	/* If this isn't a direct child, just bail out */
3068	if (device_get_parent(child) != dev)
3069		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3070
3071	rid = rman_get_rid(irq);
3072	if (rid == 0) {
3073		/* Mask INTx */
3074		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3075	} else {
3076		/*
3077		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3078		 * decrement the appropriate handlers count and mask the
3079		 * MSI-X message, or disable MSI messages if the count
3080		 * drops to 0.
3081		 */
3082		dinfo = device_get_ivars(child);
3083		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3084		if (rle->res != irq)
3085			return (EINVAL);
3086		if (dinfo->cfg.msi.msi_alloc > 0) {
3087			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3088			    ("MSI-X index too high"));
3089			if (dinfo->cfg.msi.msi_handlers == 0)
3090				return (EINVAL);
3091			dinfo->cfg.msi.msi_handlers--;
3092			if (dinfo->cfg.msi.msi_handlers == 0)
3093				pci_disable_msi(child);
3094		} else {
3095			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3096			    ("No MSI or MSI-X interrupts allocated"));
3097			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3098			    ("MSI-X index too high"));
3099			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3100			if (mte->mte_handlers == 0)
3101				return (EINVAL);
3102			mte->mte_handlers--;
3103			if (mte->mte_handlers == 0)
3104				pci_mask_msix(child, rid - 1);
3105		}
3106	}
3107	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3108	if (rid > 0)
3109		KASSERT(error == 0,
3110		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3111	return (error);
3112}
3113
3114int
3115pci_print_child(device_t dev, device_t child)
3116{
3117	struct pci_devinfo *dinfo;
3118	struct resource_list *rl;
3119	int retval = 0;
3120
3121	dinfo = device_get_ivars(child);
3122	rl = &dinfo->resources;
3123
3124	retval += bus_print_child_header(dev, child);
3125
3126	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3127	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3128	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3129	if (device_get_flags(dev))
3130		retval += printf(" flags %#x", device_get_flags(dev));
3131
3132	retval += printf(" at device %d.%d", pci_get_slot(child),
3133	    pci_get_function(child));
3134
3135	retval += bus_print_child_footer(dev, child);
3136
3137	return (retval);
3138}
3139
/*
 * Class/subclass -> description table used by pci_probe_nomatch() to
 * describe devices with no driver and no vendor-database entry.  An
 * entry with subclass == -1 is the fallback description for the whole
 * class; the table is terminated by a NULL desc pointer.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3231
3232void
3233pci_probe_nomatch(device_t dev, device_t child)
3234{
3235	int	i;
3236	char	*cp, *scp, *device;
3237
3238	/*
3239	 * Look for a listing for this device in a loaded device database.
3240	 */
3241	if ((device = pci_describe_device(child)) != NULL) {
3242		device_printf(dev, "<%s>", device);
3243		free(device, M_DEVBUF);
3244	} else {
3245		/*
3246		 * Scan the class/subclass descriptions for a general
3247		 * description.
3248		 */
3249		cp = "unknown";
3250		scp = NULL;
3251		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3252			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3253				if (pci_nomatch_tab[i].subclass == -1) {
3254					cp = pci_nomatch_tab[i].desc;
3255				} else if (pci_nomatch_tab[i].subclass ==
3256				    pci_get_subclass(child)) {
3257					scp = pci_nomatch_tab[i].desc;
3258				}
3259			}
3260		}
3261		device_printf(dev, "<%s%s%s>",
3262		    cp ? cp : "",
3263		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3264		    scp ? scp : "");
3265	}
3266	printf(" at device %d.%d (no driver attached)\n",
3267	    pci_get_slot(child), pci_get_function(child));
3268	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3269	return;
3270}
3271
3272/*
3273 * Parse the PCI device database, if loaded, and return a pointer to a
3274 * description of the device.
3275 *
3276 * The database is flat text formatted as follows:
3277 *
3278 * Any line not in a valid format is ignored.
3279 * Lines are terminated with newline '\n' characters.
3280 *
3281 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3282 * the vendor name.
3283 *
3284 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3285 * - devices cannot be listed without a corresponding VENDOR line.
3286 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3287 * another TAB, then the device name.
3288 */
3289
3290/*
3291 * Assuming (ptr) points to the beginning of a line in the database,
3292 * return the vendor or device and description of the next entry.
3293 * The value of (vendor) or (device) inappropriate for the entry type
3294 * is set to -1.  Returns nonzero at the end of the database.
3295 *
3296 * Note that this is slightly unrobust in the face of corrupt data;
3297 * we attempt to safeguard against this by spamming the end of the
3298 * database with a newline when we initialise.
3299 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Defaults until a valid entry is matched. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the vendor data blob from cp onward. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			/* End of database reached. */
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		/*
		 * NOTE(review): the %80[^\n] scanset can store up to 80
		 * characters plus a terminating NUL (81 bytes) into *desc;
		 * callers must size the buffer accordingly -- verify
		 * against the malloc in pci_describe_device().
		 */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? (device lines start with a TAB) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3345
3346static char *
3347pci_describe_device(device_t dev)
3348{
3349	int	vendor, device;
3350	char	*desc, *vp, *dp, *line;
3351
3352	desc = vp = dp = NULL;
3353
3354	/*
3355	 * If we have no vendor data, we can't do anything.
3356	 */
3357	if (pci_vendordata == NULL)
3358		goto out;
3359
3360	/*
3361	 * Scan the vendor data looking for this device
3362	 */
3363	line = pci_vendordata;
3364	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3365		goto out;
3366	for (;;) {
3367		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3368			goto out;
3369		if (vendor == pci_get_vendor(dev))
3370			break;
3371	}
3372	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3373		goto out;
3374	for (;;) {
3375		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3376			*dp = 0;
3377			break;
3378		}
3379		if (vendor != -1) {
3380			*dp = 0;
3381			break;
3382		}
3383		if (device == pci_get_device(dev))
3384			break;
3385	}
3386	if (dp[0] == '\0')
3387		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3388	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3389	    NULL)
3390		sprintf(desc, "%s, %s", vp, dp);
3391 out:
3392	if (vp != NULL)
3393		free(vp, M_DEVBUF);
3394	if (dp != NULL)
3395		free(dp, M_DEVBUF);
3396	return(desc);
3397}
3398
/*
 * Read a PCI instance variable for a child device.  All values come
 * from the cached config registers in the child's pci_devinfo; no
 * hardware access is performed here.  Returns 0 on success, EINVAL
 * for PCI_IVAR_ETHADDR (unsupported), and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Composite id: device in the high 16 bits, vendor low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3481
/*
 * Write a PCI instance variable for a child device.  Only the
 * interrupt pin is writable (updates the cached copy only); all other
 * known ivars are read-only and return EINVAL, unknown ivars ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* Only the cached value is updated, not the hardware. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3514
3515
3516#include "opt_ddb.h"
3517#ifdef DDB
3518#include <ddb/ddb.h>
3519#include <sys/cons.h>
3520
3521/*
3522 * List resources based on pci map registers, used for within ddb
3523 */
3524
/* DDB "show pciregs" command: dump one summary line per PCI device. */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3564#endif /* DDB */
3565
/*
 * Lazily size and allocate the resource backing a BAR.  Probes the
 * BAR via pci_read_bar(), validates that the requested resource type
 * matches what the BAR decodes, overrides the requested size with the
 * BAR's actual size, allocates an inactive resource from the parent,
 * records it in the child's resource list (owned by the bus), and
 * programs the BAR with the assigned base.  Returns the resource or
 * NULL on any failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The BAR's memory/io type must agree with the requested type. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus owns the resource until a child allocates it. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3655
3656
/*
 * Bus method: allocate a resource for a child.  Direct children get
 * lazy allocation: IRQ 0 may be routed on first use (unless MSI/MSI-X
 * is active), and BAR-backed port/memory resources are sized and
 * reserved on demand via pci_alloc_map(), with ownership handed from
 * the bus to the child.  Everything else falls through to the
 * standard resource-list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Pass requests from grandchildren straight up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Default path: allocate through the child's resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3728
/*
 * Bus method: release a child's resource.  BAR-backed port/memory
 * resources are not actually released -- they are deactivated if
 * needed and ownership is returned to the bus so the reservation (and
 * the programmed BAR) survives for a later re-allocation.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	int error;

	/* Pass releases from grandchildren straight up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	/*
	 * For BARs we don't actually want to release the resource.
	 * Instead, we deactivate the resource if needed and then give
	 * ownership of the BAR back to the bus.
	 */
	switch (type) {
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		if (rman_get_device(r) != child)
			return (EINVAL);
		if (rman_get_flags(r) & RF_ACTIVE) {
			error = bus_deactivate_resource(child, type, rid, r);
			if (error)
				return (error);
		}
		/* Hand the reservation back to the bus. */
		rman_set_device(r, dev);
		return (0);
	}
	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
}
3759
3760int
3761pci_activate_resource(device_t dev, device_t child, int type, int rid,
3762    struct resource *r)
3763{
3764	int error;
3765
3766	error = bus_generic_activate_resource(dev, child, type, rid, r);
3767	if (error)
3768		return (error);
3769
3770	/* Enable decoding in the command register when activating BARs. */
3771	if (device_get_parent(child) == dev) {
3772		switch (type) {
3773		case SYS_RES_IOPORT:
3774		case SYS_RES_MEMORY:
3775			error = PCI_ENABLE_IO(dev, child, type);
3776			break;
3777		}
3778	}
3779	return (error);
3780}
3781
/*
 * Bus method: delete a resource entry from a direct child's resource
 * list.  Refuses to delete a resource the child still owns or has
 * active; for BARs it first clears the BAR register so the device
 * stops decoding the range before the reservation is released.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3825
3826struct resource_list *
3827pci_get_resource_list (device_t dev, device_t child)
3828{
3829	struct pci_devinfo *dinfo = device_get_ivars(child);
3830
3831	return (&dinfo->resources);
3832}
3833
3834uint32_t
3835pci_read_config_method(device_t dev, device_t child, int reg, int width)
3836{
3837	struct pci_devinfo *dinfo = device_get_ivars(child);
3838	pcicfgregs *cfg = &dinfo->cfg;
3839
3840	return (PCIB_READ_CONFIG(device_get_parent(dev),
3841	    cfg->bus, cfg->slot, cfg->func, reg, width));
3842}
3843
3844void
3845pci_write_config_method(device_t dev, device_t child, int reg,
3846    uint32_t val, int width)
3847{
3848	struct pci_devinfo *dinfo = device_get_ivars(child);
3849	pcicfgregs *cfg = &dinfo->cfg;
3850
3851	PCIB_WRITE_CONFIG(device_get_parent(dev),
3852	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3853}
3854
3855int
3856pci_child_location_str_method(device_t dev, device_t child, char *buf,
3857    size_t buflen)
3858{
3859
3860	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3861	    pci_get_function(child));
3862	return (0);
3863}
3864
3865int
3866pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3867    size_t buflen)
3868{
3869	struct pci_devinfo *dinfo;
3870	pcicfgregs *cfg;
3871
3872	dinfo = device_get_ivars(child);
3873	cfg = &dinfo->cfg;
3874	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3875	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3876	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3877	    cfg->progif);
3878	return (0);
3879}
3880
3881int
3882pci_assign_interrupt_method(device_t dev, device_t child)
3883{
3884	struct pci_devinfo *dinfo = device_get_ivars(child);
3885	pcicfgregs *cfg = &dinfo->cfg;
3886
3887	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3888	    cfg->intpin));
3889}
3890
/*
 * Module event handler: on load, initialize the global device list,
 * create the /dev/pci control device, and load the vendor database;
 * on unload, destroy the control device.  Always returns 0.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Shared between MOD_LOAD and MOD_UNLOAD invocations. */
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
3912
/*
 * Restore a device's config-space state from the copy cached by
 * pci_cfg_save(): power it back up to D0, rewrite the BARs and the
 * writable type-0 header registers, and re-program any MSI/MSI-X
 * state.  Typically called on resume or after a device reset.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3958
/*
 * Snapshot a device's config-space state into its pci_devinfo so it
 * can later be restored by pci_cfg_restore().  If setstate is nonzero
 * and the pci_do_power_nodriver policy allows it for this device
 * class, the device is then powered down to D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4042