pci.c revision 200315
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 200315 2009-12-09 21:52:53Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72#ifdef __HAVE_ACPI
73#include <contrib/dev/acpica/include/acpi.h>
74#include "acpi_if.h"
75#else
76#define	ACPI_PWR_FOR_SLEEP(x, y, z)
77#endif
78
79static pci_addr_t	pci_mapbase(uint64_t mapreg);
80static const char	*pci_maptype(uint64_t mapreg);
81static int		pci_mapsize(uint64_t testval);
82static int		pci_maprange(uint64_t mapreg);
83static void		pci_fixancient(pcicfgregs *cfg);
84static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85
86static int		pci_porten(device_t dev);
87static int		pci_memen(device_t dev);
88static void		pci_assign_interrupt(device_t bus, device_t dev,
89			    int force_route);
90static int		pci_add_map(device_t bus, device_t dev, int reg,
91			    struct resource_list *rl, int force, int prefetch);
92static int		pci_probe(device_t dev);
93static int		pci_attach(device_t dev);
94static void		pci_load_vendor_data(void);
95static int		pci_describe_parse_line(char **ptr, int *vendor,
96			    int *device, char **desc);
97static char		*pci_describe_device(device_t dev);
98static int		pci_modevent(module_t mod, int what, void *arg);
99static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100			    pcicfgregs *cfg);
101static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
102static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103			    int reg, uint32_t *data);
104#if 0
105static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106			    int reg, uint32_t data);
107#endif
108static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109static void		pci_disable_msi(device_t dev);
110static void		pci_enable_msi(device_t dev, uint64_t address,
111			    uint16_t data);
112static void		pci_enable_msix(device_t dev, u_int index,
113			    uint64_t address, uint32_t data);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static void		pci_resume_msi(device_t dev);
118static void		pci_resume_msix(device_t dev);
119
/*
 * Method table for the "pci" bus driver: device interface, generic bus
 * interface, and the PCI-specific kobj interface declared in pci_if.m.
 * Methods not listed here fall back to their defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
171
172DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
173
174static devclass_t pci_devclass;
175DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
176MODULE_VERSION(pci, 1);
177
178static char	*pci_vendordata;
179static size_t	pci_vendordata_size;
180
181
/*
 * Per-device quirk descriptor, keyed by the combined device/vendor ID
 * exactly as read from the PCIR_DEVVENDOR config register.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific argument (e.g. register offset) */
	int	arg2;
};
190
/* Table of known device quirks; terminated by a zeroed entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
225
226/* map register information */
227#define	PCI_MAPMEM	0x01	/* memory map */
228#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
229#define	PCI_MAPPORT	0x04	/* port map */
230
231struct devlist pci_devq;
232uint32_t pci_generation;
233uint32_t pci_numdevs = 0;
234static int pcie_chipset, pcix_chipset;
235
236/* sysctl vars */
237SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
238
239static int pci_enable_io_modes = 1;
240TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
241SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
242    &pci_enable_io_modes, 1,
243    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
244enable these bits correctly.  We'd like to do this all the time, but there\n\
245are some peripherals that this causes problems with.");
246
247static int pci_do_power_nodriver = 0;
248TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
249SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
250    &pci_do_power_nodriver, 0,
251  "Place a function into D3 state when no driver attaches to it.  0 means\n\
252disable.  1 means conservatively place devices into D3 state.  2 means\n\
253agressively place devices into D3 state.  3 means put absolutely everything\n\
254in D3 state.");
255
256static int pci_do_power_resume = 1;
257TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
258SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
259    &pci_do_power_resume, 1,
260  "Transition from D3 -> D0 on resume.");
261
262static int pci_do_msi = 1;
263TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
264SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
265    "Enable support for MSI interrupts");
266
267static int pci_do_msix = 1;
268TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
269SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
270    "Enable support for MSI-X interrupts");
271
272static int pci_honor_msi_blacklist = 1;
273TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
274SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
275    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
276
277#if defined(__i386__) || defined(__amd64__)
278static int pci_usb_takeover = 1;
279#else
280static int pci_usb_takeover = 0;
281#endif
282TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
283SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
284    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
285Disable this if you depend on BIOS emulation of USB devices, that is\n\
286you use USB devices (like keyboard or mouse) but do not load USB drivers");
287
288/* Find a device_t by bus/slot/function in domain 0 */
289
290device_t
291pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
292{
293
294	return (pci_find_dbsf(0, bus, slot, func));
295}
296
297/* Find a device_t by domain/bus/slot/function */
298
299device_t
300pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
301{
302	struct pci_devinfo *dinfo;
303
304	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
305		if ((dinfo->cfg.domain == domain) &&
306		    (dinfo->cfg.bus == bus) &&
307		    (dinfo->cfg.slot == slot) &&
308		    (dinfo->cfg.func == func)) {
309			return (dinfo->cfg.dev);
310		}
311	}
312
313	return (NULL);
314}
315
316/* Find a device_t by vendor/device ID */
317
318device_t
319pci_find_device(uint16_t vendor, uint16_t device)
320{
321	struct pci_devinfo *dinfo;
322
323	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
324		if ((dinfo->cfg.vendor == vendor) &&
325		    (dinfo->cfg.device == device)) {
326			return (dinfo->cfg.dev);
327		}
328	}
329
330	return (NULL);
331}
332
333static int
334pci_printf(pcicfgregs *cfg, const char *fmt, ...)
335{
336	va_list ap;
337	int retval;
338
339	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
340	    cfg->func);
341	va_start(ap, fmt);
342	retval += vprintf(fmt, ap);
343	va_end(ap);
344	return (retval);
345}
346
347/* return base address of memory or port map */
348
349static pci_addr_t
350pci_mapbase(uint64_t mapreg)
351{
352
353	if (PCI_BAR_MEM(mapreg))
354		return (mapreg & PCIM_BAR_MEM_BASE);
355	else
356		return (mapreg & PCIM_BAR_IO_BASE);
357}
358
359/* return map type of memory or port map */
360
361static const char *
362pci_maptype(uint64_t mapreg)
363{
364
365	if (PCI_BAR_IO(mapreg))
366		return ("I/O Port");
367	if (mapreg & PCIM_BAR_MEM_PREFETCH)
368		return ("Prefetchable Memory");
369	return ("Memory");
370}
371
372/* return log2 of map size decoded for memory or port map */
373
374static int
375pci_mapsize(uint64_t testval)
376{
377	int ln2size;
378
379	testval = pci_mapbase(testval);
380	ln2size = 0;
381	if (testval != 0) {
382		while ((testval & 1) == 0)
383		{
384			ln2size++;
385			testval >>= 1;
386		}
387	}
388	return (ln2size);
389}
390
391/* return log2 of address range supported by map register */
392
393static int
394pci_maprange(uint64_t mapreg)
395{
396	int ln2range = 0;
397
398	if (PCI_BAR_IO(mapreg))
399		ln2range = 32;
400	else
401		switch (mapreg & PCIM_BAR_MEM_TYPE) {
402		case PCIM_BAR_MEM_32:
403			ln2range = 32;
404			break;
405		case PCIM_BAR_MEM_1MB:
406			ln2range = 20;
407			break;
408		case PCIM_BAR_MEM_64:
409			ln2range = 64;
410			break;
411		}
412	return (ln2range);
413}
414
415/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
416
417static void
418pci_fixancient(pcicfgregs *cfg)
419{
420	if (cfg->hdrtype != 0)
421		return;
422
423	/* PCI to PCI bridges use header type 1 */
424	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
425		cfg->hdrtype = 1;
426}
427
428/* extract header type specific config data */
429
/*
 * Read the header-type-specific portion of the config header into *cfg:
 * subsystem IDs (where defined) and the number of BARs for this header
 * layout.  Type 0 = normal device, 1 = PCI-PCI bridge (no subsystem
 * registers at these offsets), 2 = cardbus bridge.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
451
452/* read configuration header into pcicfgregs structure */
/*
 * Probe config space at domain/bus/slot/function d/b/s/f through bridge
 * 'pcib'.  If a device responds, allocate a pci_devinfo of 'size' bytes
 * (callers may embed it in a larger structure), fill in its config
 * registers and pciconf snapshot, link it onto the global device list,
 * and return it.  Returns NULL if no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the device/vendor register means "no device here". */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NOTE: with M_WAITOK this cannot fail; check is defensive. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* The multi-function bit lives in the header type register. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
526
/*
 * Walk the device's PCI capability list and record the location and
 * key registers of each capability this driver cares about (power
 * management, HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor,
 * PCI-X, PCI-express) into *cfg.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* Location of the capabilities pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				/* Data register is optional; only present if
				 * the capability is long enough to hold it. */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes the message count as a power of
			 * two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers each encode a BAR index
			 * plus an offset within that BAR. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
660
661/*
662 * PCI Vital Product Data
663 */
664
665#define	PCI_VPD_TIMEOUT		1000000
666
667static int
668pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
669{
670	int count = PCI_VPD_TIMEOUT;
671
672	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
673
674	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
675
676	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
677		if (--count < 0)
678			return (ENXIO);
679		DELAY(1);	/* limit looping */
680	}
681	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
682
683	return (0);
684}
685
#if 0
/*
 * Write one 32-bit word of Vital Product Data at byte offset 'reg'.
 * Mirror image of pci_read_vpd_reg(): load the data register, write the
 * offset with flag bit 15 set, then poll until the hardware clears the
 * flag to acknowledge completion.  Returns 0 on success or ENXIO on
 * timeout.  Currently compiled out; kept for future use.
 *
 * Fix: corrected the grammatical typo in the KASSERT panic message
 * ("must by" -> "must be"), matching pci_read_vpd_reg().
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
705
706#undef PCI_VPD_TIMEOUT
707
/*
 * Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()).
 * VPD hardware is read in 32-bit words; this buffers the current word
 * and doles it out one byte at a time while maintaining a running
 * checksum of every byte consumed.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current buffered 32-bit word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
716
717static int
718vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
719{
720	uint32_t reg;
721	uint8_t byte;
722
723	if (vrs->bytesinval == 0) {
724		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
725			return (ENXIO);
726		vrs->val = le32toh(reg);
727		vrs->off += 4;
728		byte = vrs->val & 0xff;
729		vrs->bytesinval = 3;
730	} else {
731		vrs->val = vrs->val >> 8;
732		byte = vrs->val & 0xff;
733		vrs->bytesinval--;
734	}
735
736	vrs->cksum += byte;
737	*data = byte;
738	return (0);
739}
740
/*
 * Parse the device's entire VPD area into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword/value array, and the writable
 * (VPD-W) keyword/value array.  Implemented as a state machine driven
 * one byte at a time by vpd_nextbyte():
 *
 *   state 0  - expect a resource tag (small or large format)
 *   state 1  - accumulating the Identifier String
 *   state 2  - expect a VPD-R keyword header
 *   state 3  - accumulating a VPD-R keyword value
 *   state 4  - skip bytes (no visible transition enters it; defensive)
 *   state 5  - expect a VPD-W keyword header
 *   state 6  - accumulating a VPD-W keyword value
 *   state -1 - normal end / parse abort
 *   state -2 - I/O error reading VPD
 *
 * On checksum failure or I/O error the partially-built arrays are freed.
 * Sets cfg->vpd.vpd_cached unconditionally so callers don't retry.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet checked, 0 = bad, 1 = good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian
				 * length follows the tag byte. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name
				 * packed into the tag byte. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header is keyword[2] + length byte */
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" carries the checksum byte: the sum of all
			 * bytes through it must be zero. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the RO array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Byte-skipping state; no visible transition enters
			 * it -- presumably retained defensively. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the W array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset of this writable field. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header is keyword[2] + length byte */
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the W array to its final size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1020
1021int
1022pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1023{
1024	struct pci_devinfo *dinfo = device_get_ivars(child);
1025	pcicfgregs *cfg = &dinfo->cfg;
1026
1027	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1028		pci_read_vpd(device_get_parent(dev), cfg);
1029
1030	*identptr = cfg->vpd.vpd_ident;
1031
1032	if (*identptr == NULL)
1033		return (ENXIO);
1034
1035	return (0);
1036}
1037
1038int
1039pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1040	const char **vptr)
1041{
1042	struct pci_devinfo *dinfo = device_get_ivars(child);
1043	pcicfgregs *cfg = &dinfo->cfg;
1044	int i;
1045
1046	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1047		pci_read_vpd(device_get_parent(dev), cfg);
1048
1049	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1050		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1051		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1052			*vptr = cfg->vpd.vpd_ros[i].value;
1053		}
1054
1055	if (i != cfg->vpd.vpd_rocnt)
1056		return (0);
1057
1058	*vptr = NULL;
1059	return (ENXIO);
1060}
1061
1062/*
1063 * Find the requested extended capability and return the offset in
1064 * configuration space via the pointer provided. The function returns
1065 * 0 on success and error code otherwise.
1066 */
1067int
1068pci_find_extcap_method(device_t dev, device_t child, int capability,
1069    int *capreg)
1070{
1071	struct pci_devinfo *dinfo = device_get_ivars(child);
1072	pcicfgregs *cfg = &dinfo->cfg;
1073	u_int32_t status;
1074	u_int8_t ptr;
1075
1076	/*
1077	 * Check the CAP_LIST bit of the PCI status register first.
1078	 */
1079	status = pci_read_config(child, PCIR_STATUS, 2);
1080	if (!(status & PCIM_STATUS_CAPPRESENT))
1081		return (ENXIO);
1082
1083	/*
1084	 * Determine the start pointer of the capabilities list.
1085	 */
1086	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1087	case 0:
1088	case 1:
1089		ptr = PCIR_CAP_PTR;
1090		break;
1091	case 2:
1092		ptr = PCIR_CAP_PTR_2;
1093		break;
1094	default:
1095		/* XXX: panic? */
1096		return (ENXIO);		/* no extended capabilities support */
1097	}
1098	ptr = pci_read_config(child, ptr, 1);
1099
1100	/*
1101	 * Traverse the capabilities list.
1102	 */
1103	while (ptr != 0) {
1104		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1105			if (capreg != NULL)
1106				*capreg = ptr;
1107			return (0);
1108		}
1109		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1110	}
1111
1112	return (ENOENT);
1113}
1114
1115/*
1116 * Support for MSI-X message interrupts.
1117 */
/*
 * Program a single MSI-X table entry with the given message address
 * and data, then enable any MSI -> HT mapping needed for the address.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each MSI-X table entry is 16 bytes wide. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1134
1135void
1136pci_mask_msix(device_t dev, u_int index)
1137{
1138	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140	uint32_t offset, val;
1141
1142	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1143	offset = msix->msix_table_offset + index * 16 + 12;
1144	val = bus_read_4(msix->msix_table_res, offset);
1145	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1146		val |= PCIM_MSIX_VCTRL_MASK;
1147		bus_write_4(msix->msix_table_res, offset, val);
1148	}
1149}
1150
1151void
1152pci_unmask_msix(device_t dev, u_int index)
1153{
1154	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156	uint32_t offset, val;
1157
1158	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159	offset = msix->msix_table_offset + index * 16 + 12;
1160	val = bus_read_4(msix->msix_table_res, offset);
1161	if (val & PCIM_MSIX_VCTRL_MASK) {
1162		val &= ~PCIM_MSIX_VCTRL_MASK;
1163		bus_write_4(msix->msix_table_res, offset, val);
1164	}
1165}
1166
1167int
1168pci_pending_msix(device_t dev, u_int index)
1169{
1170	struct pci_devinfo *dinfo = device_get_ivars(dev);
1171	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1172	uint32_t offset, bit;
1173
1174	KASSERT(msix->msix_table_len > index, ("bogus index"));
1175	offset = msix->msix_pba_offset + (index / 32) * 4;
1176	bit = 1 << index % 32;
1177	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1178}
1179
1180/*
1181 * Restore MSI-X registers and table during resume.  If MSI-X is
1182 * enabled then walk the virtual table to restore the actual MSI-X
1183 * table.
1184 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1212
1213/*
1214 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1215 * returned in *count.  After this function returns, each message will be
1216 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1217 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have activated the memory BARs holding the MSI-X table and
	 * PBA before asking for messages.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' is still the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is the number of messages successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors before enabling MSI-X in the control register. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector holds a 1-based index into msix_vectors[]. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1349
1350/*
1351 * By default, pci_alloc_msix() will assign the allocated IRQ
1352 * resources consecutively to the first N messages in the MSI-X table.
1353 * However, device drivers may want to use different layouts if they
1354 * either receive fewer messages than they asked for, or they wish to
1355 * populate the MSI-X table sparsely.  This method allows the driver
1356 * to specify what layout it wants.  It must be called after a
1357 * successful pci_alloc_msix() but before any of the associated
1358 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1359 *
1360 * The 'vectors' array contains 'count' message vectors.  The array
1361 * maps directly to the MSI-X table in that index 0 in the array
1362 * specifies the vector for the first message in the MSI-X table, etc.
1363 * The vector value in each array index can either be 0 to indicate
1364 * that no vector should be assigned to a message slot, or it can be a
 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
 * vector (IRQ) should be used for the corresponding message.
1368 *
1369 * On successful return, each message with a non-zero vector will have
1370 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1371 * 1.  Additionally, if any of the IRQs allocated via the previous
1372 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1373 * will be freed back to the system automatically.
1374 *
1375 * For example, suppose a driver has a MSI-X table with 6 messages and
1376 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1377 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1378 * C.  After the call to pci_alloc_msix(), the device will be setup to
1379 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1381 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1382 * be freed back to the system.  This device will also have valid
1383 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1384 *
1385 * In any case, the SYS_RES_IRQ rid X will always map to the message
1386 * at MSI-X table index X - 1 and will only be valid if a vector is
1387 * assigned to that table entry.
1388 */
1389int
1390pci_remap_msix_method(device_t dev, device_t child, int count,
1391    const u_int *vectors)
1392{
1393	struct pci_devinfo *dinfo = device_get_ivars(child);
1394	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395	struct resource_list_entry *rle;
1396	int i, irq, j, *used;
1397
1398	/*
1399	 * Have to have at least one message in the table but the
1400	 * table can't be bigger than the actual MSI-X table in the
1401	 * device.
1402	 */
1403	if (count == 0 || count > msix->msix_msgnum)
1404		return (EINVAL);
1405
1406	/* Sanity check the vectors. */
1407	for (i = 0; i < count; i++)
1408		if (vectors[i] > msix->msix_alloc)
1409			return (EINVAL);
1410
1411	/*
1412	 * Make sure there aren't any holes in the vectors to be used.
1413	 * It's a big pain to support it, and it doesn't really make
1414	 * sense anyway.  Also, at least one vector must be used.
1415	 */
1416	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1417	    M_ZERO);
1418	for (i = 0; i < count; i++)
1419		if (vectors[i] != 0)
1420			used[vectors[i] - 1] = 1;
1421	for (i = 0; i < msix->msix_alloc - 1; i++)
1422		if (used[i] == 0 && used[i + 1] == 1) {
1423			free(used, M_DEVBUF);
1424			return (EINVAL);
1425		}
1426	if (used[0] != 1) {
1427		free(used, M_DEVBUF);
1428		return (EINVAL);
1429	}
1430
1431	/* Make sure none of the resources are allocated. */
1432	for (i = 0; i < msix->msix_table_len; i++) {
1433		if (msix->msix_table[i].mte_vector == 0)
1434			continue;
1435		if (msix->msix_table[i].mte_handlers > 0)
1436			return (EBUSY);
1437		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1438		KASSERT(rle != NULL, ("missing resource"));
1439		if (rle->res != NULL)
1440			return (EBUSY);
1441	}
1442
1443	/* Free the existing resource list entries. */
1444	for (i = 0; i < msix->msix_table_len; i++) {
1445		if (msix->msix_table[i].mte_vector == 0)
1446			continue;
1447		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1448	}
1449
1450	/*
1451	 * Build the new virtual table keeping track of which vectors are
1452	 * used.
1453	 */
1454	free(msix->msix_table, M_DEVBUF);
1455	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1456	    M_DEVBUF, M_WAITOK | M_ZERO);
1457	for (i = 0; i < count; i++)
1458		msix->msix_table[i].mte_vector = vectors[i];
1459	msix->msix_table_len = count;
1460
1461	/* Free any unused IRQs and resize the vectors array if necessary. */
1462	j = msix->msix_alloc - 1;
1463	if (used[j] == 0) {
1464		struct msix_vector *vec;
1465
1466		while (used[j] == 0) {
1467			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1468			    msix->msix_vectors[j].mv_irq);
1469			j--;
1470		}
1471		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1472		    M_WAITOK);
1473		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1474		    (j + 1));
1475		free(msix->msix_vectors, M_DEVBUF);
1476		msix->msix_vectors = vec;
1477		msix->msix_alloc = j + 1;
1478	}
1479	free(used, M_DEVBUF);
1480
1481	/* Map the IRQs onto the rids. */
1482	for (i = 0; i < count; i++) {
1483		if (vectors[i] == 0)
1484			continue;
1485		irq = msix->msix_vectors[vectors[i]].mv_irq;
1486		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1487		    irq, 1);
1488	}
1489
1490	if (bootverbose) {
1491		device_printf(child, "Remapped MSI-X IRQs as: ");
1492		for (i = 0; i < count; i++) {
1493			if (i != 0)
1494				printf(", ");
1495			if (vectors[i] == 0)
1496				printf("---");
1497			else
1498				printf("%d",
1499				    msix->msix_vectors[vectors[i]].mv_irq);
1500		}
1501		printf("\n");
1502	}
1503
1504	return (0);
1505}
1506
/*
 * Release all allocated MSI-X messages for a device: disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has a handler or an allocated resource, and with
 * ENODEV if no MSI-X messages are allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1553
1554/*
1555 * Return the max supported MSI-X messages this device supports.
1556 * Basically, assuming the MD code can alloc messages, this function
1557 * should return the maximum value that pci_alloc_msix() can return.
1558 * Thus, it is subject to the tunables, etc.
1559 */
1560int
1561pci_msix_count_method(device_t dev, device_t child)
1562{
1563	struct pci_devinfo *dinfo = device_get_ivars(child);
1564	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1565
1566	if (pci_do_msix && msix->msix_location != 0)
1567		return (msix->msix_msgnum);
1568	return (0);
1569}
1570
1571/*
1572 * HyperTransport MSI mapping control
1573 */
/*
 * Enable or disable the HyperTransport MSI mapping window for a
 * device.  A non-zero 'addr' requests that the window be enabled if
 * the message address falls within it; addr == 0 requests disable.
 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do if the device has no HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/* Compare bits above 1MB (>> 20) to match the mapping window. */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1598
1599/*
1600 * Support for MSI message signalled interrupts.
1601 */
/*
 * Program the MSI address and data registers for a device and enable
 * MSI in the capability's control register.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable devices place the data register higher. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1628
/*
 * Disable MSI for a device: tear down any MSI -> HT mapping first,
 * then clear the enable bit in the MSI control register.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1643
1644/*
1645 * Restore MSI registers during resume.  If MSI is enabled then
1646 * restore the data and address registers in addition to the control
1647 * register.
1648 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit devices use a different data offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1674
/*
 * Reprogram the address/data pair of an already-allocated MSI or
 * MSI-X IRQ belonging to this device.  The updated mapping is
 * requested from the parent bridge via PCIB_MAP_MSI() and written to
 * the device.  Returns ENOENT if 'irq' is not one of the device's
 * messages.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram the device with the new values. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1750
1751/*
1752 * Returns true if the specified device is blacklisted because MSI
1753 * doesn't work.
1754 */
1755int
1756pci_msi_device_blacklisted(device_t dev)
1757{
1758	struct pci_quirk *q;
1759
1760	if (!pci_honor_msi_blacklist)
1761		return (0);
1762
1763	for (q = &pci_quirks[0]; q->devid; q++) {
1764		if (q->devid == pci_get_devid(dev) &&
1765		    q->type == PCI_QUIRK_DISABLE_MSI)
1766			return (1);
1767	}
1768	return (0);
1769}
1770
1771/*
1772 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1773 * we just check for blacklisted chipsets as represented by the
1774 * host-PCI bridge at device 0:0:0.  In the future, it may become
1775 * necessary to check other system attributes, such as the kenv values
1776 * that give the motherboard manufacturer and model number.
1777 */
1778static int
1779pci_msi_blacklisted(void)
1780{
1781	device_t dev;
1782
1783	if (!pci_honor_msi_blacklist)
1784		return (0);
1785
1786	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1787	if (!(pcie_chipset || pcix_chipset))
1788		return (1);
1789
1790	dev = pci_find_bsf(0, 0, 0);
1791	if (dev != NULL)
1792		return (pci_msi_device_blacklisted(dev));
1793	return (0);
1794}
1795
1796/*
1797 * Attempt to allocate *count MSI messages.  The actual number allocated is
1798 * returned in *count.  After this function returns, each message will be
1799 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1800 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the bridge can satisfy it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1919
1920/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers so the bridge can release them. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1968
1969/*
1970 * Return the max supported MSI messages this device supports.
1971 * Basically, assuming the MD code can alloc messages, this function
1972 * should return the maximum value that pci_alloc_msi() can return.
1973 * Thus, it is subject to the tunables, etc.
1974 */
1975int
1976pci_msi_count_method(device_t dev, device_t child)
1977{
1978	struct pci_devinfo *dinfo = device_get_ivars(child);
1979	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1980
1981	if (pci_do_msi && msi->msi_location != 0)
1982		return (msi->msi_msgnum);
1983	return (0);
1984}
1985
1986/* free pcicfgregs structure and all depending data structures */
1987
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Free the cached VPD identifier and keyword/value arrays. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list and free the devinfo itself. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2015
2016/*
 * PCI power management
2018 */
2019int
2020pci_set_powerstate_method(device_t dev, device_t child, int state)
2021{
2022	struct pci_devinfo *dinfo = device_get_ivars(child);
2023	pcicfgregs *cfg = &dinfo->cfg;
2024	uint16_t status;
2025	int result, oldstate, highest, delay;
2026
2027	if (cfg->pp.pp_cap == 0)
2028		return (EOPNOTSUPP);
2029
2030	/*
2031	 * Optimize a no state change request away.  While it would be OK to
2032	 * write to the hardware in theory, some devices have shown odd
2033	 * behavior when going from D3 -> D3.
2034	 */
2035	oldstate = pci_get_powerstate(child);
2036	if (oldstate == state)
2037		return (0);
2038
2039	/*
2040	 * The PCI power management specification states that after a state
2041	 * transition between PCI power states, system software must
2042	 * guarantee a minimal delay before the function accesses the device.
2043	 * Compute the worst case delay that we need to guarantee before we
2044	 * access the device.  Many devices will be responsive much more
2045	 * quickly than this delay, but there are some that don't respond
2046	 * instantly to state changes.  Transitions to/from D3 state require
2047	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2048	 * is done below with DELAY rather than a sleeper function because
2049	 * this function can be called from contexts where we cannot sleep.
2050	 */
2051	highest = (oldstate > state) ? oldstate : state;
2052	if (highest == PCI_POWERSTATE_D3)
2053	    delay = 10000;
2054	else if (highest == PCI_POWERSTATE_D2)
2055	    delay = 200;
2056	else
2057	    delay = 0;
2058	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2059	    & ~PCIM_PSTAT_DMASK;
2060	result = 0;
2061	switch (state) {
2062	case PCI_POWERSTATE_D0:
2063		status |= PCIM_PSTAT_D0;
2064		break;
2065	case PCI_POWERSTATE_D1:
2066		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2067			return (EOPNOTSUPP);
2068		status |= PCIM_PSTAT_D1;
2069		break;
2070	case PCI_POWERSTATE_D2:
2071		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2072			return (EOPNOTSUPP);
2073		status |= PCIM_PSTAT_D2;
2074		break;
2075	case PCI_POWERSTATE_D3:
2076		status |= PCIM_PSTAT_D3;
2077		break;
2078	default:
2079		return (EINVAL);
2080	}
2081
2082	if (bootverbose)
2083		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2084		    state);
2085
2086	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2087	if (delay)
2088		DELAY(delay);
2089	return (0);
2090}
2091
2092int
2093pci_get_powerstate_method(device_t dev, device_t child)
2094{
2095	struct pci_devinfo *dinfo = device_get_ivars(child);
2096	pcicfgregs *cfg = &dinfo->cfg;
2097	uint16_t status;
2098	int result;
2099
2100	if (cfg->pp.pp_cap != 0) {
2101		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2102		switch (status & PCIM_PSTAT_DMASK) {
2103		case PCIM_PSTAT_D0:
2104			result = PCI_POWERSTATE_D0;
2105			break;
2106		case PCIM_PSTAT_D1:
2107			result = PCI_POWERSTATE_D1;
2108			break;
2109		case PCIM_PSTAT_D2:
2110			result = PCI_POWERSTATE_D2;
2111			break;
2112		case PCIM_PSTAT_D3:
2113			result = PCI_POWERSTATE_D3;
2114			break;
2115		default:
2116			result = PCI_POWERSTATE_UNKNOWN;
2117			break;
2118		}
2119	} else {
2120		/* No support, device is always at D0 */
2121		result = PCI_POWERSTATE_D0;
2122	}
2123	return (result);
2124}
2125
2126/*
2127 * Some convenience functions for PCI device drivers.
2128 */
2129
2130static __inline void
2131pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2132{
2133	uint16_t	command;
2134
2135	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2136	command |= bit;
2137	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2138}
2139
2140static __inline void
2141pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2142{
2143	uint16_t	command;
2144
2145	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2146	command &= ~bit;
2147	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2148}
2149
2150int
2151pci_enable_busmaster_method(device_t dev, device_t child)
2152{
2153	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2154	return (0);
2155}
2156
2157int
2158pci_disable_busmaster_method(device_t dev, device_t child)
2159{
2160	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2161	return (0);
2162}
2163
2164int
2165pci_enable_io_method(device_t dev, device_t child, int space)
2166{
2167	uint16_t bit;
2168
2169	switch(space) {
2170	case SYS_RES_IOPORT:
2171		bit = PCIM_CMD_PORTEN;
2172		break;
2173	case SYS_RES_MEMORY:
2174		bit = PCIM_CMD_MEMEN;
2175		break;
2176	default:
2177		return (EINVAL);
2178	}
2179	pci_set_command_bit(dev, child, bit);
2180	return (0);
2181}
2182
2183int
2184pci_disable_io_method(device_t dev, device_t child, int space)
2185{
2186	uint16_t bit;
2187
2188	switch(space) {
2189	case SYS_RES_IOPORT:
2190		bit = PCIM_CMD_PORTEN;
2191		break;
2192	case SYS_RES_MEMORY:
2193		bit = PCIM_CMD_MEMEN;
2194		break;
2195	default:
2196		return (EINVAL);
2197	}
2198	pci_clear_command_bit(dev, child, bit);
2199	return (0);
2200}
2201
2202/*
2203 * New style pci driver.  Parent device is either a pci-host-bridge or a
2204 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2205 */
2206
/*
 * Dump the interesting fields of a device's parsed config header —
 * identity, location, class, command/status, timing, interrupt pin, and
 * any power-management, MSI, and MSI-X capabilities — to the console.
 * Emits nothing unless booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTA..INTD, hence the 'a' - 1 offset. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read the live status so "current D%d" is fresh. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2263
2264static int
2265pci_porten(device_t dev)
2266{
2267	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2268}
2269
2270static int
2271pci_memen(device_t dev)
2272{
2273	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2274}
2275
/*
 * Read the BAR at config offset 'reg', returning its raw contents in
 * *mapp and the size-probe value (what sticks after writing all 1's) in
 * *testvalp.  64-bit BARs consume two consecutive dwords.  The probe
 * sequence below is order-critical: decode is disabled first, the BAR
 * is probed, then the original value and command register are restored.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR's upper half lives in the next dword. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2322
2323static void
2324pci_write_bar(device_t dev, int reg, pci_addr_t base)
2325{
2326	pci_addr_t map;
2327	int ln2range;
2328
2329	map = pci_read_config(dev, reg, 4);
2330	ln2range = pci_maprange(map);
2331	pci_write_config(dev, reg, base, 4);
2332	if (ln2range == 64)
2333		pci_write_config(dev, reg + 4, base >> 32, 4);
2334}
2335
2336/*
2337 * Add a resource based on a pci map register. Return 1 if the map
2338 * register is a 32bit map register or 2 if it is a 64bit register.
2339 */
2340static int
2341pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2342    int force, int prefetch)
2343{
2344	pci_addr_t base, map, testval;
2345	pci_addr_t start, end, count;
2346	int barlen, basezero, maprange, mapsize, type;
2347	uint16_t cmd;
2348	struct resource *res;
2349
2350	pci_read_bar(dev, reg, &map, &testval);
2351	if (PCI_BAR_MEM(map)) {
2352		type = SYS_RES_MEMORY;
2353		if (map & PCIM_BAR_MEM_PREFETCH)
2354			prefetch = 1;
2355	} else
2356		type = SYS_RES_IOPORT;
2357	mapsize = pci_mapsize(testval);
2358	base = pci_mapbase(map);
2359#ifdef __PCI_BAR_ZERO_VALID
2360	basezero = 0;
2361#else
2362	basezero = base == 0;
2363#endif
2364	maprange = pci_maprange(map);
2365	barlen = maprange == 64 ? 2 : 1;
2366
2367	/*
2368	 * For I/O registers, if bottom bit is set, and the next bit up
2369	 * isn't clear, we know we have a BAR that doesn't conform to the
2370	 * spec, so ignore it.  Also, sanity check the size of the data
2371	 * areas to the type of memory involved.  Memory must be at least
2372	 * 16 bytes in size, while I/O ranges must be at least 4.
2373	 */
2374	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2375		return (barlen);
2376	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2377	    (type == SYS_RES_IOPORT && mapsize < 2))
2378		return (barlen);
2379
2380	if (bootverbose) {
2381		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2382		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2383		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2384			printf(", port disabled\n");
2385		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2386			printf(", memory disabled\n");
2387		else
2388			printf(", enabled\n");
2389	}
2390
2391	/*
2392	 * If base is 0, then we have problems if this architecture does
2393	 * not allow that.  It is best to ignore such entries for the
2394	 * moment.  These will be allocated later if the driver specifically
2395	 * requests them.  However, some removable busses look better when
2396	 * all resources are allocated, so allow '0' to be overriden.
2397	 *
2398	 * Similarly treat maps whose values is the same as the test value
2399	 * read back.  These maps have had all f's written to them by the
2400	 * BIOS in an attempt to disable the resources.
2401	 */
2402	if (!force && (basezero || map == testval))
2403		return (barlen);
2404	if ((u_long)base != base) {
2405		device_printf(bus,
2406		    "pci%d:%d:%d:%d bar %#x too many address bits",
2407		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2408		    pci_get_function(dev), reg);
2409		return (barlen);
2410	}
2411
2412	/*
2413	 * This code theoretically does the right thing, but has
2414	 * undesirable side effects in some cases where peripherals
2415	 * respond oddly to having these bits enabled.  Let the user
2416	 * be able to turn them off (since pci_enable_io_modes is 1 by
2417	 * default).
2418	 */
2419	if (pci_enable_io_modes) {
2420		/* Turn on resources that have been left off by a lazy BIOS */
2421		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2422			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2423			cmd |= PCIM_CMD_PORTEN;
2424			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2425		}
2426		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2427			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2428			cmd |= PCIM_CMD_MEMEN;
2429			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2430		}
2431	} else {
2432		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2433			return (barlen);
2434		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2435			return (barlen);
2436	}
2437
2438	count = 1 << mapsize;
2439	if (basezero || base == pci_mapbase(testval)) {
2440		start = 0;	/* Let the parent decide. */
2441		end = ~0ULL;
2442	} else {
2443		start = base;
2444		end = base + (1 << mapsize) - 1;
2445	}
2446	resource_list_add(rl, type, reg, start, end, count);
2447
2448	/*
2449	 * Try to allocate the resource for this BAR from our parent
2450	 * so that this resource range is already reserved.  The
2451	 * driver for this device will later inherit this resource in
2452	 * pci_alloc_resource().
2453	 */
2454	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2455	    prefetch ? RF_PREFETCHABLE : 0);
2456	if (res == NULL) {
2457		/*
2458		 * If the allocation fails, clear the BAR and delete
2459		 * the resource list entry to force
2460		 * pci_alloc_resource() to allocate resources from the
2461		 * parent.
2462		 */
2463		resource_list_delete(rl, type, reg);
2464		start = 0;
2465	} else
2466		start = rman_get_start(res);
2467	pci_write_bar(dev, reg, start);
2468	return (barlen);
2469}
2470
2471/*
2472 * For ATA devices we need to decide early what addressing mode to use.
2473 * Legacy demands that the primary and secondary ATA ports sits on the
2474 * same addresses that old ISA hardware did. This dictates that we use
2475 * those addresses and ignore the BAR's if we cannot set PCI native
2476 * addressing mode.
2477 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	/*
	 * The progif mode bits tell us, per channel, whether the
	 * controller is in native-PCI mode (use the BARs) or legacy
	 * mode (hardwired ISA-compatible port ranges).
	 */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: BARs 0 and 1 are valid. */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy primary channel: 0x1f0-0x1f7 and 0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: BARs 2 and 3 are valid. */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Legacy secondary channel: 0x170-0x177 and 0x376. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BARs 4 (bus-master DMA) and 5 are mode-independent. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2532
/*
 * Assign an IRQ to 'dev' and record it as the rid 0 SYS_RES_IRQ
 * resource.  The IRQ is taken, in order of preference, from a
 * "hw.pci<D>.<B>.<S>.INT<x>.irq" tunable, from routing via the parent
 * bus, or from the device's intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable range 1-254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2580
2581/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* The OHCI operational registers live behind BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means the BIOS/SMM currently owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request an ownership change and poll up to 100ms for it. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* SMM never released the controller: force a reset. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2617
2618/* Perform early UHCI takeover from SMM. */
2619static void
2620uhci_early_takeover(device_t self)
2621{
2622	struct resource *res;
2623	int rid;
2624
2625	/*
2626	 * Set the PIRQD enable bit and switch off all the others. We don't
2627	 * want legacy support to interfere with us XXX Does this also mean
2628	 * that the BIOS won't touch the keyboard anymore if it is connected
2629	 * to the ports of the root hub?
2630	 */
2631	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2632
2633	/* Disable interrupts */
2634	rid = PCI_UHCI_BASE_REG;
2635	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2636	if (res != NULL) {
2637		bus_write_2(res, UHCI_INTR, 0);
2638		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2639	}
2640}
2641
2642/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* The EHCI capability/operational registers live behind BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list in config space. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the legacy-support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore clear: BIOS does not own the controller. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS semaphore and poll up to 100ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2697
/*
 * Populate the resource list of 'dev' from its BARs and quirk table
 * entries, assign its INTx interrupt, and perform early takeover of USB
 * controllers from the BIOS/SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2, skipping the upper half
		 * of 64-bit BARs. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from the BIOS/SMM early on. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2752
/*
 * Scan every slot/function on bus 'busno' in domain 'domain' and add a
 * child device for each function found.  'dinfo_size' lets subclasses
 * allocate an extended pci_devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with an invalid/absent header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices: probe all eight functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2785
/*
 * Create the newbus child for a discovered PCI function and reserve its
 * resources.  The cfg_save before cfg_restore primes the saved-register
 * state so a later restore has valid data to work from.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2797
2798static int
2799pci_probe(device_t dev)
2800{
2801
2802	device_set_desc(dev, "PCI bus");
2803
2804	/* Allow other subclasses to override this driver. */
2805	return (BUS_PROBE_GENERIC);
2806}
2807
2808static int
2809pci_attach(device_t dev)
2810{
2811	int busno, domain;
2812
2813	/*
2814	 * Since there can be multiple independantly numbered PCI
2815	 * busses on systems with multiple PCI domains, we can't use
2816	 * the unit number to decide which bus we are probing. We ask
2817	 * the parent pcib what our domain and bus numbers are.
2818	 */
2819	domain = pcib_get_domain(dev);
2820	busno = pcib_get_bus(dev);
2821	if (bootverbose)
2822		device_printf(dev, "domain=%d, physical bus=%d\n",
2823		    domain, busno);
2824	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2825	return (bus_generic_attach(dev));
2826}
2827
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 target state. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2876
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2912
2913static void
2914pci_load_vendor_data(void)
2915{
2916	caddr_t vendordata, info;
2917
2918	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2919		info = preload_search_info(vendordata, MODINFO_ADDR);
2920		pci_vendordata = *(char **)info;
2921		info = preload_search_info(vendordata, MODINFO_SIZE);
2922		pci_vendordata_size = *(size_t *)info;
2923		/* terminate the database */
2924		pci_vendordata[pci_vendordata_size] = '\n';
2925	}
2926}
2927
/*
 * Bus callback invoked when a new driver is registered: re-probe any
 * child devices that do not yet have a driver attached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Power the device up before probing; power it back
		 * down (and save state) if no driver attaches. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2956
/*
 * Set up an interrupt handler on 'irq' for 'child'.  For a direct child
 * this also manages the INTx disable bit and, for MSI/MSI-X rids (> 0),
 * lazily maps the vector via the parent bridge and enables the message,
 * tracking the number of handlers per message.  Returns 0 or an errno.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* rid 0 is the legacy INTx interrupt; rids >= 1 are MSI/MSI-X. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is added. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rid N corresponds to table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* On mapping failure, undo the generic setup above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3048
/*
 * Tear down an interrupt handler previously set up by pci_setup_intr().
 * For a direct child this masks INTx (rid 0) or decrements the per-message
 * handler count and disables/masks the MSI or MSI-X message when it drops
 * to zero.  Returns 0 or an errno.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * this relies on an MSI/MSI-X rid always having a resource
		 * list entry — confirm that invariant holds for all callers.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last handler gone: disable MSI entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler gone: mask this MSI-X message. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3107
3108int
3109pci_print_child(device_t dev, device_t child)
3110{
3111	struct pci_devinfo *dinfo;
3112	struct resource_list *rl;
3113	int retval = 0;
3114
3115	dinfo = device_get_ivars(child);
3116	rl = &dinfo->resources;
3117
3118	retval += bus_print_child_header(dev, child);
3119
3120	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3121	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3122	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3123	if (device_get_flags(dev))
3124		retval += printf(" flags %#x", device_get_flags(dev));
3125
3126	retval += printf(" at device %d.%d", pci_get_slot(child),
3127	    pci_get_function(child));
3128
3129	retval += bus_print_child_footer(dev, child);
3130
3131	return (retval);
3132}
3133
/*
 * Class/subclass -> human-readable description table, consulted by
 * pci_probe_nomatch() when the loaded vendor database has no entry
 * for a device.  A subclass of -1 supplies the generic description
 * for the whole class; more specific subclass rows follow it.  The
 * table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3225
3226void
3227pci_probe_nomatch(device_t dev, device_t child)
3228{
3229	int	i;
3230	char	*cp, *scp, *device;
3231
3232	/*
3233	 * Look for a listing for this device in a loaded device database.
3234	 */
3235	if ((device = pci_describe_device(child)) != NULL) {
3236		device_printf(dev, "<%s>", device);
3237		free(device, M_DEVBUF);
3238	} else {
3239		/*
3240		 * Scan the class/subclass descriptions for a general
3241		 * description.
3242		 */
3243		cp = "unknown";
3244		scp = NULL;
3245		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3246			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3247				if (pci_nomatch_tab[i].subclass == -1) {
3248					cp = pci_nomatch_tab[i].desc;
3249				} else if (pci_nomatch_tab[i].subclass ==
3250				    pci_get_subclass(child)) {
3251					scp = pci_nomatch_tab[i].desc;
3252				}
3253			}
3254		}
3255		device_printf(dev, "<%s%s%s>",
3256		    cp ? cp : "",
3257		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3258		    scp ? scp : "");
3259	}
3260	printf(" at device %d.%d (no driver attached)\n",
3261	    pci_get_slot(child), pci_get_function(child));
3262	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3263	return;
3264}
3265
3266/*
3267 * Parse the PCI device database, if loaded, and return a pointer to a
3268 * description of the device.
3269 *
3270 * The database is flat text formatted as follows:
3271 *
3272 * Any line not in a valid format is ignored.
3273 * Lines are terminated with newline '\n' characters.
3274 *
3275 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3276 * the vendor name.
3277 *
3278 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3279 * - devices cannot be listed without a corresponding VENDOR line.
3280 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3281 * another TAB, then the device name.
3282 */
3283
3284/*
3285 * Assuming (ptr) points to the beginning of a line in the database,
3286 * return the vendor or device and description of the next entry.
3287 * The value of (vendor) or (device) inappropriate for the entry type
3288 * is set to -1.  Returns nonzero at the end of the database.
3289 *
 * Note that this is not entirely robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
3293 */
3294static int
3295pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3296{
3297	char	*cp = *ptr;
3298	int	left;
3299
3300	*device = -1;
3301	*vendor = -1;
3302	**desc = '\0';
3303	for (;;) {
3304		left = pci_vendordata_size - (cp - pci_vendordata);
3305		if (left <= 0) {
3306			*ptr = cp;
3307			return(1);
3308		}
3309
3310		/* vendor entry? */
3311		if (*cp != '\t' &&
3312		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3313			break;
3314		/* device entry? */
3315		if (*cp == '\t' &&
3316		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3317			break;
3318
3319		/* skip to next line */
3320		while (*cp != '\n' && left > 0) {
3321			cp++;
3322			left--;
3323		}
3324		if (*cp == '\n') {
3325			cp++;
3326			left--;
3327		}
3328	}
3329	/* skip to next line */
3330	while (*cp != '\n' && left > 0) {
3331		cp++;
3332		left--;
3333	}
3334	if (*cp == '\n' && left > 0)
3335		cp++;
3336	*ptr = cp;
3337	return(0);
3338}
3339
/*
 * Look up a device in the loaded vendor database and return a
 * malloc'ed "vendor, device" description string, or NULL if the
 * database is absent or the vendor is unknown.  The caller frees the
 * result with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers for the parsed descriptions. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no matching device entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor line ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the raw device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3392
/*
 * Bus method to read a PCI instance variable of a child device.  All
 * values are served from the cached config registers in the child's
 * pci_devinfo.  Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR
 * (not supported by the generic bus), and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3475
/*
 * Bus method to write a PCI instance variable of a child device.
 * Only the interrupt pin is writable; the config-derived ivars are
 * deliberately read-only and return EINVAL, and unknown ivars return
 * ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3508
3509
3510#include "opt_ddb.h"
3511#ifdef DDB
3512#include <ddb/ddb.h>
3513#include <sys/cons.h>
3514
3515/*
3516 * List resources based on pci map registers, used for within ddb
3517 */
3518
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (driver name/unit or "none",
 * selector, packed class code, subsystem and chip IDs, revision and
 * header type), stopping early if the pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices are numbered with a rolling counter. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3558#endif /* DDB */
3559
/*
 * Lazily reserve the resource backing a BAR for a child device.
 * Sizes and types the BAR via pci_read_bar(), validates that the
 * requested resource type matches the BAR type, allocates a suitably
 * sized and aligned range from the parent bus, records it in the
 * child's resource list as RLE_RESERVED, and finally programs the
 * BAR with the chosen base address.  Returns the reserved resource,
 * or NULL on any failure or for an unimplemented BAR.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The BAR's memory/ioport type must agree with the request. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	/* Mark reserved so release gives the range back to the bus. */
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3649
3650
/*
 * Bus method to allocate a resource for a child device.  Requests for
 * grandchildren are passed straight up to our parent.  For direct
 * children this performs lazy allocation: a legacy INTx (rid 0) may
 * be routed on first use (and is refused once MSI/MSI-X messages have
 * been allocated), and BAR ranges are reserved on demand via
 * pci_reserve_map() before the resource-list allocation proceeds.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The actual allocation comes out of the child's resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3701
3702int
3703pci_release_resource(device_t dev, device_t child, int type, int rid,
3704    struct resource *r)
3705{
3706
3707	if (device_get_parent(child) != dev)
3708		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3709		    type, rid, r));
3710
3711	/*
3712	 * For BARs we don't actually want to release the resource.
3713	 * Instead, we deactivate the resource if needed and then give
3714	 * ownership of the BAR back to the bus.  This is handled for us
3715	 * in resource_list_release() since we use resource_list_reserve()
3716	 * for BARs.
3717	 */
3718	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3719}
3720
3721int
3722pci_activate_resource(device_t dev, device_t child, int type, int rid,
3723    struct resource *r)
3724{
3725	int error;
3726
3727	error = bus_generic_activate_resource(dev, child, type, rid, r);
3728	if (error)
3729		return (error);
3730
3731	/* Enable decoding in the command register when activating BARs. */
3732	if (device_get_parent(child) == dev) {
3733		switch (type) {
3734		case SYS_RES_IOPORT:
3735		case SYS_RES_MEMORY:
3736			error = PCI_ENABLE_IO(dev, child, type);
3737			break;
3738		}
3739	}
3740	return (error);
3741}
3742
/*
 * Bus method to delete a resource entry for a direct child.  Refuses
 * to delete a resource that is still active or busy.  Otherwise, for
 * a BAR, first clears the BAR register so the device stops decoding,
 * then unreserves the range and removes the resource-list entry.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3785
3786struct resource_list *
3787pci_get_resource_list (device_t dev, device_t child)
3788{
3789	struct pci_devinfo *dinfo = device_get_ivars(child);
3790
3791	return (&dinfo->resources);
3792}
3793
3794uint32_t
3795pci_read_config_method(device_t dev, device_t child, int reg, int width)
3796{
3797	struct pci_devinfo *dinfo = device_get_ivars(child);
3798	pcicfgregs *cfg = &dinfo->cfg;
3799
3800	return (PCIB_READ_CONFIG(device_get_parent(dev),
3801	    cfg->bus, cfg->slot, cfg->func, reg, width));
3802}
3803
3804void
3805pci_write_config_method(device_t dev, device_t child, int reg,
3806    uint32_t val, int width)
3807{
3808	struct pci_devinfo *dinfo = device_get_ivars(child);
3809	pcicfgregs *cfg = &dinfo->cfg;
3810
3811	PCIB_WRITE_CONFIG(device_get_parent(dev),
3812	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3813}
3814
3815int
3816pci_child_location_str_method(device_t dev, device_t child, char *buf,
3817    size_t buflen)
3818{
3819
3820	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3821	    pci_get_function(child));
3822	return (0);
3823}
3824
3825int
3826pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3827    size_t buflen)
3828{
3829	struct pci_devinfo *dinfo;
3830	pcicfgregs *cfg;
3831
3832	dinfo = device_get_ivars(child);
3833	cfg = &dinfo->cfg;
3834	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3835	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3836	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3837	    cfg->progif);
3838	return (0);
3839}
3840
3841int
3842pci_assign_interrupt_method(device_t dev, device_t child)
3843{
3844	struct pci_devinfo *dinfo = device_get_ivars(child);
3845	pcicfgregs *cfg = &dinfo->cfg;
3846
3847	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3848	    cfg->intpin));
3849}
3850
3851static int
3852pci_modevent(module_t mod, int what, void *arg)
3853{
3854	static struct cdev *pci_cdev;
3855
3856	switch (what) {
3857	case MOD_LOAD:
3858		STAILQ_INIT(&pci_devq);
3859		pci_generation = 0;
3860		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3861		    "pci");
3862		pci_load_vendor_data();
3863		break;
3864
3865	case MOD_UNLOAD:
3866		destroy_dev(pci_cdev);
3867		break;
3868	}
3869
3870	return (0);
3871}
3872
/*
 * Restore a type 0 device's saved configuration — BARs, command,
 * interrupt, latency registers and MSI/MSI-X state — typically after
 * resume or after the device was powered back up from D3.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3918
/*
 * Snapshot a type 0 device's writable configuration registers into
 * its pci_devinfo so pci_cfg_restore() can put them back later, and
 * optionally (setstate != 0) power a driverless device down to D3,
 * subject to the pci_do_power_nodriver policy and device class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level deliberately falls through to the next. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4002