pci.c revision 214110
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 214110 2010-10-20 16:47:09Z jkim $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/ehcireg.h>
66#include <dev/usb/controller/ohcireg.h>
67#include <dev/usb/controller/uhcireg.h>
68
69#include "pcib_if.h"
70#include "pci_if.h"
71
72static pci_addr_t	pci_mapbase(uint64_t mapreg);
73static const char	*pci_maptype(uint64_t mapreg);
74static int		pci_mapsize(uint64_t testval);
75static int		pci_maprange(uint64_t mapreg);
76static pci_addr_t	pci_rombase(uint64_t mapreg);
77static int		pci_romsize(uint64_t testval);
78static void		pci_fixancient(pcicfgregs *cfg);
79static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80
81static int		pci_porten(device_t dev);
82static int		pci_memen(device_t dev);
83static void		pci_assign_interrupt(device_t bus, device_t dev,
84			    int force_route);
85static int		pci_add_map(device_t bus, device_t dev, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg, uint32_t *data);
99#if 0
100static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static void		pci_disable_msi(device_t dev);
105static void		pci_enable_msi(device_t dev, uint64_t address,
106			    uint16_t data);
107static void		pci_enable_msix(device_t dev, u_int index,
108			    uint64_t address, uint32_t data);
109static void		pci_mask_msix(device_t dev, u_int index);
110static void		pci_unmask_msix(device_t dev, u_int index);
111static int		pci_msi_blacklisted(void);
112static void		pci_resume_msi(device_t dev);
113static void		pci_resume_msix(device_t dev);
114static int		pci_remap_intr_method(device_t bus, device_t dev,
115			    u_int irq);
116
117static device_method_t pci_methods[] = {
118	/* Device interface */
119	DEVMETHOD(device_probe,		pci_probe),
120	DEVMETHOD(device_attach,	pci_attach),
121	DEVMETHOD(device_detach,	bus_generic_detach),
122	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
123	DEVMETHOD(device_suspend,	pci_suspend),
124	DEVMETHOD(device_resume,	pci_resume),
125
126	/* Bus interface */
127	DEVMETHOD(bus_print_child,	pci_print_child),
128	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
129	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
130	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
131	DEVMETHOD(bus_driver_added,	pci_driver_added),
132	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
133	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
134
135	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
136	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
137	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
138	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
139	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
140	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
141	DEVMETHOD(bus_activate_resource, pci_activate_resource),
142	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
143	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
144	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
145	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
146
147	/* PCI interface */
148	DEVMETHOD(pci_read_config,	pci_read_config_method),
149	DEVMETHOD(pci_write_config,	pci_write_config_method),
150	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
151	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
152	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
153	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
154	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
155	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
156	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
157	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
158	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
159	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
160	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
161	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
162	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
163	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
164	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
165	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
166
167	{ 0, 0 }
168};
169
170DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
171
172static devclass_t pci_devclass;
173DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
174MODULE_VERSION(pci, 1);
175
176static char	*pci_vendordata;
177static size_t	pci_vendordata_size;
178
179
180struct pci_quirk {
181	uint32_t devid;	/* Vendor/device of the card */
182	int	type;
183#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
184#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
185	int	arg1;
186	int	arg2;
187};
188
189struct pci_quirk pci_quirks[] = {
190	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
191	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
192	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
193	/* As does the Serverworks OSB4 (the SMBus mapping register) */
194	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
195
196	/*
197	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
198	 * or the CMIC-SL (AKA ServerWorks GC_LE).
199	 */
200	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
201	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
202
203	/*
204	 * MSI doesn't work on earlier Intel chipsets including
205	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
206	 */
207	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
209	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
210	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
211	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
212	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
213	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
214
215	/*
216	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
217	 * bridge.
218	 */
219	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220
221	{ 0 }
222};
223
224/* map register information */
225#define	PCI_MAPMEM	0x01	/* memory map */
226#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
227#define	PCI_MAPPORT	0x04	/* port map */
228
229struct devlist pci_devq;
230uint32_t pci_generation;
231uint32_t pci_numdevs = 0;
232static int pcie_chipset, pcix_chipset;
233
234/* sysctl vars */
235SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
236
237static int pci_enable_io_modes = 1;
238TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
239SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
240    &pci_enable_io_modes, 1,
241    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
242enable these bits correctly.  We'd like to do this all the time, but there\n\
243are some peripherals that this causes problems with.");
244
245static int pci_do_power_nodriver = 0;
246TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
247SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
248    &pci_do_power_nodriver, 0,
249  "Place a function into D3 state when no driver attaches to it.  0 means\n\
250disable.  1 means conservatively place devices into D3 state.  2 means\n\
251agressively place devices into D3 state.  3 means put absolutely everything\n\
252in D3 state.");
253
254int pci_do_power_resume = 1;
255TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
256SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
257    &pci_do_power_resume, 1,
258  "Transition from D3 -> D0 on resume.");
259
260int pci_do_power_suspend = 1;
261TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
262SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
263    &pci_do_power_suspend, 1,
264  "Transition from D0 -> D3 on suspend.");
265
266static int pci_do_msi = 1;
267TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
268SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
269    "Enable support for MSI interrupts");
270
271static int pci_do_msix = 1;
272TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
273SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
274    "Enable support for MSI-X interrupts");
275
276static int pci_honor_msi_blacklist = 1;
277TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
278SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
279    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
280
281#if defined(__i386__) || defined(__amd64__)
282static int pci_usb_takeover = 1;
283#else
284static int pci_usb_takeover = 0;
285#endif
286TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
287SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
288    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
289Disable this if you depend on BIOS emulation of USB devices, that is\n\
290you use USB devices (like keyboard or mouse) but do not load USB drivers");
291
292/* Find a device_t by bus/slot/function in domain 0 */
293
294device_t
295pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
296{
297
298	return (pci_find_dbsf(0, bus, slot, func));
299}
300
301/* Find a device_t by domain/bus/slot/function */
302
303device_t
304pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
305{
306	struct pci_devinfo *dinfo;
307
308	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
309		if ((dinfo->cfg.domain == domain) &&
310		    (dinfo->cfg.bus == bus) &&
311		    (dinfo->cfg.slot == slot) &&
312		    (dinfo->cfg.func == func)) {
313			return (dinfo->cfg.dev);
314		}
315	}
316
317	return (NULL);
318}
319
320/* Find a device_t by vendor/device ID */
321
322device_t
323pci_find_device(uint16_t vendor, uint16_t device)
324{
325	struct pci_devinfo *dinfo;
326
327	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
328		if ((dinfo->cfg.vendor == vendor) &&
329		    (dinfo->cfg.device == device)) {
330			return (dinfo->cfg.dev);
331		}
332	}
333
334	return (NULL);
335}
336
337static int
338pci_printf(pcicfgregs *cfg, const char *fmt, ...)
339{
340	va_list ap;
341	int retval;
342
343	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
344	    cfg->func);
345	va_start(ap, fmt);
346	retval += vprintf(fmt, ap);
347	va_end(ap);
348	return (retval);
349}
350
351/* return base address of memory or port map */
352
353static pci_addr_t
354pci_mapbase(uint64_t mapreg)
355{
356
357	if (PCI_BAR_MEM(mapreg))
358		return (mapreg & PCIM_BAR_MEM_BASE);
359	else
360		return (mapreg & PCIM_BAR_IO_BASE);
361}
362
363/* return map type of memory or port map */
364
365static const char *
366pci_maptype(uint64_t mapreg)
367{
368
369	if (PCI_BAR_IO(mapreg))
370		return ("I/O Port");
371	if (mapreg & PCIM_BAR_MEM_PREFETCH)
372		return ("Prefetchable Memory");
373	return ("Memory");
374}
375
376/* return log2 of map size decoded for memory or port map */
377
378static int
379pci_mapsize(uint64_t testval)
380{
381	int ln2size;
382
383	testval = pci_mapbase(testval);
384	ln2size = 0;
385	if (testval != 0) {
386		while ((testval & 1) == 0)
387		{
388			ln2size++;
389			testval >>= 1;
390		}
391	}
392	return (ln2size);
393}
394
395/* return base address of device ROM */
396
397static pci_addr_t
398pci_rombase(uint64_t mapreg)
399{
400
401	return (mapreg & PCIM_BIOS_ADDR_MASK);
402}
403
404/* return log2 of map size decided for device ROM */
405
406static int
407pci_romsize(uint64_t testval)
408{
409	int ln2size;
410
411	testval = pci_rombase(testval);
412	ln2size = 0;
413	if (testval != 0) {
414		while ((testval & 1) == 0)
415		{
416			ln2size++;
417			testval >>= 1;
418		}
419	}
420	return (ln2size);
421}
422
423/* return log2 of address range supported by map register */
424
425static int
426pci_maprange(uint64_t mapreg)
427{
428	int ln2range = 0;
429
430	if (PCI_BAR_IO(mapreg))
431		ln2range = 32;
432	else
433		switch (mapreg & PCIM_BAR_MEM_TYPE) {
434		case PCIM_BAR_MEM_32:
435			ln2range = 32;
436			break;
437		case PCIM_BAR_MEM_1MB:
438			ln2range = 20;
439			break;
440		case PCIM_BAR_MEM_64:
441			ln2range = 64;
442			break;
443		}
444	return (ln2range);
445}
446
447/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
448
449static void
450pci_fixancient(pcicfgregs *cfg)
451{
452	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
453		return;
454
455	/* PCI to PCI bridges use header type 1 */
456	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
457		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
458}
459
460/* extract header type specific config data */
461
462static void
463pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
464{
465#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
466	switch (cfg->hdrtype & PCIM_HDRTYPE) {
467	case PCIM_HDRTYPE_NORMAL:
468		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
469		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
470		cfg->nummaps	    = PCI_MAXMAPS_0;
471		break;
472	case PCIM_HDRTYPE_BRIDGE:
473		cfg->nummaps	    = PCI_MAXMAPS_1;
474		break;
475	case PCIM_HDRTYPE_CARDBUS:
476		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
477		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
478		cfg->nummaps	    = PCI_MAXMAPS_2;
479		break;
480	}
481#undef REG
482}
483
484/* read configuration header into pcicfgregs structure */
485struct pci_devinfo *
486pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
487{
488#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
489	pcicfgregs *cfg = NULL;
490	struct pci_devinfo *devlist_entry;
491	struct devlist *devlist_head;
492
493	devlist_head = &pci_devq;
494
495	devlist_entry = NULL;
496
497	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
498		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
499		if (devlist_entry == NULL)
500			return (NULL);
501
502		cfg = &devlist_entry->cfg;
503
504		cfg->domain		= d;
505		cfg->bus		= b;
506		cfg->slot		= s;
507		cfg->func		= f;
508		cfg->vendor		= REG(PCIR_VENDOR, 2);
509		cfg->device		= REG(PCIR_DEVICE, 2);
510		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
511		cfg->statreg		= REG(PCIR_STATUS, 2);
512		cfg->baseclass		= REG(PCIR_CLASS, 1);
513		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
514		cfg->progif		= REG(PCIR_PROGIF, 1);
515		cfg->revid		= REG(PCIR_REVID, 1);
516		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
517		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
518		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
519		cfg->intpin		= REG(PCIR_INTPIN, 1);
520		cfg->intline		= REG(PCIR_INTLINE, 1);
521
522		cfg->mingnt		= REG(PCIR_MINGNT, 1);
523		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
524
525		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
526		cfg->hdrtype		&= ~PCIM_MFDEV;
527
528		pci_fixancient(cfg);
529		pci_hdrtypedata(pcib, b, s, f, cfg);
530
531		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
532			pci_read_extcap(pcib, cfg);
533
534		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
535
536		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
537		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
538		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
539		devlist_entry->conf.pc_sel.pc_func = cfg->func;
540		devlist_entry->conf.pc_hdr = cfg->hdrtype;
541
542		devlist_entry->conf.pc_subvendor = cfg->subvendor;
543		devlist_entry->conf.pc_subdevice = cfg->subdevice;
544		devlist_entry->conf.pc_vendor = cfg->vendor;
545		devlist_entry->conf.pc_device = cfg->device;
546
547		devlist_entry->conf.pc_class = cfg->baseclass;
548		devlist_entry->conf.pc_subclass = cfg->subclass;
549		devlist_entry->conf.pc_progif = cfg->progif;
550		devlist_entry->conf.pc_revid = cfg->revid;
551
552		pci_numdevs++;
553		pci_generation++;
554	}
555	return (devlist_entry);
556#undef REG
557}
558
559static void
560pci_read_extcap(device_t pcib, pcicfgregs *cfg)
561{
562#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
563#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
564#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
565	uint64_t addr;
566#endif
567	uint32_t val;
568	int	ptr, nextptr, ptrptr;
569
570	switch (cfg->hdrtype & PCIM_HDRTYPE) {
571	case PCIM_HDRTYPE_NORMAL:
572	case PCIM_HDRTYPE_BRIDGE:
573		ptrptr = PCIR_CAP_PTR;
574		break;
575	case PCIM_HDRTYPE_CARDBUS:
576		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
577		break;
578	default:
579		return;		/* no extended capabilities support */
580	}
581	nextptr = REG(ptrptr, 1);	/* sanity check? */
582
583	/*
584	 * Read capability entries.
585	 */
586	while (nextptr != 0) {
587		/* Sanity check */
588		if (nextptr > 255) {
589			printf("illegal PCI extended capability offset %d\n",
590			    nextptr);
591			return;
592		}
593		/* Find the next entry */
594		ptr = nextptr;
595		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
596
597		/* Process this entry */
598		switch (REG(ptr + PCICAP_ID, 1)) {
599		case PCIY_PMG:		/* PCI power management */
600			if (cfg->pp.pp_cap == 0) {
601				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
602				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
603				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
604				if ((nextptr - ptr) > PCIR_POWER_DATA)
605					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
606			}
607			break;
608#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
609		case PCIY_HT:		/* HyperTransport */
610			/* Determine HT-specific capability type. */
611			val = REG(ptr + PCIR_HT_COMMAND, 2);
612			switch (val & PCIM_HTCMD_CAP_MASK) {
613			case PCIM_HTCAP_MSI_MAPPING:
614				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
615					/* Sanity check the mapping window. */
616					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
617					    4);
618					addr <<= 32;
619					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
620					    4);
621					if (addr != MSI_INTEL_ADDR_BASE)
622						device_printf(pcib,
623	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
624						    cfg->domain, cfg->bus,
625						    cfg->slot, cfg->func,
626						    (long long)addr);
627				} else
628					addr = MSI_INTEL_ADDR_BASE;
629
630				cfg->ht.ht_msimap = ptr;
631				cfg->ht.ht_msictrl = val;
632				cfg->ht.ht_msiaddr = addr;
633				break;
634			}
635			break;
636#endif
637		case PCIY_MSI:		/* PCI MSI */
638			cfg->msi.msi_location = ptr;
639			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
640			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
641						     PCIM_MSICTRL_MMC_MASK)>>1);
642			break;
643		case PCIY_MSIX:		/* PCI MSI-X */
644			cfg->msix.msix_location = ptr;
645			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
646			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
647			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
648			val = REG(ptr + PCIR_MSIX_TABLE, 4);
649			cfg->msix.msix_table_bar = PCIR_BAR(val &
650			    PCIM_MSIX_BIR_MASK);
651			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
652			val = REG(ptr + PCIR_MSIX_PBA, 4);
653			cfg->msix.msix_pba_bar = PCIR_BAR(val &
654			    PCIM_MSIX_BIR_MASK);
655			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
656			break;
657		case PCIY_VPD:		/* PCI Vital Product Data */
658			cfg->vpd.vpd_reg = ptr;
659			break;
660		case PCIY_SUBVENDOR:
661			/* Should always be true. */
662			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
663			    PCIM_HDRTYPE_BRIDGE) {
664				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
665				cfg->subvendor = val & 0xffff;
666				cfg->subdevice = val >> 16;
667			}
668			break;
669		case PCIY_PCIX:		/* PCI-X */
670			/*
671			 * Assume we have a PCI-X chipset if we have
672			 * at least one PCI-PCI bridge with a PCI-X
673			 * capability.  Note that some systems with
674			 * PCI-express or HT chipsets might match on
675			 * this check as well.
676			 */
677			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
678			    PCIM_HDRTYPE_BRIDGE)
679				pcix_chipset = 1;
680			break;
681		case PCIY_EXPRESS:	/* PCI-express */
682			/*
683			 * Assume we have a PCI-express chipset if we have
684			 * at least one PCI-express device.
685			 */
686			pcie_chipset = 1;
687			break;
688		default:
689			break;
690		}
691	}
692/* REG and WREG use carry through to next functions */
693}
694
695/*
696 * PCI Vital Product Data
697 */
698
699#define	PCI_VPD_TIMEOUT		1000000
700
701static int
702pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
703{
704	int count = PCI_VPD_TIMEOUT;
705
706	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
707
708	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
709
710	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
711		if (--count < 0)
712			return (ENXIO);
713		DELAY(1);	/* limit looping */
714	}
715	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
716
717	return (0);
718}
719
720#if 0
721static int
722pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
723{
724	int count = PCI_VPD_TIMEOUT;
725
726	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
727
728	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
729	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
730	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
731		if (--count < 0)
732			return (ENXIO);
733		DELAY(1);	/* limit looping */
734	}
735
736	return (0);
737}
738#endif
739
740#undef PCI_VPD_TIMEOUT
741
742struct vpd_readstate {
743	device_t	pcib;
744	pcicfgregs	*cfg;
745	uint32_t	val;
746	int		bytesinval;
747	int		off;
748	uint8_t		cksum;
749};
750
751static int
752vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
753{
754	uint32_t reg;
755	uint8_t byte;
756
757	if (vrs->bytesinval == 0) {
758		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
759			return (ENXIO);
760		vrs->val = le32toh(reg);
761		vrs->off += 4;
762		byte = vrs->val & 0xff;
763		vrs->bytesinval = 3;
764	} else {
765		vrs->val = vrs->val >> 8;
766		byte = vrs->val & 0xff;
767		vrs->bytesinval--;
768	}
769
770	vrs->cksum += byte;
771	*data = byte;
772	return (0);
773}
774
775static void
776pci_read_vpd(device_t pcib, pcicfgregs *cfg)
777{
778	struct vpd_readstate vrs;
779	int state;
780	int name;
781	int remain;
782	int i;
783	int alloc, off;		/* alloc/off for RO/W arrays */
784	int cksumvalid;
785	int dflen;
786	uint8_t byte;
787	uint8_t byte2;
788
789	/* init vpd reader */
790	vrs.bytesinval = 0;
791	vrs.off = 0;
792	vrs.pcib = pcib;
793	vrs.cfg = cfg;
794	vrs.cksum = 0;
795
796	state = 0;
797	name = remain = i = 0;	/* shut up stupid gcc */
798	alloc = off = 0;	/* shut up stupid gcc */
799	dflen = 0;		/* shut up stupid gcc */
800	cksumvalid = -1;
801	while (state >= 0) {
802		if (vpd_nextbyte(&vrs, &byte)) {
803			state = -2;
804			break;
805		}
806#if 0
807		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
808		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
809		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
810#endif
811		switch (state) {
812		case 0:		/* item name */
813			if (byte & 0x80) {
814				if (vpd_nextbyte(&vrs, &byte2)) {
815					state = -2;
816					break;
817				}
818				remain = byte2;
819				if (vpd_nextbyte(&vrs, &byte2)) {
820					state = -2;
821					break;
822				}
823				remain |= byte2 << 8;
824				if (remain > (0x7f*4 - vrs.off)) {
825					state = -1;
826					printf(
827			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
828					    cfg->domain, cfg->bus, cfg->slot,
829					    cfg->func, remain);
830				}
831				name = byte & 0x7f;
832			} else {
833				remain = byte & 0x7;
834				name = (byte >> 3) & 0xf;
835			}
836			switch (name) {
837			case 0x2:	/* String */
838				cfg->vpd.vpd_ident = malloc(remain + 1,
839				    M_DEVBUF, M_WAITOK);
840				i = 0;
841				state = 1;
842				break;
843			case 0xf:	/* End */
844				state = -1;
845				break;
846			case 0x10:	/* VPD-R */
847				alloc = 8;
848				off = 0;
849				cfg->vpd.vpd_ros = malloc(alloc *
850				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
851				    M_WAITOK | M_ZERO);
852				state = 2;
853				break;
854			case 0x11:	/* VPD-W */
855				alloc = 8;
856				off = 0;
857				cfg->vpd.vpd_w = malloc(alloc *
858				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
859				    M_WAITOK | M_ZERO);
860				state = 5;
861				break;
862			default:	/* Invalid data, abort */
863				state = -1;
864				break;
865			}
866			break;
867
868		case 1:	/* Identifier String */
869			cfg->vpd.vpd_ident[i++] = byte;
870			remain--;
871			if (remain == 0)  {
872				cfg->vpd.vpd_ident[i] = '\0';
873				state = 0;
874			}
875			break;
876
877		case 2:	/* VPD-R Keyword Header */
878			if (off == alloc) {
879				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
880				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
881				    M_DEVBUF, M_WAITOK | M_ZERO);
882			}
883			cfg->vpd.vpd_ros[off].keyword[0] = byte;
884			if (vpd_nextbyte(&vrs, &byte2)) {
885				state = -2;
886				break;
887			}
888			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
889			if (vpd_nextbyte(&vrs, &byte2)) {
890				state = -2;
891				break;
892			}
893			dflen = byte2;
894			if (dflen == 0 &&
895			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
896			    2) == 0) {
897				/*
898				 * if this happens, we can't trust the rest
899				 * of the VPD.
900				 */
901				printf(
902				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
903				    cfg->domain, cfg->bus, cfg->slot,
904				    cfg->func, dflen);
905				cksumvalid = 0;
906				state = -1;
907				break;
908			} else if (dflen == 0) {
909				cfg->vpd.vpd_ros[off].value = malloc(1 *
910				    sizeof(*cfg->vpd.vpd_ros[off].value),
911				    M_DEVBUF, M_WAITOK);
912				cfg->vpd.vpd_ros[off].value[0] = '\x00';
913			} else
914				cfg->vpd.vpd_ros[off].value = malloc(
915				    (dflen + 1) *
916				    sizeof(*cfg->vpd.vpd_ros[off].value),
917				    M_DEVBUF, M_WAITOK);
918			remain -= 3;
919			i = 0;
920			/* keep in sync w/ state 3's transistions */
921			if (dflen == 0 && remain == 0)
922				state = 0;
923			else if (dflen == 0)
924				state = 2;
925			else
926				state = 3;
927			break;
928
929		case 3:	/* VPD-R Keyword Value */
930			cfg->vpd.vpd_ros[off].value[i++] = byte;
931			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
932			    "RV", 2) == 0 && cksumvalid == -1) {
933				if (vrs.cksum == 0)
934					cksumvalid = 1;
935				else {
936					if (bootverbose)
937						printf(
938				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
939						    cfg->domain, cfg->bus,
940						    cfg->slot, cfg->func,
941						    vrs.cksum);
942					cksumvalid = 0;
943					state = -1;
944					break;
945				}
946			}
947			dflen--;
948			remain--;
949			/* keep in sync w/ state 2's transistions */
950			if (dflen == 0)
951				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
952			if (dflen == 0 && remain == 0) {
953				cfg->vpd.vpd_rocnt = off;
954				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
955				    off * sizeof(*cfg->vpd.vpd_ros),
956				    M_DEVBUF, M_WAITOK | M_ZERO);
957				state = 0;
958			} else if (dflen == 0)
959				state = 2;
960			break;
961
962		case 4:
963			remain--;
964			if (remain == 0)
965				state = 0;
966			break;
967
968		case 5:	/* VPD-W Keyword Header */
969			if (off == alloc) {
970				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
971				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
972				    M_DEVBUF, M_WAITOK | M_ZERO);
973			}
974			cfg->vpd.vpd_w[off].keyword[0] = byte;
975			if (vpd_nextbyte(&vrs, &byte2)) {
976				state = -2;
977				break;
978			}
979			cfg->vpd.vpd_w[off].keyword[1] = byte2;
980			if (vpd_nextbyte(&vrs, &byte2)) {
981				state = -2;
982				break;
983			}
984			cfg->vpd.vpd_w[off].len = dflen = byte2;
985			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
986			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
987			    sizeof(*cfg->vpd.vpd_w[off].value),
988			    M_DEVBUF, M_WAITOK);
989			remain -= 3;
990			i = 0;
991			/* keep in sync w/ state 6's transistions */
992			if (dflen == 0 && remain == 0)
993				state = 0;
994			else if (dflen == 0)
995				state = 5;
996			else
997				state = 6;
998			break;
999
1000		case 6:	/* VPD-W Keyword Value */
1001			cfg->vpd.vpd_w[off].value[i++] = byte;
1002			dflen--;
1003			remain--;
1004			/* keep in sync w/ state 5's transistions */
1005			if (dflen == 0)
1006				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1007			if (dflen == 0 && remain == 0) {
1008				cfg->vpd.vpd_wcnt = off;
1009				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1010				    off * sizeof(*cfg->vpd.vpd_w),
1011				    M_DEVBUF, M_WAITOK | M_ZERO);
1012				state = 0;
1013			} else if (dflen == 0)
1014				state = 5;
1015			break;
1016
1017		default:
1018			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1019			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1020			    state);
1021			state = -1;
1022			break;
1023		}
1024	}
1025
1026	if (cksumvalid == 0 || state < -1) {
1027		/* read-only data bad, clean up */
1028		if (cfg->vpd.vpd_ros != NULL) {
1029			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1030				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1031			free(cfg->vpd.vpd_ros, M_DEVBUF);
1032			cfg->vpd.vpd_ros = NULL;
1033		}
1034	}
1035	if (state < -1) {
1036		/* I/O error, clean up */
1037		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1038		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1039		if (cfg->vpd.vpd_ident != NULL) {
1040			free(cfg->vpd.vpd_ident, M_DEVBUF);
1041			cfg->vpd.vpd_ident = NULL;
1042		}
1043		if (cfg->vpd.vpd_w != NULL) {
1044			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1045				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1046			free(cfg->vpd.vpd_w, M_DEVBUF);
1047			cfg->vpd.vpd_w = NULL;
1048		}
1049	}
1050	cfg->vpd.vpd_cached = 1;
1051#undef REG
1052#undef WREG
1053}
1054
1055int
1056pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1057{
1058	struct pci_devinfo *dinfo = device_get_ivars(child);
1059	pcicfgregs *cfg = &dinfo->cfg;
1060
1061	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1062		pci_read_vpd(device_get_parent(dev), cfg);
1063
1064	*identptr = cfg->vpd.vpd_ident;
1065
1066	if (*identptr == NULL)
1067		return (ENXIO);
1068
1069	return (0);
1070}
1071
1072int
1073pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1074	const char **vptr)
1075{
1076	struct pci_devinfo *dinfo = device_get_ivars(child);
1077	pcicfgregs *cfg = &dinfo->cfg;
1078	int i;
1079
1080	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1081		pci_read_vpd(device_get_parent(dev), cfg);
1082
1083	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1084		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1085		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1086			*vptr = cfg->vpd.vpd_ros[i].value;
1087		}
1088
1089	if (i != cfg->vpd.vpd_rocnt)
1090		return (0);
1091
1092	*vptr = NULL;
1093	return (ENXIO);
1094}
1095
1096/*
1097 * Find the requested extended capability and return the offset in
1098 * configuration space via the pointer provided. The function returns
1099 * 0 on success and error code otherwise.
1100 */
1101int
1102pci_find_extcap_method(device_t dev, device_t child, int capability,
1103    int *capreg)
1104{
1105	struct pci_devinfo *dinfo = device_get_ivars(child);
1106	pcicfgregs *cfg = &dinfo->cfg;
1107	u_int32_t status;
1108	u_int8_t ptr;
1109
1110	/*
1111	 * Check the CAP_LIST bit of the PCI status register first.
1112	 */
1113	status = pci_read_config(child, PCIR_STATUS, 2);
1114	if (!(status & PCIM_STATUS_CAPPRESENT))
1115		return (ENXIO);
1116
1117	/*
1118	 * Determine the start pointer of the capabilities list.
1119	 */
1120	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1121	case PCIM_HDRTYPE_NORMAL:
1122	case PCIM_HDRTYPE_BRIDGE:
1123		ptr = PCIR_CAP_PTR;
1124		break;
1125	case PCIM_HDRTYPE_CARDBUS:
1126		ptr = PCIR_CAP_PTR_2;
1127		break;
1128	default:
1129		/* XXX: panic? */
1130		return (ENXIO);		/* no extended capabilities support */
1131	}
1132	ptr = pci_read_config(child, ptr, 1);
1133
1134	/*
1135	 * Traverse the capabilities list.
1136	 */
1137	while (ptr != 0) {
1138		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1139			if (capreg != NULL)
1140				*capreg = ptr;
1141			return (0);
1142		}
1143		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1144	}
1145
1146	return (ENOENT);
1147}
1148
1149/*
1150 * Support for MSI-X message interrupts.
1151 */
1152void
1153pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1154{
1155	struct pci_devinfo *dinfo = device_get_ivars(dev);
1156	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1157	uint32_t offset;
1158
1159	KASSERT(msix->msix_table_len > index, ("bogus index"));
1160	offset = msix->msix_table_offset + index * 16;
1161	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1162	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1163	bus_write_4(msix->msix_table_res, offset + 8, data);
1164
1165	/* Enable MSI -> HT mapping. */
1166	pci_ht_map_msi(dev, address);
1167}
1168
1169void
1170pci_mask_msix(device_t dev, u_int index)
1171{
1172	struct pci_devinfo *dinfo = device_get_ivars(dev);
1173	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1174	uint32_t offset, val;
1175
1176	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1177	offset = msix->msix_table_offset + index * 16 + 12;
1178	val = bus_read_4(msix->msix_table_res, offset);
1179	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1180		val |= PCIM_MSIX_VCTRL_MASK;
1181		bus_write_4(msix->msix_table_res, offset, val);
1182	}
1183}
1184
1185void
1186pci_unmask_msix(device_t dev, u_int index)
1187{
1188	struct pci_devinfo *dinfo = device_get_ivars(dev);
1189	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1190	uint32_t offset, val;
1191
1192	KASSERT(msix->msix_table_len > index, ("bogus index"));
1193	offset = msix->msix_table_offset + index * 16 + 12;
1194	val = bus_read_4(msix->msix_table_res, offset);
1195	if (val & PCIM_MSIX_VCTRL_MASK) {
1196		val &= ~PCIM_MSIX_VCTRL_MASK;
1197		bus_write_4(msix->msix_table_res, offset, val);
1198	}
1199}
1200
1201int
1202pci_pending_msix(device_t dev, u_int index)
1203{
1204	struct pci_devinfo *dinfo = device_get_ivars(dev);
1205	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1206	uint32_t offset, bit;
1207
1208	KASSERT(msix->msix_table_len > index, ("bogus index"));
1209	offset = msix->msix_pba_offset + (index / 32) * 4;
1210	bit = 1 << index % 32;
1211	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1212}
1213
1214/*
1215 * Restore MSI-X registers and table during resume.  If MSI-X is
1216 * enabled then walk the virtual table to restore the actual MSI-X
1217 * table.
1218 */
1219static void
1220pci_resume_msix(device_t dev)
1221{
1222	struct pci_devinfo *dinfo = device_get_ivars(dev);
1223	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1224	struct msix_table_entry *mte;
1225	struct msix_vector *mv;
1226	int i;
1227
1228	if (msix->msix_alloc > 0) {
1229		/* First, mask all vectors. */
1230		for (i = 0; i < msix->msix_msgnum; i++)
1231			pci_mask_msix(dev, i);
1232
1233		/* Second, program any messages with at least one handler. */
1234		for (i = 0; i < msix->msix_table_len; i++) {
1235			mte = &msix->msix_table[i];
1236			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1237				continue;
1238			mv = &msix->msix_vectors[mte->mte_vector - 1];
1239			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1240			pci_unmask_msix(dev, i);
1241		}
1242	}
1243	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1244	    msix->msix_ctrl, 2);
1245}
1246
1247/*
1248 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1249 * returned in *count.  After this function returns, each message will be
1250 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1251 */
1252int
1253pci_alloc_msix_method(device_t dev, device_t child, int *count)
1254{
1255	struct pci_devinfo *dinfo = device_get_ivars(child);
1256	pcicfgregs *cfg = &dinfo->cfg;
1257	struct resource_list_entry *rle;
1258	int actual, error, i, irq, max;
1259
1260	/* Don't let count == 0 get us into trouble. */
1261	if (*count == 0)
1262		return (EINVAL);
1263
1264	/* If rid 0 is allocated, then fail. */
1265	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1266	if (rle != NULL && rle->res != NULL)
1267		return (ENXIO);
1268
1269	/* Already have allocated messages? */
1270	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1271		return (ENXIO);
1272
1273	/* If MSI is blacklisted for this system, fail. */
1274	if (pci_msi_blacklisted())
1275		return (ENXIO);
1276
1277	/* MSI-X capability present? */
1278	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1279		return (ENODEV);
1280
1281	/* Make sure the appropriate BARs are mapped. */
1282	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1283	    cfg->msix.msix_table_bar);
1284	if (rle == NULL || rle->res == NULL ||
1285	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1286		return (ENXIO);
1287	cfg->msix.msix_table_res = rle->res;
1288	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1289		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1290		    cfg->msix.msix_pba_bar);
1291		if (rle == NULL || rle->res == NULL ||
1292		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1293			return (ENXIO);
1294	}
1295	cfg->msix.msix_pba_res = rle->res;
1296
1297	if (bootverbose)
1298		device_printf(child,
1299		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1300		    *count, cfg->msix.msix_msgnum);
1301	max = min(*count, cfg->msix.msix_msgnum);
1302	for (i = 0; i < max; i++) {
1303		/* Allocate a message. */
1304		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1305		if (error)
1306			break;
1307		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1308		    irq, 1);
1309	}
1310	actual = i;
1311
1312	if (bootverbose) {
1313		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1314		if (actual == 1)
1315			device_printf(child, "using IRQ %lu for MSI-X\n",
1316			    rle->start);
1317		else {
1318			int run;
1319
1320			/*
1321			 * Be fancy and try to print contiguous runs of
1322			 * IRQ values as ranges.  'irq' is the previous IRQ.
1323			 * 'run' is true if we are in a range.
1324			 */
1325			device_printf(child, "using IRQs %lu", rle->start);
1326			irq = rle->start;
1327			run = 0;
1328			for (i = 1; i < actual; i++) {
1329				rle = resource_list_find(&dinfo->resources,
1330				    SYS_RES_IRQ, i + 1);
1331
1332				/* Still in a run? */
1333				if (rle->start == irq + 1) {
1334					run = 1;
1335					irq++;
1336					continue;
1337				}
1338
1339				/* Finish previous range. */
1340				if (run) {
1341					printf("-%d", irq);
1342					run = 0;
1343				}
1344
1345				/* Start new range. */
1346				printf(",%lu", rle->start);
1347				irq = rle->start;
1348			}
1349
1350			/* Unfinished range? */
1351			if (run)
1352				printf("-%d", irq);
1353			printf(" for MSI-X\n");
1354		}
1355	}
1356
1357	/* Mask all vectors. */
1358	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1359		pci_mask_msix(child, i);
1360
1361	/* Allocate and initialize vector data and virtual table. */
1362	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1363	    M_DEVBUF, M_WAITOK | M_ZERO);
1364	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1365	    M_DEVBUF, M_WAITOK | M_ZERO);
1366	for (i = 0; i < actual; i++) {
1367		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1368		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1369		cfg->msix.msix_table[i].mte_vector = i + 1;
1370	}
1371
1372	/* Update control register to enable MSI-X. */
1373	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1374	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1375	    cfg->msix.msix_ctrl, 2);
1376
1377	/* Update counts of alloc'd messages. */
1378	cfg->msix.msix_alloc = actual;
1379	cfg->msix.msix_table_len = actual;
1380	*count = actual;
1381	return (0);
1382}
1383
1384/*
1385 * By default, pci_alloc_msix() will assign the allocated IRQ
1386 * resources consecutively to the first N messages in the MSI-X table.
1387 * However, device drivers may want to use different layouts if they
1388 * either receive fewer messages than they asked for, or they wish to
1389 * populate the MSI-X table sparsely.  This method allows the driver
1390 * to specify what layout it wants.  It must be called after a
1391 * successful pci_alloc_msix() but before any of the associated
1392 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1393 *
1394 * The 'vectors' array contains 'count' message vectors.  The array
1395 * maps directly to the MSI-X table in that index 0 in the array
1396 * specifies the vector for the first message in the MSI-X table, etc.
1397 * The vector value in each array index can either be 0 to indicate
1398 * that no vector should be assigned to a message slot, or it can be a
1399 * number from 1 to N (where N is the count returned from a
1400 * succcessful call to pci_alloc_msix()) to indicate which message
1401 * vector (IRQ) to be used for the corresponding message.
1402 *
1403 * On successful return, each message with a non-zero vector will have
1404 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1405 * 1.  Additionally, if any of the IRQs allocated via the previous
1406 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1407 * will be freed back to the system automatically.
1408 *
1409 * For example, suppose a driver has a MSI-X table with 6 messages and
1410 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1411 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1412 * C.  After the call to pci_alloc_msix(), the device will be setup to
1413 * have an MSI-X table of ABC--- (where - means no vector assigned).
1414 * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1415 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1416 * be freed back to the system.  This device will also have valid
1417 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1418 *
1419 * In any case, the SYS_RES_IRQ rid X will always map to the message
1420 * at MSI-X table index X - 1 and will only be valid if a vector is
1421 * assigned to that table entry.
1422 */
1423int
1424pci_remap_msix_method(device_t dev, device_t child, int count,
1425    const u_int *vectors)
1426{
1427	struct pci_devinfo *dinfo = device_get_ivars(child);
1428	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1429	struct resource_list_entry *rle;
1430	int i, irq, j, *used;
1431
1432	/*
1433	 * Have to have at least one message in the table but the
1434	 * table can't be bigger than the actual MSI-X table in the
1435	 * device.
1436	 */
1437	if (count == 0 || count > msix->msix_msgnum)
1438		return (EINVAL);
1439
1440	/* Sanity check the vectors. */
1441	for (i = 0; i < count; i++)
1442		if (vectors[i] > msix->msix_alloc)
1443			return (EINVAL);
1444
1445	/*
1446	 * Make sure there aren't any holes in the vectors to be used.
1447	 * It's a big pain to support it, and it doesn't really make
1448	 * sense anyway.  Also, at least one vector must be used.
1449	 */
1450	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1451	    M_ZERO);
1452	for (i = 0; i < count; i++)
1453		if (vectors[i] != 0)
1454			used[vectors[i] - 1] = 1;
1455	for (i = 0; i < msix->msix_alloc - 1; i++)
1456		if (used[i] == 0 && used[i + 1] == 1) {
1457			free(used, M_DEVBUF);
1458			return (EINVAL);
1459		}
1460	if (used[0] != 1) {
1461		free(used, M_DEVBUF);
1462		return (EINVAL);
1463	}
1464
1465	/* Make sure none of the resources are allocated. */
1466	for (i = 0; i < msix->msix_table_len; i++) {
1467		if (msix->msix_table[i].mte_vector == 0)
1468			continue;
1469		if (msix->msix_table[i].mte_handlers > 0)
1470			return (EBUSY);
1471		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1472		KASSERT(rle != NULL, ("missing resource"));
1473		if (rle->res != NULL)
1474			return (EBUSY);
1475	}
1476
1477	/* Free the existing resource list entries. */
1478	for (i = 0; i < msix->msix_table_len; i++) {
1479		if (msix->msix_table[i].mte_vector == 0)
1480			continue;
1481		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1482	}
1483
1484	/*
1485	 * Build the new virtual table keeping track of which vectors are
1486	 * used.
1487	 */
1488	free(msix->msix_table, M_DEVBUF);
1489	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1490	    M_DEVBUF, M_WAITOK | M_ZERO);
1491	for (i = 0; i < count; i++)
1492		msix->msix_table[i].mte_vector = vectors[i];
1493	msix->msix_table_len = count;
1494
1495	/* Free any unused IRQs and resize the vectors array if necessary. */
1496	j = msix->msix_alloc - 1;
1497	if (used[j] == 0) {
1498		struct msix_vector *vec;
1499
1500		while (used[j] == 0) {
1501			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1502			    msix->msix_vectors[j].mv_irq);
1503			j--;
1504		}
1505		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1506		    M_WAITOK);
1507		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1508		    (j + 1));
1509		free(msix->msix_vectors, M_DEVBUF);
1510		msix->msix_vectors = vec;
1511		msix->msix_alloc = j + 1;
1512	}
1513	free(used, M_DEVBUF);
1514
1515	/* Map the IRQs onto the rids. */
1516	for (i = 0; i < count; i++) {
1517		if (vectors[i] == 0)
1518			continue;
1519		irq = msix->msix_vectors[vectors[i]].mv_irq;
1520		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1521		    irq, 1);
1522	}
1523
1524	if (bootverbose) {
1525		device_printf(child, "Remapped MSI-X IRQs as: ");
1526		for (i = 0; i < count; i++) {
1527			if (i != 0)
1528				printf(", ");
1529			if (vectors[i] == 0)
1530				printf("---");
1531			else
1532				printf("%d",
1533				    msix->msix_vectors[vectors[i]].mv_irq);
1534		}
1535		printf("\n");
1536	}
1537
1538	return (0);
1539}
1540
1541static int
1542pci_release_msix(device_t dev, device_t child)
1543{
1544	struct pci_devinfo *dinfo = device_get_ivars(child);
1545	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1546	struct resource_list_entry *rle;
1547	int i;
1548
1549	/* Do we have any messages to release? */
1550	if (msix->msix_alloc == 0)
1551		return (ENODEV);
1552
1553	/* Make sure none of the resources are allocated. */
1554	for (i = 0; i < msix->msix_table_len; i++) {
1555		if (msix->msix_table[i].mte_vector == 0)
1556			continue;
1557		if (msix->msix_table[i].mte_handlers > 0)
1558			return (EBUSY);
1559		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1560		KASSERT(rle != NULL, ("missing resource"));
1561		if (rle->res != NULL)
1562			return (EBUSY);
1563	}
1564
1565	/* Update control register to disable MSI-X. */
1566	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1567	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1568	    msix->msix_ctrl, 2);
1569
1570	/* Free the resource list entries. */
1571	for (i = 0; i < msix->msix_table_len; i++) {
1572		if (msix->msix_table[i].mte_vector == 0)
1573			continue;
1574		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1575	}
1576	free(msix->msix_table, M_DEVBUF);
1577	msix->msix_table_len = 0;
1578
1579	/* Release the IRQs. */
1580	for (i = 0; i < msix->msix_alloc; i++)
1581		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1582		    msix->msix_vectors[i].mv_irq);
1583	free(msix->msix_vectors, M_DEVBUF);
1584	msix->msix_alloc = 0;
1585	return (0);
1586}
1587
1588/*
1589 * Return the max supported MSI-X messages this device supports.
1590 * Basically, assuming the MD code can alloc messages, this function
1591 * should return the maximum value that pci_alloc_msix() can return.
1592 * Thus, it is subject to the tunables, etc.
1593 */
1594int
1595pci_msix_count_method(device_t dev, device_t child)
1596{
1597	struct pci_devinfo *dinfo = device_get_ivars(child);
1598	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1599
1600	if (pci_do_msix && msix->msix_location != 0)
1601		return (msix->msix_msgnum);
1602	return (0);
1603}
1604
1605/*
1606 * HyperTransport MSI mapping control
1607 */
1608void
1609pci_ht_map_msi(device_t dev, uint64_t addr)
1610{
1611	struct pci_devinfo *dinfo = device_get_ivars(dev);
1612	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1613
1614	if (!ht->ht_msimap)
1615		return;
1616
1617	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1618	    ht->ht_msiaddr >> 20 == addr >> 20) {
1619		/* Enable MSI -> HT mapping. */
1620		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1621		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1622		    ht->ht_msictrl, 2);
1623	}
1624
1625	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1626		/* Disable MSI -> HT mapping. */
1627		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1628		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1629		    ht->ht_msictrl, 2);
1630	}
1631}
1632
1633int
1634pci_get_max_read_req(device_t dev)
1635{
1636	int cap;
1637	uint16_t val;
1638
1639	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1640		return (0);
1641	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1642	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1643	val >>= 12;
1644	return (1 << (val + 7));
1645}
1646
1647int
1648pci_set_max_read_req(device_t dev, int size)
1649{
1650	int cap;
1651	uint16_t val;
1652
1653	if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0)
1654		return (0);
1655	if (size < 128)
1656		size = 128;
1657	if (size > 4096)
1658		size = 4096;
1659	size = (1 << (fls(size) - 1));
1660	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1661	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1662	val |= (fls(size) - 8) << 12;
1663	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1664	return (size);
1665}
1666
1667/*
1668 * Support for MSI message signalled interrupts.
1669 */
1670void
1671pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1672{
1673	struct pci_devinfo *dinfo = device_get_ivars(dev);
1674	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1675
1676	/* Write data and address values. */
1677	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1678	    address & 0xffffffff, 4);
1679	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1680		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1681		    address >> 32, 4);
1682		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1683		    data, 2);
1684	} else
1685		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1686		    2);
1687
1688	/* Enable MSI in the control register. */
1689	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1690	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1691	    2);
1692
1693	/* Enable MSI -> HT mapping. */
1694	pci_ht_map_msi(dev, address);
1695}
1696
1697void
1698pci_disable_msi(device_t dev)
1699{
1700	struct pci_devinfo *dinfo = device_get_ivars(dev);
1701	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1702
1703	/* Disable MSI -> HT mapping. */
1704	pci_ht_map_msi(dev, 0);
1705
1706	/* Disable MSI in the control register. */
1707	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1708	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1709	    2);
1710}
1711
1712/*
1713 * Restore MSI registers during resume.  If MSI is enabled then
1714 * restore the data and address registers in addition to the control
1715 * register.
1716 */
1717static void
1718pci_resume_msi(device_t dev)
1719{
1720	struct pci_devinfo *dinfo = device_get_ivars(dev);
1721	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1722	uint64_t address;
1723	uint16_t data;
1724
1725	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1726		address = msi->msi_addr;
1727		data = msi->msi_data;
1728		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1729		    address & 0xffffffff, 4);
1730		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1731			pci_write_config(dev, msi->msi_location +
1732			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1733			pci_write_config(dev, msi->msi_location +
1734			    PCIR_MSI_DATA_64BIT, data, 2);
1735		} else
1736			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1737			    data, 2);
1738	}
1739	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1740	    2);
1741}
1742
1743static int
1744pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1745{
1746	struct pci_devinfo *dinfo = device_get_ivars(dev);
1747	pcicfgregs *cfg = &dinfo->cfg;
1748	struct resource_list_entry *rle;
1749	struct msix_table_entry *mte;
1750	struct msix_vector *mv;
1751	uint64_t addr;
1752	uint32_t data;
1753	int error, i, j;
1754
1755	/*
1756	 * Handle MSI first.  We try to find this IRQ among our list
1757	 * of MSI IRQs.  If we find it, we request updated address and
1758	 * data registers and apply the results.
1759	 */
1760	if (cfg->msi.msi_alloc > 0) {
1761
1762		/* If we don't have any active handlers, nothing to do. */
1763		if (cfg->msi.msi_handlers == 0)
1764			return (0);
1765		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1766			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1767			    i + 1);
1768			if (rle->start == irq) {
1769				error = PCIB_MAP_MSI(device_get_parent(bus),
1770				    dev, irq, &addr, &data);
1771				if (error)
1772					return (error);
1773				pci_disable_msi(dev);
1774				dinfo->cfg.msi.msi_addr = addr;
1775				dinfo->cfg.msi.msi_data = data;
1776				pci_enable_msi(dev, addr, data);
1777				return (0);
1778			}
1779		}
1780		return (ENOENT);
1781	}
1782
1783	/*
1784	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1785	 * we request the updated mapping info.  If that works, we go
1786	 * through all the slots that use this IRQ and update them.
1787	 */
1788	if (cfg->msix.msix_alloc > 0) {
1789		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1790			mv = &cfg->msix.msix_vectors[i];
1791			if (mv->mv_irq == irq) {
1792				error = PCIB_MAP_MSI(device_get_parent(bus),
1793				    dev, irq, &addr, &data);
1794				if (error)
1795					return (error);
1796				mv->mv_address = addr;
1797				mv->mv_data = data;
1798				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1799					mte = &cfg->msix.msix_table[j];
1800					if (mte->mte_vector != i + 1)
1801						continue;
1802					if (mte->mte_handlers == 0)
1803						continue;
1804					pci_mask_msix(dev, j);
1805					pci_enable_msix(dev, j, addr, data);
1806					pci_unmask_msix(dev, j);
1807				}
1808			}
1809		}
1810		return (ENOENT);
1811	}
1812
1813	return (ENOENT);
1814}
1815
1816/*
1817 * Returns true if the specified device is blacklisted because MSI
1818 * doesn't work.
1819 */
1820int
1821pci_msi_device_blacklisted(device_t dev)
1822{
1823	struct pci_quirk *q;
1824
1825	if (!pci_honor_msi_blacklist)
1826		return (0);
1827
1828	for (q = &pci_quirks[0]; q->devid; q++) {
1829		if (q->devid == pci_get_devid(dev) &&
1830		    q->type == PCI_QUIRK_DISABLE_MSI)
1831			return (1);
1832	}
1833	return (0);
1834}
1835
1836/*
1837 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1838 * we just check for blacklisted chipsets as represented by the
1839 * host-PCI bridge at device 0:0:0.  In the future, it may become
1840 * necessary to check other system attributes, such as the kenv values
1841 * that give the motherboard manufacturer and model number.
1842 */
1843static int
1844pci_msi_blacklisted(void)
1845{
1846	device_t dev;
1847
1848	if (!pci_honor_msi_blacklist)
1849		return (0);
1850
1851	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1852	if (!(pcie_chipset || pcix_chipset))
1853		return (1);
1854
1855	dev = pci_find_bsf(0, 0, 0);
1856	if (dev != NULL)
1857		return (pci_msi_device_blacklisted(dev));
1858	return (0);
1859}
1860
1861/*
1862 * Attempt to allocate *count MSI messages.  The actual number allocated is
1863 * returned in *count.  After this function returns, each message will be
1864 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1865 */
1866int
1867pci_alloc_msi_method(device_t dev, device_t child, int *count)
1868{
1869	struct pci_devinfo *dinfo = device_get_ivars(child);
1870	pcicfgregs *cfg = &dinfo->cfg;
1871	struct resource_list_entry *rle;
1872	int actual, error, i, irqs[32];
1873	uint16_t ctrl;
1874
1875	/* Don't let count == 0 get us into trouble. */
1876	if (*count == 0)
1877		return (EINVAL);
1878
1879	/* If rid 0 is allocated, then fail. */
1880	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1881	if (rle != NULL && rle->res != NULL)
1882		return (ENXIO);
1883
1884	/* Already have allocated messages? */
1885	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1886		return (ENXIO);
1887
1888	/* If MSI is blacklisted for this system, fail. */
1889	if (pci_msi_blacklisted())
1890		return (ENXIO);
1891
1892	/* MSI capability present? */
1893	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1894		return (ENODEV);
1895
1896	if (bootverbose)
1897		device_printf(child,
1898		    "attempting to allocate %d MSI vectors (%d supported)\n",
1899		    *count, cfg->msi.msi_msgnum);
1900
1901	/* Don't ask for more than the device supports. */
1902	actual = min(*count, cfg->msi.msi_msgnum);
1903
1904	/* Don't ask for more than 32 messages. */
1905	actual = min(actual, 32);
1906
1907	/* MSI requires power of 2 number of messages. */
1908	if (!powerof2(actual))
1909		return (EINVAL);
1910
1911	for (;;) {
1912		/* Try to allocate N messages. */
1913		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1914		    cfg->msi.msi_msgnum, irqs);
1915		if (error == 0)
1916			break;
1917		if (actual == 1)
1918			return (error);
1919
1920		/* Try N / 2. */
1921		actual >>= 1;
1922	}
1923
1924	/*
1925	 * We now have N actual messages mapped onto SYS_RES_IRQ
1926	 * resources in the irqs[] array, so add new resources
1927	 * starting at rid 1.
1928	 */
1929	for (i = 0; i < actual; i++)
1930		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1931		    irqs[i], irqs[i], 1);
1932
1933	if (bootverbose) {
1934		if (actual == 1)
1935			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1936		else {
1937			int run;
1938
1939			/*
1940			 * Be fancy and try to print contiguous runs
1941			 * of IRQ values as ranges.  'run' is true if
1942			 * we are in a range.
1943			 */
1944			device_printf(child, "using IRQs %d", irqs[0]);
1945			run = 0;
1946			for (i = 1; i < actual; i++) {
1947
1948				/* Still in a run? */
1949				if (irqs[i] == irqs[i - 1] + 1) {
1950					run = 1;
1951					continue;
1952				}
1953
1954				/* Finish previous range. */
1955				if (run) {
1956					printf("-%d", irqs[i - 1]);
1957					run = 0;
1958				}
1959
1960				/* Start new range. */
1961				printf(",%d", irqs[i]);
1962			}
1963
1964			/* Unfinished range? */
1965			if (run)
1966				printf("-%d", irqs[actual - 1]);
1967			printf(" for MSI\n");
1968		}
1969	}
1970
1971	/* Update control register with actual count. */
1972	ctrl = cfg->msi.msi_ctrl;
1973	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1974	ctrl |= (ffs(actual) - 1) << 4;
1975	cfg->msi.msi_ctrl = ctrl;
1976	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1977
1978	/* Update counts of alloc'd messages. */
1979	cfg->msi.msi_alloc = actual;
1980	cfg->msi.msi_handlers = 0;
1981	*count = actual;
1982	return (0);
1983}
1984
1985/* Release the MSI messages associated with this device. */
1986int
1987pci_release_msi_method(device_t dev, device_t child)
1988{
1989	struct pci_devinfo *dinfo = device_get_ivars(child);
1990	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1991	struct resource_list_entry *rle;
1992	int error, i, irqs[32];
1993
1994	/* Try MSI-X first. */
1995	error = pci_release_msix(dev, child);
1996	if (error != ENODEV)
1997		return (error);
1998
1999	/* Do we have any messages to release? */
2000	if (msi->msi_alloc == 0)
2001		return (ENODEV);
2002	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2003
2004	/* Make sure none of the resources are allocated. */
2005	if (msi->msi_handlers > 0)
2006		return (EBUSY);
2007	for (i = 0; i < msi->msi_alloc; i++) {
2008		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2009		KASSERT(rle != NULL, ("missing MSI resource"));
2010		if (rle->res != NULL)
2011			return (EBUSY);
2012		irqs[i] = rle->start;
2013	}
2014
2015	/* Update control register with 0 count. */
2016	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2017	    ("%s: MSI still enabled", __func__));
2018	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2019	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2020	    msi->msi_ctrl, 2);
2021
2022	/* Release the messages. */
2023	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2024	for (i = 0; i < msi->msi_alloc; i++)
2025		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2026
2027	/* Update alloc count. */
2028	msi->msi_alloc = 0;
2029	msi->msi_addr = 0;
2030	msi->msi_data = 0;
2031	return (0);
2032}
2033
2034/*
2035 * Return the max supported MSI messages this device supports.
2036 * Basically, assuming the MD code can alloc messages, this function
2037 * should return the maximum value that pci_alloc_msi() can return.
2038 * Thus, it is subject to the tunables, etc.
2039 */
2040int
2041pci_msi_count_method(device_t dev, device_t child)
2042{
2043	struct pci_devinfo *dinfo = device_get_ivars(child);
2044	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2045
2046	if (pci_do_msi && msi->msi_location != 0)
2047		return (msi->msi_msgnum);
2048	return (0);
2049}
2050
2051/* free pcicfgregs structure and all depending data structures */
2052
2053int
2054pci_freecfg(struct pci_devinfo *dinfo)
2055{
2056	struct devlist *devlist_head;
2057	int i;
2058
2059	devlist_head = &pci_devq;
2060
2061	if (dinfo->cfg.vpd.vpd_reg) {
2062		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2063		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2064			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2065		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2066		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2067			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2068		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2069	}
2070	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2071	free(dinfo, M_DEVBUF);
2072
2073	/* increment the generation count */
2074	pci_generation++;
2075
2076	/* we're losing one device */
2077	pci_numdevs--;
2078	return (0);
2079}
2080
2081/*
2082 * PCI power manangement
2083 */
2084int
2085pci_set_powerstate_method(device_t dev, device_t child, int state)
2086{
2087	struct pci_devinfo *dinfo = device_get_ivars(child);
2088	pcicfgregs *cfg = &dinfo->cfg;
2089	uint16_t status;
2090	int result, oldstate, highest, delay;
2091
2092	if (cfg->pp.pp_cap == 0)
2093		return (EOPNOTSUPP);
2094
2095	/*
2096	 * Optimize a no state change request away.  While it would be OK to
2097	 * write to the hardware in theory, some devices have shown odd
2098	 * behavior when going from D3 -> D3.
2099	 */
2100	oldstate = pci_get_powerstate(child);
2101	if (oldstate == state)
2102		return (0);
2103
2104	/*
2105	 * The PCI power management specification states that after a state
2106	 * transition between PCI power states, system software must
2107	 * guarantee a minimal delay before the function accesses the device.
2108	 * Compute the worst case delay that we need to guarantee before we
2109	 * access the device.  Many devices will be responsive much more
2110	 * quickly than this delay, but there are some that don't respond
2111	 * instantly to state changes.  Transitions to/from D3 state require
2112	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2113	 * is done below with DELAY rather than a sleeper function because
2114	 * this function can be called from contexts where we cannot sleep.
2115	 */
2116	highest = (oldstate > state) ? oldstate : state;
2117	if (highest == PCI_POWERSTATE_D3)
2118	    delay = 10000;
2119	else if (highest == PCI_POWERSTATE_D2)
2120	    delay = 200;
2121	else
2122	    delay = 0;
2123	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2124	    & ~PCIM_PSTAT_DMASK;
2125	result = 0;
2126	switch (state) {
2127	case PCI_POWERSTATE_D0:
2128		status |= PCIM_PSTAT_D0;
2129		break;
2130	case PCI_POWERSTATE_D1:
2131		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2132			return (EOPNOTSUPP);
2133		status |= PCIM_PSTAT_D1;
2134		break;
2135	case PCI_POWERSTATE_D2:
2136		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2137			return (EOPNOTSUPP);
2138		status |= PCIM_PSTAT_D2;
2139		break;
2140	case PCI_POWERSTATE_D3:
2141		status |= PCIM_PSTAT_D3;
2142		break;
2143	default:
2144		return (EINVAL);
2145	}
2146
2147	if (bootverbose)
2148		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2149		    state);
2150
2151	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2152	if (delay)
2153		DELAY(delay);
2154	return (0);
2155}
2156
2157int
2158pci_get_powerstate_method(device_t dev, device_t child)
2159{
2160	struct pci_devinfo *dinfo = device_get_ivars(child);
2161	pcicfgregs *cfg = &dinfo->cfg;
2162	uint16_t status;
2163	int result;
2164
2165	if (cfg->pp.pp_cap != 0) {
2166		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2167		switch (status & PCIM_PSTAT_DMASK) {
2168		case PCIM_PSTAT_D0:
2169			result = PCI_POWERSTATE_D0;
2170			break;
2171		case PCIM_PSTAT_D1:
2172			result = PCI_POWERSTATE_D1;
2173			break;
2174		case PCIM_PSTAT_D2:
2175			result = PCI_POWERSTATE_D2;
2176			break;
2177		case PCIM_PSTAT_D3:
2178			result = PCI_POWERSTATE_D3;
2179			break;
2180		default:
2181			result = PCI_POWERSTATE_UNKNOWN;
2182			break;
2183		}
2184	} else {
2185		/* No support, device is always at D0 */
2186		result = PCI_POWERSTATE_D0;
2187	}
2188	return (result);
2189}
2190
2191/*
2192 * Some convenience functions for PCI device drivers.
2193 */
2194
2195static __inline void
2196pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2197{
2198	uint16_t	command;
2199
2200	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2201	command |= bit;
2202	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2203}
2204
2205static __inline void
2206pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2207{
2208	uint16_t	command;
2209
2210	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2211	command &= ~bit;
2212	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2213}
2214
2215int
2216pci_enable_busmaster_method(device_t dev, device_t child)
2217{
2218	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2219	return (0);
2220}
2221
2222int
2223pci_disable_busmaster_method(device_t dev, device_t child)
2224{
2225	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2226	return (0);
2227}
2228
2229int
2230pci_enable_io_method(device_t dev, device_t child, int space)
2231{
2232	uint16_t bit;
2233
2234	switch(space) {
2235	case SYS_RES_IOPORT:
2236		bit = PCIM_CMD_PORTEN;
2237		break;
2238	case SYS_RES_MEMORY:
2239		bit = PCIM_CMD_MEMEN;
2240		break;
2241	default:
2242		return (EINVAL);
2243	}
2244	pci_set_command_bit(dev, child, bit);
2245	return (0);
2246}
2247
2248int
2249pci_disable_io_method(device_t dev, device_t child, int space)
2250{
2251	uint16_t bit;
2252
2253	switch(space) {
2254	case SYS_RES_IOPORT:
2255		bit = PCIM_CMD_PORTEN;
2256		break;
2257	case SYS_RES_MEMORY:
2258		bit = PCIM_CMD_MEMEN;
2259		break;
2260	default:
2261		return (EINVAL);
2262	}
2263	pci_clear_command_bit(dev, child, bit);
2264	return (0);
2265}
2266
2267/*
2268 * New style pci driver.  Parent device is either a pci-host-bridge or a
2269 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2270 */
2271
2272void
2273pci_print_verbose(struct pci_devinfo *dinfo)
2274{
2275
2276	if (bootverbose) {
2277		pcicfgregs *cfg = &dinfo->cfg;
2278
2279		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2280		    cfg->vendor, cfg->device, cfg->revid);
2281		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2282		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2283		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2284		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2285		    cfg->mfdev);
2286		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2287		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2288		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2289		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2290		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2291		if (cfg->intpin > 0)
2292			printf("\tintpin=%c, irq=%d\n",
2293			    cfg->intpin +'a' -1, cfg->intline);
2294		if (cfg->pp.pp_cap) {
2295			uint16_t status;
2296
2297			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2298			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2299			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2300			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2301			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2302			    status & PCIM_PSTAT_DMASK);
2303		}
2304		if (cfg->msi.msi_location) {
2305			int ctrl;
2306
2307			ctrl = cfg->msi.msi_ctrl;
2308			printf("\tMSI supports %d message%s%s%s\n",
2309			    cfg->msi.msi_msgnum,
2310			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2311			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2312			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2313		}
2314		if (cfg->msix.msix_location) {
2315			printf("\tMSI-X supports %d message%s ",
2316			    cfg->msix.msix_msgnum,
2317			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2318			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2319				printf("in map 0x%x\n",
2320				    cfg->msix.msix_table_bar);
2321			else
2322				printf("in maps 0x%x and 0x%x\n",
2323				    cfg->msix.msix_table_bar,
2324				    cfg->msix.msix_pba_bar);
2325		}
2326	}
2327}
2328
2329static int
2330pci_porten(device_t dev)
2331{
2332	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2333}
2334
2335static int
2336pci_memen(device_t dev)
2337{
2338	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2339}
2340
2341static void
2342pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2343{
2344	pci_addr_t map, testval;
2345	int ln2range;
2346	uint16_t cmd;
2347
2348	/*
2349	 * The device ROM BAR is special.  It is always a 32-bit
2350	 * memory BAR.  Bit 0 is special and should not be set when
2351	 * sizing the BAR.
2352	 */
2353	if (reg == PCIR_BIOS) {
2354		map = pci_read_config(dev, reg, 4);
2355		pci_write_config(dev, reg, 0xfffffffe, 4);
2356		testval = pci_read_config(dev, reg, 4);
2357		pci_write_config(dev, reg, map, 4);
2358		*mapp = map;
2359		*testvalp = testval;
2360		return;
2361	}
2362
2363	map = pci_read_config(dev, reg, 4);
2364	ln2range = pci_maprange(map);
2365	if (ln2range == 64)
2366		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2367
2368	/*
2369	 * Disable decoding via the command register before
2370	 * determining the BAR's length since we will be placing it in
2371	 * a weird state.
2372	 */
2373	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2374	pci_write_config(dev, PCIR_COMMAND,
2375	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2376
2377	/*
2378	 * Determine the BAR's length by writing all 1's.  The bottom
2379	 * log_2(size) bits of the BAR will stick as 0 when we read
2380	 * the value back.
2381	 */
2382	pci_write_config(dev, reg, 0xffffffff, 4);
2383	testval = pci_read_config(dev, reg, 4);
2384	if (ln2range == 64) {
2385		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2386		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2387	}
2388
2389	/*
2390	 * Restore the original value of the BAR.  We may have reprogrammed
2391	 * the BAR of the low-level console device and when booting verbose,
2392	 * we need the console device addressable.
2393	 */
2394	pci_write_config(dev, reg, map, 4);
2395	if (ln2range == 64)
2396		pci_write_config(dev, reg + 4, map >> 32, 4);
2397	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2398
2399	*mapp = map;
2400	*testvalp = testval;
2401}
2402
2403static void
2404pci_write_bar(device_t dev, int reg, pci_addr_t base)
2405{
2406	pci_addr_t map;
2407	int ln2range;
2408
2409	map = pci_read_config(dev, reg, 4);
2410
2411	/* The device ROM BAR is always 32-bits. */
2412	if (reg == PCIR_BIOS)
2413		return;
2414	ln2range = pci_maprange(map);
2415	pci_write_config(dev, reg, base, 4);
2416	if (ln2range == 64)
2417		pci_write_config(dev, reg + 4, base >> 32, 4);
2418}
2419
2420/*
2421 * Add a resource based on a pci map register. Return 1 if the map
2422 * register is a 32bit map register or 2 if it is a 64bit register.
2423 */
2424static int
2425pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2426    int force, int prefetch)
2427{
2428	pci_addr_t base, map, testval;
2429	pci_addr_t start, end, count;
2430	int barlen, basezero, maprange, mapsize, type;
2431	uint16_t cmd;
2432	struct resource *res;
2433
2434	pci_read_bar(dev, reg, &map, &testval);
2435	if (PCI_BAR_MEM(map)) {
2436		type = SYS_RES_MEMORY;
2437		if (map & PCIM_BAR_MEM_PREFETCH)
2438			prefetch = 1;
2439	} else
2440		type = SYS_RES_IOPORT;
2441	mapsize = pci_mapsize(testval);
2442	base = pci_mapbase(map);
2443#ifdef __PCI_BAR_ZERO_VALID
2444	basezero = 0;
2445#else
2446	basezero = base == 0;
2447#endif
2448	maprange = pci_maprange(map);
2449	barlen = maprange == 64 ? 2 : 1;
2450
2451	/*
2452	 * For I/O registers, if bottom bit is set, and the next bit up
2453	 * isn't clear, we know we have a BAR that doesn't conform to the
2454	 * spec, so ignore it.  Also, sanity check the size of the data
2455	 * areas to the type of memory involved.  Memory must be at least
2456	 * 16 bytes in size, while I/O ranges must be at least 4.
2457	 */
2458	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2459		return (barlen);
2460	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2461	    (type == SYS_RES_IOPORT && mapsize < 2))
2462		return (barlen);
2463
2464	if (bootverbose) {
2465		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2466		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2467		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2468			printf(", port disabled\n");
2469		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2470			printf(", memory disabled\n");
2471		else
2472			printf(", enabled\n");
2473	}
2474
2475	/*
2476	 * If base is 0, then we have problems if this architecture does
2477	 * not allow that.  It is best to ignore such entries for the
2478	 * moment.  These will be allocated later if the driver specifically
2479	 * requests them.  However, some removable busses look better when
2480	 * all resources are allocated, so allow '0' to be overriden.
2481	 *
2482	 * Similarly treat maps whose values is the same as the test value
2483	 * read back.  These maps have had all f's written to them by the
2484	 * BIOS in an attempt to disable the resources.
2485	 */
2486	if (!force && (basezero || map == testval))
2487		return (barlen);
2488	if ((u_long)base != base) {
2489		device_printf(bus,
2490		    "pci%d:%d:%d:%d bar %#x too many address bits",
2491		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2492		    pci_get_function(dev), reg);
2493		return (barlen);
2494	}
2495
2496	/*
2497	 * This code theoretically does the right thing, but has
2498	 * undesirable side effects in some cases where peripherals
2499	 * respond oddly to having these bits enabled.  Let the user
2500	 * be able to turn them off (since pci_enable_io_modes is 1 by
2501	 * default).
2502	 */
2503	if (pci_enable_io_modes) {
2504		/* Turn on resources that have been left off by a lazy BIOS */
2505		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2506			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2507			cmd |= PCIM_CMD_PORTEN;
2508			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2509		}
2510		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2511			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2512			cmd |= PCIM_CMD_MEMEN;
2513			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2514		}
2515	} else {
2516		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2517			return (barlen);
2518		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2519			return (barlen);
2520	}
2521
2522	count = 1 << mapsize;
2523	if (basezero || base == pci_mapbase(testval)) {
2524		start = 0;	/* Let the parent decide. */
2525		end = ~0ULL;
2526	} else {
2527		start = base;
2528		end = base + (1 << mapsize) - 1;
2529	}
2530	resource_list_add(rl, type, reg, start, end, count);
2531
2532	/*
2533	 * Try to allocate the resource for this BAR from our parent
2534	 * so that this resource range is already reserved.  The
2535	 * driver for this device will later inherit this resource in
2536	 * pci_alloc_resource().
2537	 */
2538	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2539	    prefetch ? RF_PREFETCHABLE : 0);
2540	if (res == NULL) {
2541		/*
2542		 * If the allocation fails, clear the BAR and delete
2543		 * the resource list entry to force
2544		 * pci_alloc_resource() to allocate resources from the
2545		 * parent.
2546		 */
2547		resource_list_delete(rl, type, reg);
2548		start = 0;
2549	} else
2550		start = rman_get_start(res);
2551	pci_write_bar(dev, reg, start);
2552	return (barlen);
2553}
2554
2555/*
2556 * For ATA devices we need to decide early what addressing mode to use.
2557 * Legacy demands that the primary and secondary ATA ports sits on the
2558 * same addresses that old ISA hardware did. This dictates that we use
2559 * those addresses and ignore the BAR's if we cannot set PCI native
2560 * addressing mode.
2561 */
2562static void
2563pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2564    uint32_t prefetchmask)
2565{
2566	struct resource *r;
2567	int rid, type, progif;
2568#if 0
2569	/* if this device supports PCI native addressing use it */
2570	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2571	if ((progif & 0x8a) == 0x8a) {
2572		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2573		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2574			printf("Trying ATA native PCI addressing mode\n");
2575			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2576		}
2577	}
2578#endif
2579	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2580	type = SYS_RES_IOPORT;
2581	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2582		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2583		    prefetchmask & (1 << 0));
2584		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2585		    prefetchmask & (1 << 1));
2586	} else {
2587		rid = PCIR_BAR(0);
2588		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2589		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2590		    0x1f7, 8, 0);
2591		rid = PCIR_BAR(1);
2592		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2593		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2594		    0x3f6, 1, 0);
2595	}
2596	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2597		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2598		    prefetchmask & (1 << 2));
2599		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2600		    prefetchmask & (1 << 3));
2601	} else {
2602		rid = PCIR_BAR(2);
2603		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2604		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2605		    0x177, 8, 0);
2606		rid = PCIR_BAR(3);
2607		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2608		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2609		    0x376, 1, 0);
2610	}
2611	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2612	    prefetchmask & (1 << 4));
2613	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2614	    prefetchmask & (1 << 5));
2615}
2616
2617static void
2618pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2619{
2620	struct pci_devinfo *dinfo = device_get_ivars(dev);
2621	pcicfgregs *cfg = &dinfo->cfg;
2622	char tunable_name[64];
2623	int irq;
2624
2625	/* Has to have an intpin to have an interrupt. */
2626	if (cfg->intpin == 0)
2627		return;
2628
2629	/* Let the user override the IRQ with a tunable. */
2630	irq = PCI_INVALID_IRQ;
2631	snprintf(tunable_name, sizeof(tunable_name),
2632	    "hw.pci%d.%d.%d.INT%c.irq",
2633	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2634	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2635		irq = PCI_INVALID_IRQ;
2636
2637	/*
2638	 * If we didn't get an IRQ via the tunable, then we either use the
2639	 * IRQ value in the intline register or we ask the bus to route an
2640	 * interrupt for us.  If force_route is true, then we only use the
2641	 * value in the intline register if the bus was unable to assign an
2642	 * IRQ.
2643	 */
2644	if (!PCI_INTERRUPT_VALID(irq)) {
2645		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2646			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2647		if (!PCI_INTERRUPT_VALID(irq))
2648			irq = cfg->intline;
2649	}
2650
2651	/* If after all that we don't have an IRQ, just bail. */
2652	if (!PCI_INTERRUPT_VALID(irq))
2653		return;
2654
2655	/* Update the config register if it changed. */
2656	if (irq != cfg->intline) {
2657		cfg->intline = irq;
2658		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2659	}
2660
2661	/* Add this IRQ as rid 0 interrupt resource. */
2662	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2663}
2664
2665/* Perform early OHCI takeover from SMM. */
2666static void
2667ohci_early_takeover(device_t self)
2668{
2669	struct resource *res;
2670	uint32_t ctl;
2671	int rid;
2672	int i;
2673
2674	rid = PCIR_BAR(0);
2675	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2676	if (res == NULL)
2677		return;
2678
2679	ctl = bus_read_4(res, OHCI_CONTROL);
2680	if (ctl & OHCI_IR) {
2681		if (bootverbose)
2682			printf("ohci early: "
2683			    "SMM active, request owner change\n");
2684		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2685		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2686			DELAY(1000);
2687			ctl = bus_read_4(res, OHCI_CONTROL);
2688		}
2689		if (ctl & OHCI_IR) {
2690			if (bootverbose)
2691				printf("ohci early: "
2692				    "SMM does not respond, resetting\n");
2693			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2694		}
2695		/* Disable interrupts */
2696		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2697	}
2698
2699	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2700}
2701
2702/* Perform early UHCI takeover from SMM. */
2703static void
2704uhci_early_takeover(device_t self)
2705{
2706	struct resource *res;
2707	int rid;
2708
2709	/*
2710	 * Set the PIRQD enable bit and switch off all the others. We don't
2711	 * want legacy support to interfere with us XXX Does this also mean
2712	 * that the BIOS won't touch the keyboard anymore if it is connected
2713	 * to the ports of the root hub?
2714	 */
2715	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2716
2717	/* Disable interrupts */
2718	rid = PCI_UHCI_BASE_REG;
2719	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2720	if (res != NULL) {
2721		bus_write_2(res, UHCI_INTR, 0);
2722		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2723	}
2724}
2725
2726/* Perform early EHCI takeover from SMM. */
2727static void
2728ehci_early_takeover(device_t self)
2729{
2730	struct resource *res;
2731	uint32_t cparams;
2732	uint32_t eec;
2733	uint8_t eecp;
2734	uint8_t bios_sem;
2735	uint8_t offs;
2736	int rid;
2737	int i;
2738
2739	rid = PCIR_BAR(0);
2740	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2741	if (res == NULL)
2742		return;
2743
2744	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2745
2746	/* Synchronise with the BIOS if it owns the controller. */
2747	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2748	    eecp = EHCI_EECP_NEXT(eec)) {
2749		eec = pci_read_config(self, eecp, 4);
2750		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2751			continue;
2752		}
2753		bios_sem = pci_read_config(self, eecp +
2754		    EHCI_LEGSUP_BIOS_SEM, 1);
2755		if (bios_sem == 0) {
2756			continue;
2757		}
2758		if (bootverbose)
2759			printf("ehci early: "
2760			    "SMM active, request owner change\n");
2761
2762		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2763
2764		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2765			DELAY(1000);
2766			bios_sem = pci_read_config(self, eecp +
2767			    EHCI_LEGSUP_BIOS_SEM, 1);
2768		}
2769
2770		if (bios_sem != 0) {
2771			if (bootverbose)
2772				printf("ehci early: "
2773				    "SMM does not respond\n");
2774		}
2775		/* Disable interrupts */
2776		offs = bus_read_1(res, EHCI_CAPLENGTH);
2777		bus_write_4(res, offs + EHCI_USBINTR, 0);
2778	}
2779	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2780}
2781
2782void
2783pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2784{
2785	struct pci_devinfo *dinfo = device_get_ivars(dev);
2786	pcicfgregs *cfg = &dinfo->cfg;
2787	struct resource_list *rl = &dinfo->resources;
2788	struct pci_quirk *q;
2789	int i;
2790
2791	/* ATA devices needs special map treatment */
2792	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2793	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2794	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2795	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2796	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2797		pci_ata_maps(bus, dev, rl, force, prefetchmask);
2798	else
2799		for (i = 0; i < cfg->nummaps;)
2800			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
2801			    prefetchmask & (1 << i));
2802
2803	/*
2804	 * Add additional, quirked resources.
2805	 */
2806	for (q = &pci_quirks[0]; q->devid; q++) {
2807		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2808		    && q->type == PCI_QUIRK_MAP_REG)
2809			pci_add_map(bus, dev, q->arg1, rl, force, 0);
2810	}
2811
2812	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2813#ifdef __PCI_REROUTE_INTERRUPT
2814		/*
2815		 * Try to re-route interrupts. Sometimes the BIOS or
2816		 * firmware may leave bogus values in these registers.
2817		 * If the re-route fails, then just stick with what we
2818		 * have.
2819		 */
2820		pci_assign_interrupt(bus, dev, 1);
2821#else
2822		pci_assign_interrupt(bus, dev, 0);
2823#endif
2824	}
2825
2826	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
2827	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
2828		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
2829			ehci_early_takeover(dev);
2830		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
2831			ohci_early_takeover(dev);
2832		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
2833			uhci_early_takeover(dev);
2834	}
2835}
2836
2837void
2838pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2839{
2840#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2841	device_t pcib = device_get_parent(dev);
2842	struct pci_devinfo *dinfo;
2843	int maxslots;
2844	int s, f, pcifunchigh;
2845	uint8_t hdrtype;
2846
2847	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2848	    ("dinfo_size too small"));
2849	maxslots = PCIB_MAXSLOTS(pcib);
2850	for (s = 0; s <= maxslots; s++) {
2851		pcifunchigh = 0;
2852		f = 0;
2853		DELAY(1);
2854		hdrtype = REG(PCIR_HDRTYPE, 1);
2855		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2856			continue;
2857		if (hdrtype & PCIM_MFDEV)
2858			pcifunchigh = PCI_FUNCMAX;
2859		for (f = 0; f <= pcifunchigh; f++) {
2860			dinfo = pci_read_device(pcib, domain, busno, s, f,
2861			    dinfo_size);
2862			if (dinfo != NULL) {
2863				pci_add_child(dev, dinfo);
2864			}
2865		}
2866	}
2867#undef REG
2868}
2869
2870void
2871pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2872{
2873	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2874	device_set_ivars(dinfo->cfg.dev, dinfo);
2875	resource_list_init(&dinfo->resources);
2876	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2877	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2878	pci_print_verbose(dinfo);
2879	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2880}
2881
2882static int
2883pci_probe(device_t dev)
2884{
2885
2886	device_set_desc(dev, "PCI bus");
2887
2888	/* Allow other subclasses to override this driver. */
2889	return (BUS_PROBE_GENERIC);
2890}
2891
2892static int
2893pci_attach(device_t dev)
2894{
2895	int busno, domain;
2896
2897	/*
2898	 * Since there can be multiple independantly numbered PCI
2899	 * busses on systems with multiple PCI domains, we can't use
2900	 * the unit number to decide which bus we are probing. We ask
2901	 * the parent pcib what our domain and bus numbers are.
2902	 */
2903	domain = pcib_get_domain(dev);
2904	busno = pcib_get_bus(dev);
2905	if (bootverbose)
2906		device_printf(dev, "domain=%d, physical bus=%d\n",
2907		    domain, busno);
2908	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2909	return (bus_generic_attach(dev));
2910}
2911
2912static void
2913pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
2914    int state)
2915{
2916	device_t child, pcib;
2917	struct pci_devinfo *dinfo;
2918	int dstate, i;
2919
2920	/*
2921	 * Set the device to the given state.  If the firmware suggests
2922	 * a different power state, use it instead.  If power management
2923	 * is not present, the firmware is responsible for managing
2924	 * device power.  Skip children who aren't attached since they
2925	 * are handled separately.
2926	 */
2927	pcib = device_get_parent(dev);
2928	for (i = 0; i < numdevs; i++) {
2929		child = devlist[i];
2930		dinfo = device_get_ivars(child);
2931		dstate = state;
2932		if (device_is_attached(child) &&
2933		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
2934			pci_set_powerstate(child, dstate);
2935	}
2936}
2937
2938int
2939pci_suspend(device_t dev)
2940{
2941	device_t child, *devlist;
2942	struct pci_devinfo *dinfo;
2943	int error, i, numdevs;
2944
2945	/*
2946	 * Save the PCI configuration space for each child and set the
2947	 * device in the appropriate power state for this sleep state.
2948	 */
2949	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2950		return (error);
2951	for (i = 0; i < numdevs; i++) {
2952		child = devlist[i];
2953		dinfo = device_get_ivars(child);
2954		pci_cfg_save(child, dinfo, 0);
2955	}
2956
2957	/* Suspend devices before potentially powering them down. */
2958	error = bus_generic_suspend(dev);
2959	if (error) {
2960		free(devlist, M_TEMP);
2961		return (error);
2962	}
2963	if (pci_do_power_suspend)
2964		pci_set_power_children(dev, devlist, numdevs,
2965		    PCI_POWERSTATE_D3);
2966	free(devlist, M_TEMP);
2967	return (0);
2968}
2969
2970int
2971pci_resume(device_t dev)
2972{
2973	device_t child, *devlist;
2974	struct pci_devinfo *dinfo;
2975	int error, i, numdevs;
2976
2977	/*
2978	 * Set each child to D0 and restore its PCI configuration space.
2979	 */
2980	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2981		return (error);
2982	if (pci_do_power_resume)
2983		pci_set_power_children(dev, devlist, numdevs,
2984		    PCI_POWERSTATE_D0);
2985
2986	/* Now the device is powered up, restore its config space. */
2987	for (i = 0; i < numdevs; i++) {
2988		child = devlist[i];
2989		dinfo = device_get_ivars(child);
2990
2991		pci_cfg_restore(child, dinfo);
2992		if (!device_is_attached(child))
2993			pci_cfg_save(child, dinfo, 1);
2994	}
2995	free(devlist, M_TEMP);
2996	return (bus_generic_resume(dev));
2997}
2998
2999static void
3000pci_load_vendor_data(void)
3001{
3002	caddr_t vendordata, info;
3003
3004	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3005		info = preload_search_info(vendordata, MODINFO_ADDR);
3006		pci_vendordata = *(char **)info;
3007		info = preload_search_info(vendordata, MODINFO_SIZE);
3008		pci_vendordata_size = *(size_t *)info;
3009		/* terminate the database */
3010		pci_vendordata[pci_vendordata_size] = '\n';
3011	}
3012}
3013
3014void
3015pci_driver_added(device_t dev, driver_t *driver)
3016{
3017	int numdevs;
3018	device_t *devlist;
3019	device_t child;
3020	struct pci_devinfo *dinfo;
3021	int i;
3022
3023	if (bootverbose)
3024		device_printf(dev, "driver added\n");
3025	DEVICE_IDENTIFY(driver, dev);
3026	if (device_get_children(dev, &devlist, &numdevs) != 0)
3027		return;
3028	for (i = 0; i < numdevs; i++) {
3029		child = devlist[i];
3030		if (device_get_state(child) != DS_NOTPRESENT)
3031			continue;
3032		dinfo = device_get_ivars(child);
3033		pci_print_verbose(dinfo);
3034		if (bootverbose)
3035			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3036		pci_cfg_restore(child, dinfo);
3037		if (device_probe_and_attach(child) != 0)
3038			pci_cfg_save(child, dinfo, 1);
3039	}
3040	free(devlist, M_TEMP);
3041}
3042
3043int
3044pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3045    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3046{
3047	struct pci_devinfo *dinfo;
3048	struct msix_table_entry *mte;
3049	struct msix_vector *mv;
3050	uint64_t addr;
3051	uint32_t data;
3052	void *cookie;
3053	int error, rid;
3054
3055	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3056	    arg, &cookie);
3057	if (error)
3058		return (error);
3059
3060	/* If this is not a direct child, just bail out. */
3061	if (device_get_parent(child) != dev) {
3062		*cookiep = cookie;
3063		return(0);
3064	}
3065
3066	rid = rman_get_rid(irq);
3067	if (rid == 0) {
3068		/* Make sure that INTx is enabled */
3069		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3070	} else {
3071		/*
3072		 * Check to see if the interrupt is MSI or MSI-X.
3073		 * Ask our parent to map the MSI and give
3074		 * us the address and data register values.
3075		 * If we fail for some reason, teardown the
3076		 * interrupt handler.
3077		 */
3078		dinfo = device_get_ivars(child);
3079		if (dinfo->cfg.msi.msi_alloc > 0) {
3080			if (dinfo->cfg.msi.msi_addr == 0) {
3081				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3082			    ("MSI has handlers, but vectors not mapped"));
3083				error = PCIB_MAP_MSI(device_get_parent(dev),
3084				    child, rman_get_start(irq), &addr, &data);
3085				if (error)
3086					goto bad;
3087				dinfo->cfg.msi.msi_addr = addr;
3088				dinfo->cfg.msi.msi_data = data;
3089			}
3090			if (dinfo->cfg.msi.msi_handlers == 0)
3091				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3092				    dinfo->cfg.msi.msi_data);
3093			dinfo->cfg.msi.msi_handlers++;
3094		} else {
3095			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3096			    ("No MSI or MSI-X interrupts allocated"));
3097			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3098			    ("MSI-X index too high"));
3099			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3100			KASSERT(mte->mte_vector != 0, ("no message vector"));
3101			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3102			KASSERT(mv->mv_irq == rman_get_start(irq),
3103			    ("IRQ mismatch"));
3104			if (mv->mv_address == 0) {
3105				KASSERT(mte->mte_handlers == 0,
3106		    ("MSI-X table entry has handlers, but vector not mapped"));
3107				error = PCIB_MAP_MSI(device_get_parent(dev),
3108				    child, rman_get_start(irq), &addr, &data);
3109				if (error)
3110					goto bad;
3111				mv->mv_address = addr;
3112				mv->mv_data = data;
3113			}
3114			if (mte->mte_handlers == 0) {
3115				pci_enable_msix(child, rid - 1, mv->mv_address,
3116				    mv->mv_data);
3117				pci_unmask_msix(child, rid - 1);
3118			}
3119			mte->mte_handlers++;
3120		}
3121
3122		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3123		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3124	bad:
3125		if (error) {
3126			(void)bus_generic_teardown_intr(dev, child, irq,
3127			    cookie);
3128			return (error);
3129		}
3130	}
3131	*cookiep = cookie;
3132	return (0);
3133}
3134
3135int
3136pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3137    void *cookie)
3138{
3139	struct msix_table_entry *mte;
3140	struct resource_list_entry *rle;
3141	struct pci_devinfo *dinfo;
3142	int error, rid;
3143
3144	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3145		return (EINVAL);
3146
3147	/* If this isn't a direct child, just bail out */
3148	if (device_get_parent(child) != dev)
3149		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3150
3151	rid = rman_get_rid(irq);
3152	if (rid == 0) {
3153		/* Mask INTx */
3154		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3155	} else {
3156		/*
3157		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3158		 * decrement the appropriate handlers count and mask the
3159		 * MSI-X message, or disable MSI messages if the count
3160		 * drops to 0.
3161		 */
3162		dinfo = device_get_ivars(child);
3163		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3164		if (rle->res != irq)
3165			return (EINVAL);
3166		if (dinfo->cfg.msi.msi_alloc > 0) {
3167			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3168			    ("MSI-X index too high"));
3169			if (dinfo->cfg.msi.msi_handlers == 0)
3170				return (EINVAL);
3171			dinfo->cfg.msi.msi_handlers--;
3172			if (dinfo->cfg.msi.msi_handlers == 0)
3173				pci_disable_msi(child);
3174		} else {
3175			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3176			    ("No MSI or MSI-X interrupts allocated"));
3177			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3178			    ("MSI-X index too high"));
3179			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3180			if (mte->mte_handlers == 0)
3181				return (EINVAL);
3182			mte->mte_handlers--;
3183			if (mte->mte_handlers == 0)
3184				pci_mask_msix(child, rid - 1);
3185		}
3186	}
3187	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3188	if (rid > 0)
3189		KASSERT(error == 0,
3190		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3191	return (error);
3192}
3193
3194int
3195pci_print_child(device_t dev, device_t child)
3196{
3197	struct pci_devinfo *dinfo;
3198	struct resource_list *rl;
3199	int retval = 0;
3200
3201	dinfo = device_get_ivars(child);
3202	rl = &dinfo->resources;
3203
3204	retval += bus_print_child_header(dev, child);
3205
3206	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3207	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3208	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3209	if (device_get_flags(dev))
3210		retval += printf(" flags %#x", device_get_flags(dev));
3211
3212	retval += printf(" at device %d.%d", pci_get_slot(child),
3213	    pci_get_function(child));
3214
3215	retval += bus_print_child_footer(dev, child);
3216
3217	return (retval);
3218}
3219
3220static struct
3221{
3222	int	class;
3223	int	subclass;
3224	char	*desc;
3225} pci_nomatch_tab[] = {
3226	{PCIC_OLD,		-1,			"old"},
3227	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3228	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3229	{PCIC_STORAGE,		-1,			"mass storage"},
3230	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3231	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3232	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3233	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3234	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3235	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3236	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3237	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3238	{PCIC_NETWORK,		-1,			"network"},
3239	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3240	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3241	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3242	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3243	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3244	{PCIC_DISPLAY,		-1,			"display"},
3245	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3246	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3247	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3248	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3249	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3250	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3251	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3252	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3253	{PCIC_MEMORY,		-1,			"memory"},
3254	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3255	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3256	{PCIC_BRIDGE,		-1,			"bridge"},
3257	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3258	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3259	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3260	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3261	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3262	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3263	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3264	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3265	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3266	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3267	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3268	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3269	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3270	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3271	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3272	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3273	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3274	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3275	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3276	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3277	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3278	{PCIC_INPUTDEV,		-1,			"input device"},
3279	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3280	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3281	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3282	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3283	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3284	{PCIC_DOCKING,		-1,			"docking station"},
3285	{PCIC_PROCESSOR,	-1,			"processor"},
3286	{PCIC_SERIALBUS,	-1,			"serial bus"},
3287	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3288	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3289	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3290	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3291	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3292	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3293	{PCIC_WIRELESS,		-1,			"wireless controller"},
3294	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3295	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3296	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3297	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3298	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3299	{PCIC_SATCOM,		-1,			"satellite communication"},
3300	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3301	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3302	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3303	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3304	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3305	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3306	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3307	{PCIC_DASP,		-1,			"dasp"},
3308	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3309	{0, 0,		NULL}
3310};
3311
3312void
3313pci_probe_nomatch(device_t dev, device_t child)
3314{
3315	int	i;
3316	char	*cp, *scp, *device;
3317
3318	/*
3319	 * Look for a listing for this device in a loaded device database.
3320	 */
3321	if ((device = pci_describe_device(child)) != NULL) {
3322		device_printf(dev, "<%s>", device);
3323		free(device, M_DEVBUF);
3324	} else {
3325		/*
3326		 * Scan the class/subclass descriptions for a general
3327		 * description.
3328		 */
3329		cp = "unknown";
3330		scp = NULL;
3331		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3332			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3333				if (pci_nomatch_tab[i].subclass == -1) {
3334					cp = pci_nomatch_tab[i].desc;
3335				} else if (pci_nomatch_tab[i].subclass ==
3336				    pci_get_subclass(child)) {
3337					scp = pci_nomatch_tab[i].desc;
3338				}
3339			}
3340		}
3341		device_printf(dev, "<%s%s%s>",
3342		    cp ? cp : "",
3343		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3344		    scp ? scp : "");
3345	}
3346	printf(" at device %d.%d (no driver attached)\n",
3347	    pci_get_slot(child), pci_get_function(child));
3348	pci_cfg_save(child, device_get_ivars(child), 1);
3349	return;
3350}
3351
3352/*
3353 * Parse the PCI device database, if loaded, and return a pointer to a
3354 * description of the device.
3355 *
3356 * The database is flat text formatted as follows:
3357 *
3358 * Any line not in a valid format is ignored.
3359 * Lines are terminated with newline '\n' characters.
3360 *
3361 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3362 * the vendor name.
3363 *
3364 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3365 * - devices cannot be listed without a corresponding VENDOR line.
3366 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3367 * another TAB, then the device name.
3368 */
3369
3370/*
3371 * Assuming (ptr) points to the beginning of a line in the database,
3372 * return the vendor or device and description of the next entry.
3373 * The value of (vendor) or (device) inappropriate for the entry type
3374 * is set to -1.  Returns nonzero at the end of the database.
3375 *
3376 * Note that this is slightly unrobust in the face of corrupt data;
3377 * we attempt to safeguard against this by spamming the end of the
3378 * database with a newline when we initialise.
3379 */
3380static int
3381pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3382{
3383	char	*cp = *ptr;
3384	int	left;
3385
3386	*device = -1;
3387	*vendor = -1;
3388	**desc = '\0';
3389	for (;;) {
3390		left = pci_vendordata_size - (cp - pci_vendordata);
3391		if (left <= 0) {
3392			*ptr = cp;
3393			return(1);
3394		}
3395
3396		/* vendor entry? */
3397		if (*cp != '\t' &&
3398		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3399			break;
3400		/* device entry? */
3401		if (*cp == '\t' &&
3402		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3403			break;
3404
3405		/* skip to next line */
3406		while (*cp != '\n' && left > 0) {
3407			cp++;
3408			left--;
3409		}
3410		if (*cp == '\n') {
3411			cp++;
3412			left--;
3413		}
3414	}
3415	/* skip to next line */
3416	while (*cp != '\n' && left > 0) {
3417		cp++;
3418		left--;
3419	}
3420	if (*cp == '\n' && left > 0)
3421		cp++;
3422	*ptr = cp;
3423	return(0);
3424}
3425
3426static char *
3427pci_describe_device(device_t dev)
3428{
3429	int	vendor, device;
3430	char	*desc, *vp, *dp, *line;
3431
3432	desc = vp = dp = NULL;
3433
3434	/*
3435	 * If we have no vendor data, we can't do anything.
3436	 */
3437	if (pci_vendordata == NULL)
3438		goto out;
3439
3440	/*
3441	 * Scan the vendor data looking for this device
3442	 */
3443	line = pci_vendordata;
3444	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3445		goto out;
3446	for (;;) {
3447		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3448			goto out;
3449		if (vendor == pci_get_vendor(dev))
3450			break;
3451	}
3452	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3453		goto out;
3454	for (;;) {
3455		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3456			*dp = 0;
3457			break;
3458		}
3459		if (vendor != -1) {
3460			*dp = 0;
3461			break;
3462		}
3463		if (device == pci_get_device(dev))
3464			break;
3465	}
3466	if (dp[0] == '\0')
3467		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3468	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3469	    NULL)
3470		sprintf(desc, "%s, %s", vp, dp);
3471 out:
3472	if (vp != NULL)
3473		free(vp, M_DEVBUF);
3474	if (dp != NULL)
3475		free(dp, M_DEVBUF);
3476	return(desc);
3477}
3478
3479int
3480pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3481{
3482	struct pci_devinfo *dinfo;
3483	pcicfgregs *cfg;
3484
3485	dinfo = device_get_ivars(child);
3486	cfg = &dinfo->cfg;
3487
3488	switch (which) {
3489	case PCI_IVAR_ETHADDR:
3490		/*
3491		 * The generic accessor doesn't deal with failure, so
3492		 * we set the return value, then return an error.
3493		 */
3494		*((uint8_t **) result) = NULL;
3495		return (EINVAL);
3496	case PCI_IVAR_SUBVENDOR:
3497		*result = cfg->subvendor;
3498		break;
3499	case PCI_IVAR_SUBDEVICE:
3500		*result = cfg->subdevice;
3501		break;
3502	case PCI_IVAR_VENDOR:
3503		*result = cfg->vendor;
3504		break;
3505	case PCI_IVAR_DEVICE:
3506		*result = cfg->device;
3507		break;
3508	case PCI_IVAR_DEVID:
3509		*result = (cfg->device << 16) | cfg->vendor;
3510		break;
3511	case PCI_IVAR_CLASS:
3512		*result = cfg->baseclass;
3513		break;
3514	case PCI_IVAR_SUBCLASS:
3515		*result = cfg->subclass;
3516		break;
3517	case PCI_IVAR_PROGIF:
3518		*result = cfg->progif;
3519		break;
3520	case PCI_IVAR_REVID:
3521		*result = cfg->revid;
3522		break;
3523	case PCI_IVAR_INTPIN:
3524		*result = cfg->intpin;
3525		break;
3526	case PCI_IVAR_IRQ:
3527		*result = cfg->intline;
3528		break;
3529	case PCI_IVAR_DOMAIN:
3530		*result = cfg->domain;
3531		break;
3532	case PCI_IVAR_BUS:
3533		*result = cfg->bus;
3534		break;
3535	case PCI_IVAR_SLOT:
3536		*result = cfg->slot;
3537		break;
3538	case PCI_IVAR_FUNCTION:
3539		*result = cfg->func;
3540		break;
3541	case PCI_IVAR_CMDREG:
3542		*result = cfg->cmdreg;
3543		break;
3544	case PCI_IVAR_CACHELNSZ:
3545		*result = cfg->cachelnsz;
3546		break;
3547	case PCI_IVAR_MINGNT:
3548		*result = cfg->mingnt;
3549		break;
3550	case PCI_IVAR_MAXLAT:
3551		*result = cfg->maxlat;
3552		break;
3553	case PCI_IVAR_LATTIMER:
3554		*result = cfg->lattimer;
3555		break;
3556	default:
3557		return (ENOENT);
3558	}
3559	return (0);
3560}
3561
3562int
3563pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3564{
3565	struct pci_devinfo *dinfo;
3566
3567	dinfo = device_get_ivars(child);
3568
3569	switch (which) {
3570	case PCI_IVAR_INTPIN:
3571		dinfo->cfg.intpin = value;
3572		return (0);
3573	case PCI_IVAR_ETHADDR:
3574	case PCI_IVAR_SUBVENDOR:
3575	case PCI_IVAR_SUBDEVICE:
3576	case PCI_IVAR_VENDOR:
3577	case PCI_IVAR_DEVICE:
3578	case PCI_IVAR_DEVID:
3579	case PCI_IVAR_CLASS:
3580	case PCI_IVAR_SUBCLASS:
3581	case PCI_IVAR_PROGIF:
3582	case PCI_IVAR_REVID:
3583	case PCI_IVAR_IRQ:
3584	case PCI_IVAR_DOMAIN:
3585	case PCI_IVAR_BUS:
3586	case PCI_IVAR_SLOT:
3587	case PCI_IVAR_FUNCTION:
3588		return (EINVAL);	/* disallow for now */
3589
3590	default:
3591		return (ENOENT);
3592	}
3593}
3594
3595
3596#include "opt_ddb.h"
3597#ifdef DDB
3598#include <ddb/ddb.h>
3599#include <sys/cons.h>
3600
3601/*
3602 * List resources based on pci map registers, used for within ddb
3603 */
3604
3605DB_SHOW_COMMAND(pciregs, db_pci_dump)
3606{
3607	struct pci_devinfo *dinfo;
3608	struct devlist *devlist_head;
3609	struct pci_conf *p;
3610	const char *name;
3611	int i, error, none_count;
3612
3613	none_count = 0;
3614	/* get the head of the device queue */
3615	devlist_head = &pci_devq;
3616
3617	/*
3618	 * Go through the list of devices and print out devices
3619	 */
3620	for (error = 0, i = 0,
3621	     dinfo = STAILQ_FIRST(devlist_head);
3622	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3623	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3624
3625		/* Populate pd_name and pd_unit */
3626		name = NULL;
3627		if (dinfo->cfg.dev)
3628			name = device_get_name(dinfo->cfg.dev);
3629
3630		p = &dinfo->conf;
3631		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3632			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3633			(name && *name) ? name : "none",
3634			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3635			none_count++,
3636			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3637			p->pc_sel.pc_func, (p->pc_class << 16) |
3638			(p->pc_subclass << 8) | p->pc_progif,
3639			(p->pc_subdevice << 16) | p->pc_subvendor,
3640			(p->pc_device << 16) | p->pc_vendor,
3641			p->pc_revid, p->pc_hdr);
3642	}
3643}
3644#endif /* DDB */
3645
3646static struct resource *
3647pci_reserve_map(device_t dev, device_t child, int type, int *rid,
3648    u_long start, u_long end, u_long count, u_int flags)
3649{
3650	struct pci_devinfo *dinfo = device_get_ivars(child);
3651	struct resource_list *rl = &dinfo->resources;
3652	struct resource_list_entry *rle;
3653	struct resource *res;
3654	pci_addr_t map, testval;
3655	int mapsize;
3656
3657	/*
3658	 * Weed out the bogons, and figure out how large the BAR/map
3659	 * is.  Bars that read back 0 here are bogus and unimplemented.
3660	 * Note: atapci in legacy mode are special and handled elsewhere
3661	 * in the code.  If you have a atapci device in legacy mode and
3662	 * it fails here, that other code is broken.
3663	 */
3664	res = NULL;
3665	pci_read_bar(child, *rid, &map, &testval);
3666
3667	/* Ignore a BAR with a base of 0. */
3668	if ((*rid == PCIR_BIOS && pci_rombase(testval) == 0) ||
3669	    pci_mapbase(testval) == 0)
3670		goto out;
3671
3672	if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) {
3673		if (type != SYS_RES_MEMORY) {
3674			if (bootverbose)
3675				device_printf(dev,
3676				    "child %s requested type %d for rid %#x,"
3677				    " but the BAR says it is an memio\n",
3678				    device_get_nameunit(child), type, *rid);
3679			goto out;
3680		}
3681	} else {
3682		if (type != SYS_RES_IOPORT) {
3683			if (bootverbose)
3684				device_printf(dev,
3685				    "child %s requested type %d for rid %#x,"
3686				    " but the BAR says it is an ioport\n",
3687				    device_get_nameunit(child), type, *rid);
3688			goto out;
3689		}
3690	}
3691
3692	/*
3693	 * For real BARs, we need to override the size that
3694	 * the driver requests, because that's what the BAR
3695	 * actually uses and we would otherwise have a
3696	 * situation where we might allocate the excess to
3697	 * another driver, which won't work.
3698	 *
3699	 * Device ROM BARs use a different mask value.
3700	 */
3701	if (*rid == PCIR_BIOS)
3702		mapsize = pci_romsize(testval);
3703	else
3704		mapsize = pci_mapsize(testval);
3705	count = 1UL << mapsize;
3706	if (RF_ALIGNMENT(flags) < mapsize)
3707		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3708	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3709		flags |= RF_PREFETCHABLE;
3710
3711	/*
3712	 * Allocate enough resource, and then write back the
3713	 * appropriate bar for that resource.
3714	 */
3715	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3716	    start, end, count, flags & ~RF_ACTIVE);
3717	if (res == NULL) {
3718		device_printf(child,
3719		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3720		    count, *rid, type, start, end);
3721		goto out;
3722	}
3723	resource_list_add(rl, type, *rid, start, end, count);
3724	rle = resource_list_find(rl, type, *rid);
3725	if (rle == NULL)
3726		panic("pci_reserve_map: unexpectedly can't find resource.");
3727	rle->res = res;
3728	rle->start = rman_get_start(res);
3729	rle->end = rman_get_end(res);
3730	rle->count = count;
3731	rle->flags = RLE_RESERVED;
3732	if (bootverbose)
3733		device_printf(child,
3734		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3735		    count, *rid, type, rman_get_start(res));
3736	map = rman_get_start(res);
3737	pci_write_bar(child, *rid, map);
3738out:;
3739	return (res);
3740}
3741
3742
3743struct resource *
3744pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3745		   u_long start, u_long end, u_long count, u_int flags)
3746{
3747	struct pci_devinfo *dinfo = device_get_ivars(child);
3748	struct resource_list *rl = &dinfo->resources;
3749	struct resource_list_entry *rle;
3750	struct resource *res;
3751	pcicfgregs *cfg = &dinfo->cfg;
3752
3753	if (device_get_parent(child) != dev)
3754		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3755		    type, rid, start, end, count, flags));
3756
3757	/*
3758	 * Perform lazy resource allocation
3759	 */
3760	switch (type) {
3761	case SYS_RES_IRQ:
3762		/*
3763		 * Can't alloc legacy interrupt once MSI messages have
3764		 * been allocated.
3765		 */
3766		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3767		    cfg->msix.msix_alloc > 0))
3768			return (NULL);
3769
3770		/*
3771		 * If the child device doesn't have an interrupt
3772		 * routed and is deserving of an interrupt, try to
3773		 * assign it one.
3774		 */
3775		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3776		    (cfg->intpin != 0))
3777			pci_assign_interrupt(dev, child, 0);
3778		break;
3779	case SYS_RES_IOPORT:
3780	case SYS_RES_MEMORY:
3781		/* Reserve resources for this BAR if needed. */
3782		rle = resource_list_find(rl, type, *rid);
3783		if (rle == NULL) {
3784			res = pci_reserve_map(dev, child, type, rid, start, end,
3785			    count, flags);
3786			if (res == NULL)
3787				return (NULL);
3788		}
3789	}
3790	return (resource_list_alloc(rl, dev, child, type, rid,
3791	    start, end, count, flags));
3792}
3793
3794int
3795pci_activate_resource(device_t dev, device_t child, int type, int rid,
3796    struct resource *r)
3797{
3798	int error;
3799
3800	error = bus_generic_activate_resource(dev, child, type, rid, r);
3801	if (error)
3802		return (error);
3803
3804	/* Enable decoding in the command register when activating BARs. */
3805	if (device_get_parent(child) == dev) {
3806		/* Device ROMs need their decoding explicitly enabled. */
3807		if (rid == PCIR_BIOS)
3808			pci_write_config(child, rid, rman_get_start(r) |
3809			    PCIM_BIOS_ENABLE, 4);
3810		switch (type) {
3811		case SYS_RES_IOPORT:
3812		case SYS_RES_MEMORY:
3813			error = PCI_ENABLE_IO(dev, child, type);
3814			break;
3815		}
3816	}
3817	return (error);
3818}
3819
3820int
3821pci_deactivate_resource(device_t dev, device_t child, int type,
3822    int rid, struct resource *r)
3823{
3824	int error;
3825
3826	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
3827	if (error)
3828		return (error);
3829
3830	/* Disable decoding for device ROMs. */
3831	if (rid == PCIR_BIOS)
3832		pci_write_config(child, rid, rman_get_start(r), 4);
3833	return (0);
3834}
3835
3836void
3837pci_delete_child(device_t dev, device_t child)
3838{
3839	struct resource_list_entry *rle;
3840	struct resource_list *rl;
3841	struct pci_devinfo *dinfo;
3842
3843	dinfo = device_get_ivars(child);
3844	rl = &dinfo->resources;
3845
3846	if (device_is_attached(child))
3847		device_detach(child);
3848
3849	/* Turn off access to resources we're about to free */
3850	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
3851	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
3852
3853	/* Free all allocated resources */
3854	STAILQ_FOREACH(rle, rl, link) {
3855		if (rle->res) {
3856			if (rman_get_flags(rle->res) & RF_ACTIVE ||
3857			    resource_list_busy(rl, rle->type, rle->rid)) {
3858				pci_printf(&dinfo->cfg,
3859				    "Resource still owned, oops. "
3860				    "(type=%d, rid=%d, addr=%lx)\n",
3861				    rle->type, rle->rid,
3862				    rman_get_start(rle->res));
3863				bus_release_resource(child, rle->type, rle->rid,
3864				    rle->res);
3865			}
3866			resource_list_unreserve(rl, dev, child, rle->type,
3867			    rle->rid);
3868		}
3869	}
3870	resource_list_free(rl);
3871
3872	device_delete_child(dev, child);
3873	pci_freecfg(dinfo);
3874}
3875
3876void
3877pci_delete_resource(device_t dev, device_t child, int type, int rid)
3878{
3879	struct pci_devinfo *dinfo;
3880	struct resource_list *rl;
3881	struct resource_list_entry *rle;
3882
3883	if (device_get_parent(child) != dev)
3884		return;
3885
3886	dinfo = device_get_ivars(child);
3887	rl = &dinfo->resources;
3888	rle = resource_list_find(rl, type, rid);
3889	if (rle == NULL)
3890		return;
3891
3892	if (rle->res) {
3893		if (rman_get_flags(rle->res) & RF_ACTIVE ||
3894		    resource_list_busy(rl, type, rid)) {
3895			device_printf(dev, "delete_resource: "
3896			    "Resource still owned by child, oops. "
3897			    "(type=%d, rid=%d, addr=%lx)\n",
3898			    type, rid, rman_get_start(rle->res));
3899			return;
3900		}
3901
3902#ifndef __PCI_BAR_ZERO_VALID
3903		/*
3904		 * If this is a BAR, clear the BAR so it stops
3905		 * decoding before releasing the resource.
3906		 */
3907		switch (type) {
3908		case SYS_RES_IOPORT:
3909		case SYS_RES_MEMORY:
3910			pci_write_bar(child, rid, 0);
3911			break;
3912		}
3913#endif
3914		resource_list_unreserve(rl, dev, child, type, rid);
3915	}
3916	resource_list_delete(rl, type, rid);
3917}
3918
3919struct resource_list *
3920pci_get_resource_list (device_t dev, device_t child)
3921{
3922	struct pci_devinfo *dinfo = device_get_ivars(child);
3923
3924	return (&dinfo->resources);
3925}
3926
3927uint32_t
3928pci_read_config_method(device_t dev, device_t child, int reg, int width)
3929{
3930	struct pci_devinfo *dinfo = device_get_ivars(child);
3931	pcicfgregs *cfg = &dinfo->cfg;
3932
3933	return (PCIB_READ_CONFIG(device_get_parent(dev),
3934	    cfg->bus, cfg->slot, cfg->func, reg, width));
3935}
3936
3937void
3938pci_write_config_method(device_t dev, device_t child, int reg,
3939    uint32_t val, int width)
3940{
3941	struct pci_devinfo *dinfo = device_get_ivars(child);
3942	pcicfgregs *cfg = &dinfo->cfg;
3943
3944	PCIB_WRITE_CONFIG(device_get_parent(dev),
3945	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3946}
3947
3948int
3949pci_child_location_str_method(device_t dev, device_t child, char *buf,
3950    size_t buflen)
3951{
3952
3953	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3954	    pci_get_function(child));
3955	return (0);
3956}
3957
3958int
3959pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3960    size_t buflen)
3961{
3962	struct pci_devinfo *dinfo;
3963	pcicfgregs *cfg;
3964
3965	dinfo = device_get_ivars(child);
3966	cfg = &dinfo->cfg;
3967	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3968	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3969	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3970	    cfg->progif);
3971	return (0);
3972}
3973
3974int
3975pci_assign_interrupt_method(device_t dev, device_t child)
3976{
3977	struct pci_devinfo *dinfo = device_get_ivars(child);
3978	pcicfgregs *cfg = &dinfo->cfg;
3979
3980	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3981	    cfg->intpin));
3982}
3983
3984static int
3985pci_modevent(module_t mod, int what, void *arg)
3986{
3987	static struct cdev *pci_cdev;
3988
3989	switch (what) {
3990	case MOD_LOAD:
3991		STAILQ_INIT(&pci_devq);
3992		pci_generation = 0;
3993		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3994		    "pci");
3995		pci_load_vendor_data();
3996		break;
3997
3998	case MOD_UNLOAD:
3999		destroy_dev(pci_cdev);
4000		break;
4001	}
4002
4003	return (0);
4004}
4005
4006void
4007pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4008{
4009	int i;
4010
4011	/*
4012	 * Only do header type 0 devices.  Type 1 devices are bridges,
4013	 * which we know need special treatment.  Type 2 devices are
4014	 * cardbus bridges which also require special treatment.
4015	 * Other types are unknown, and we err on the side of safety
4016	 * by ignoring them.
4017	 */
4018	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4019		return;
4020
4021	/*
4022	 * Restore the device to full power mode.  We must do this
4023	 * before we restore the registers because moving from D3 to
4024	 * D0 will cause the chip's BARs and some other registers to
4025	 * be reset to some unknown power on reset values.  Cut down
4026	 * the noise on boot by doing nothing if we are already in
4027	 * state D0.
4028	 */
4029	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4030		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4031	for (i = 0; i < dinfo->cfg.nummaps; i++)
4032		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4033	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4034	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4035	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4036	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4037	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4038	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4039	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4040	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4041	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4042	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4043
4044	/* Restore MSI and MSI-X configurations if they are present. */
4045	if (dinfo->cfg.msi.msi_location != 0)
4046		pci_resume_msi(dev);
4047	if (dinfo->cfg.msix.msix_location != 0)
4048		pci_resume_msix(dev);
4049}
4050
4051void
4052pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4053{
4054	int i;
4055	uint32_t cls;
4056	int ps;
4057
4058	/*
4059	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4060	 * we know need special treatment.  Type 2 devices are cardbus bridges
4061	 * which also require special treatment.  Other types are unknown, and
4062	 * we err on the side of safety by ignoring them.  Powering down
4063	 * bridges should not be undertaken lightly.
4064	 */
4065	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4066		return;
4067	for (i = 0; i < dinfo->cfg.nummaps; i++)
4068		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4069	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4070
4071	/*
4072	 * Some drivers apparently write to these registers w/o updating our
4073	 * cached copy.  No harm happens if we update the copy, so do so here
4074	 * so we can restore them.  The COMMAND register is modified by the
4075	 * bus w/o updating the cache.  This should represent the normally
4076	 * writable portion of the 'defined' part of type 0 headers.  In
4077	 * theory we also need to save/restore the PCI capability structures
4078	 * we know about, but apart from power we don't know any that are
4079	 * writable.
4080	 */
4081	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4082	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4083	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4084	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4085	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4086	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4087	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4088	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4089	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4090	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4091	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4092	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4093	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4094	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4095	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4096
4097	/*
4098	 * don't set the state for display devices, base peripherals and
4099	 * memory devices since bad things happen when they are powered down.
4100	 * We should (a) have drivers that can easily detach and (b) use
4101	 * generic drivers for these devices so that some device actually
4102	 * attaches.  We need to make sure that when we implement (a) we don't
4103	 * power the device down on a reattach.
4104	 */
4105	cls = pci_get_class(dev);
4106	if (!setstate)
4107		return;
4108	switch (pci_do_power_nodriver)
4109	{
4110		case 0:		/* NO powerdown at all */
4111			return;
4112		case 1:		/* Conservative about what to power down */
4113			if (cls == PCIC_STORAGE)
4114				return;
4115			/*FALLTHROUGH*/
4116		case 2:		/* Agressive about what to power down */
4117			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4118			    cls == PCIC_BASEPERIPH)
4119				return;
4120			/*FALLTHROUGH*/
4121		case 3:		/* Power down everything */
4122			break;
4123	}
4124	/*
4125	 * PCI spec says we can only go into D3 state from D0 state.
4126	 * Transition from D[12] into D0 before going to D3 state.
4127	 */
4128	ps = pci_get_powerstate(dev);
4129	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4130		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4131	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4132		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4133}
4134