pci.c revision 232318
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 232318 2012-02-29 22:06:44Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73#define	PCIR_IS_BIOS(cfg, reg)						\
74	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
77static pci_addr_t	pci_mapbase(uint64_t mapreg);
78static const char	*pci_maptype(uint64_t mapreg);
79static int		pci_mapsize(uint64_t testval);
80static int		pci_maprange(uint64_t mapreg);
81static pci_addr_t	pci_rombase(uint64_t mapreg);
82static int		pci_romsize(uint64_t testval);
83static void		pci_fixancient(pcicfgregs *cfg);
84static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85
86static int		pci_porten(device_t dev);
87static int		pci_memen(device_t dev);
88static void		pci_assign_interrupt(device_t bus, device_t dev,
89			    int force_route);
90static int		pci_add_map(device_t bus, device_t dev, int reg,
91			    struct resource_list *rl, int force, int prefetch);
92static int		pci_probe(device_t dev);
93static int		pci_attach(device_t dev);
94static void		pci_load_vendor_data(void);
95static int		pci_describe_parse_line(char **ptr, int *vendor,
96			    int *device, char **desc);
97static char		*pci_describe_device(device_t dev);
98static int		pci_modevent(module_t mod, int what, void *arg);
99static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100			    pcicfgregs *cfg);
101static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103			    int reg, uint32_t *data);
104#if 0
105static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106			    int reg, uint32_t data);
107#endif
108static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109static void		pci_disable_msi(device_t dev);
110static void		pci_enable_msi(device_t dev, uint64_t address,
111			    uint16_t data);
112static void		pci_enable_msix(device_t dev, u_int index,
113			    uint64_t address, uint32_t data);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static void		pci_resume_msi(device_t dev);
118static void		pci_resume_msix(device_t dev);
119static int		pci_remap_intr_method(device_t bus, device_t dev,
120			    u_int irq);
121
/*
 * Method table for the PCI bus driver: standard newbus device and bus
 * interfaces plus the PCI-specific kobj interface (pci_if.m).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
175
176DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
177
178static devclass_t pci_devclass;
179DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
180MODULE_VERSION(pci, 1);
181
182static char	*pci_vendordata;
183static size_t	pci_vendordata_size;
184
/* One entry in the device quirk table (pci_quirks[]). */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-specific; config offset of the extra map
			   register for PCI_QUIRK_MAP_REG */
	int	arg2;	/* quirk-specific; unused (0) by all current entries */
};
194
195static const struct pci_quirk const pci_quirks[] = {
196	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
197	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
198	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
199	/* As does the Serverworks OSB4 (the SMBus mapping register) */
200	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
201
202	/*
203	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
204	 * or the CMIC-SL (AKA ServerWorks GC_LE).
205	 */
206	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
207	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208
209	/*
210	 * MSI doesn't work on earlier Intel chipsets including
211	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
212	 */
213	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
214	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
218	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
219	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220
221	/*
222	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
223	 * bridge.
224	 */
225	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226
227	/*
228	 * MSI-X doesn't work with at least LSI SAS1068E passed through by
229	 * VMware.
230	 */
231	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232
233	/*
234	 * Some virtualization environments emulate an older chipset
235	 * but support MSI just fine.  QEMU uses the Intel 82440.
236	 */
237	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
238
239	{ 0 }
240};
241
242/* map register information */
243#define	PCI_MAPMEM	0x01	/* memory map */
244#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
245#define	PCI_MAPPORT	0x04	/* port map */
246
247struct devlist pci_devq;
248uint32_t pci_generation;
249uint32_t pci_numdevs = 0;
250static int pcie_chipset, pcix_chipset;
251
252/* sysctl vars */
253SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
254
255static int pci_enable_io_modes = 1;
256TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
257SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
258    &pci_enable_io_modes, 1,
259    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
260enable these bits correctly.  We'd like to do this all the time, but there\n\
261are some peripherals that this causes problems with.");
262
263static int pci_do_power_nodriver = 0;
264TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
265SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
266    &pci_do_power_nodriver, 0,
267  "Place a function into D3 state when no driver attaches to it.  0 means\n\
268disable.  1 means conservatively place devices into D3 state.  2 means\n\
269agressively place devices into D3 state.  3 means put absolutely everything\n\
270in D3 state.");
271
272int pci_do_power_resume = 1;
273TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
274SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
275    &pci_do_power_resume, 1,
276  "Transition from D3 -> D0 on resume.");
277
278int pci_do_power_suspend = 1;
279TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
280SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
281    &pci_do_power_suspend, 1,
282  "Transition from D0 -> D3 on suspend.");
283
284static int pci_do_msi = 1;
285TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
286SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
287    "Enable support for MSI interrupts");
288
289static int pci_do_msix = 1;
290TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
291SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
292    "Enable support for MSI-X interrupts");
293
294static int pci_honor_msi_blacklist = 1;
295TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
296SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
297    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
298
299#if defined(__i386__) || defined(__amd64__)
300static int pci_usb_takeover = 1;
301#else
302static int pci_usb_takeover = 0;
303#endif
304TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
305SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
306    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
307Disable this if you depend on BIOS emulation of USB devices, that is\n\
308you use USB devices (like keyboard or mouse) but do not load USB drivers");
309
310/* Find a device_t by bus/slot/function in domain 0 */
311
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: search PCI domain 0 only. */
	return (pci_find_dbsf(0, bus, slot, func));
}
318
319/* Find a device_t by domain/bus/slot/function */
320
321device_t
322pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323{
324	struct pci_devinfo *dinfo;
325
326	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327		if ((dinfo->cfg.domain == domain) &&
328		    (dinfo->cfg.bus == bus) &&
329		    (dinfo->cfg.slot == slot) &&
330		    (dinfo->cfg.func == func)) {
331			return (dinfo->cfg.dev);
332		}
333	}
334
335	return (NULL);
336}
337
338/* Find a device_t by vendor/device ID */
339
340device_t
341pci_find_device(uint16_t vendor, uint16_t device)
342{
343	struct pci_devinfo *dinfo;
344
345	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346		if ((dinfo->cfg.vendor == vendor) &&
347		    (dinfo->cfg.device == device)) {
348			return (dinfo->cfg.dev);
349		}
350	}
351
352	return (NULL);
353}
354
355device_t
356pci_find_class(uint8_t class, uint8_t subclass)
357{
358	struct pci_devinfo *dinfo;
359
360	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
361		if (dinfo->cfg.baseclass == class &&
362		    dinfo->cfg.subclass == subclass) {
363			return (dinfo->cfg.dev);
364		}
365	}
366
367	return (NULL);
368}
369
370static int
371pci_printf(pcicfgregs *cfg, const char *fmt, ...)
372{
373	va_list ap;
374	int retval;
375
376	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
377	    cfg->func);
378	va_start(ap, fmt);
379	retval += vprintf(fmt, ap);
380	va_end(ap);
381	return (retval);
382}
383
384/* return base address of memory or port map */
385
386static pci_addr_t
387pci_mapbase(uint64_t mapreg)
388{
389
390	if (PCI_BAR_MEM(mapreg))
391		return (mapreg & PCIM_BAR_MEM_BASE);
392	else
393		return (mapreg & PCIM_BAR_IO_BASE);
394}
395
396/* return map type of memory or port map */
397
398static const char *
399pci_maptype(uint64_t mapreg)
400{
401
402	if (PCI_BAR_IO(mapreg))
403		return ("I/O Port");
404	if (mapreg & PCIM_BAR_MEM_PREFETCH)
405		return ("Prefetchable Memory");
406	return ("Memory");
407}
408
409/* return log2 of map size decoded for memory or port map */
410
411static int
412pci_mapsize(uint64_t testval)
413{
414	int ln2size;
415
416	testval = pci_mapbase(testval);
417	ln2size = 0;
418	if (testval != 0) {
419		while ((testval & 1) == 0)
420		{
421			ln2size++;
422			testval >>= 1;
423		}
424	}
425	return (ln2size);
426}
427
428/* return base address of device ROM */
429
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
436
/* return log2 of map size decoded for device ROM */
438
439static int
440pci_romsize(uint64_t testval)
441{
442	int ln2size;
443
444	testval = pci_rombase(testval);
445	ln2size = 0;
446	if (testval != 0) {
447		while ((testval & 1) == 0)
448		{
449			ln2size++;
450			testval >>= 1;
451		}
452	}
453	return (ln2size);
454}
455
456/* return log2 of address range supported by map register */
457
458static int
459pci_maprange(uint64_t mapreg)
460{
461	int ln2range = 0;
462
463	if (PCI_BAR_IO(mapreg))
464		ln2range = 32;
465	else
466		switch (mapreg & PCIM_BAR_MEM_TYPE) {
467		case PCIM_BAR_MEM_32:
468			ln2range = 32;
469			break;
470		case PCIM_BAR_MEM_1MB:
471			ln2range = 20;
472			break;
473		case PCIM_BAR_MEM_64:
474			ln2range = 64;
475			break;
476		}
477	return (ln2range);
478}
479
480/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
481
482static void
483pci_fixancient(pcicfgregs *cfg)
484{
485	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
486		return;
487
488	/* PCI to PCI bridges use header type 1 */
489	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
490		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
491}
492
493/* extract header type specific config data */
494
/*
 * Fill in the header-type-specific fields of 'cfg' (subsystem IDs and
 * the number of BARs) for the function at bus 'b', slot 's',
 * function 'f' accessed through bridge 'pcib'.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: ordinary function; subsystem IDs at type-0 offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; no subsystem IDs in the header. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge; subsystem IDs at type-2 offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
516
517/* read configuration header into pcicfgregs structure */
/*
 * Probe the function at domain 'd', bus 'b', slot 's', function 'f'
 * through bridge 'pcib'.  If a device responds, allocate a pci_devinfo
 * of 'size' bytes (callers may ask for a larger structure with private
 * data appended), fill in its configuration snapshot, link it onto the
 * global device list and return it.  Returns NULL if no device is
 * present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A vendor/device word of all-ones means nothing decodes here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/*
		 * NOTE(review): with M_WAITOK this malloc should never
		 * return NULL; the check is defensive.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common configuration header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record multi-function bit, then strip it from hdrtype. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciio(4) conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
592
/*
 * Walk the device's PCI capability list and record the capabilities
 * this driver cares about (power management, HyperTransport, MSI,
 * MSI-X, VPD, subvendor, PCI-X, PCI express) into 'cfg'.  Also sets the
 * file-global pcix_chipset/pcie_chipset hints when a bridge with the
 * corresponding capability is seen.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
749
750/*
751 * PCI Vital Product Data
752 */
753
754#define	PCI_VPD_TIMEOUT		1000000
755
756static int
757pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
758{
759	int count = PCI_VPD_TIMEOUT;
760
761	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
762
763	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
764
765	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
766		if (--count < 0)
767			return (ENXIO);
768		DELAY(1);	/* limit looping */
769	}
770	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
771
772	return (0);
773}
774
#if 0
/*
 * Write one 32-bit word of VPD data at address 'reg' (must be 4-byte
 * aligned).  Mirror image of pci_read_vpd_reg(): bit 15 of the VPD
 * address register starts the write and is cleared by hardware on
 * completion.  Returns 0 on success or ENXIO on timeout.  Currently
 * compiled out (unused).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
794
795#undef PCI_VPD_TIMEOUT
796
/*
 * Cursor state for sequential byte reads of a device's VPD area via
 * vpd_nextbyte().  VPD is fetched 32 bits at a time; 'val' buffers the
 * most recently fetched word and 'bytesinval' counts its not-yet-
 * consumed bytes.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last word read, host byte order */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running sum of delivered bytes */
};
805
806static int
807vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
808{
809	uint32_t reg;
810	uint8_t byte;
811
812	if (vrs->bytesinval == 0) {
813		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
814			return (ENXIO);
815		vrs->val = le32toh(reg);
816		vrs->off += 4;
817		byte = vrs->val & 0xff;
818		vrs->bytesinval = 3;
819	} else {
820		vrs->val = vrs->val >> 8;
821		byte = vrs->val & 0xff;
822		vrs->bytesinval--;
823	}
824
825	vrs->cksum += byte;
826	*data = byte;
827	return (0);
828}
829
830static void
831pci_read_vpd(device_t pcib, pcicfgregs *cfg)
832{
833	struct vpd_readstate vrs;
834	int state;
835	int name;
836	int remain;
837	int i;
838	int alloc, off;		/* alloc/off for RO/W arrays */
839	int cksumvalid;
840	int dflen;
841	uint8_t byte;
842	uint8_t byte2;
843
844	/* init vpd reader */
845	vrs.bytesinval = 0;
846	vrs.off = 0;
847	vrs.pcib = pcib;
848	vrs.cfg = cfg;
849	vrs.cksum = 0;
850
851	state = 0;
852	name = remain = i = 0;	/* shut up stupid gcc */
853	alloc = off = 0;	/* shut up stupid gcc */
854	dflen = 0;		/* shut up stupid gcc */
855	cksumvalid = -1;
856	while (state >= 0) {
857		if (vpd_nextbyte(&vrs, &byte)) {
858			state = -2;
859			break;
860		}
861#if 0
862		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
863		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
864		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
865#endif
866		switch (state) {
867		case 0:		/* item name */
868			if (byte & 0x80) {
869				if (vpd_nextbyte(&vrs, &byte2)) {
870					state = -2;
871					break;
872				}
873				remain = byte2;
874				if (vpd_nextbyte(&vrs, &byte2)) {
875					state = -2;
876					break;
877				}
878				remain |= byte2 << 8;
879				if (remain > (0x7f*4 - vrs.off)) {
880					state = -1;
881					pci_printf(cfg,
882					    "invalid VPD data, remain %#x\n",
883					    remain);
884				}
885				name = byte & 0x7f;
886			} else {
887				remain = byte & 0x7;
888				name = (byte >> 3) & 0xf;
889			}
890			switch (name) {
891			case 0x2:	/* String */
892				cfg->vpd.vpd_ident = malloc(remain + 1,
893				    M_DEVBUF, M_WAITOK);
894				i = 0;
895				state = 1;
896				break;
897			case 0xf:	/* End */
898				state = -1;
899				break;
900			case 0x10:	/* VPD-R */
901				alloc = 8;
902				off = 0;
903				cfg->vpd.vpd_ros = malloc(alloc *
904				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
905				    M_WAITOK | M_ZERO);
906				state = 2;
907				break;
908			case 0x11:	/* VPD-W */
909				alloc = 8;
910				off = 0;
911				cfg->vpd.vpd_w = malloc(alloc *
912				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
913				    M_WAITOK | M_ZERO);
914				state = 5;
915				break;
916			default:	/* Invalid data, abort */
917				state = -1;
918				break;
919			}
920			break;
921
922		case 1:	/* Identifier String */
923			cfg->vpd.vpd_ident[i++] = byte;
924			remain--;
925			if (remain == 0)  {
926				cfg->vpd.vpd_ident[i] = '\0';
927				state = 0;
928			}
929			break;
930
931		case 2:	/* VPD-R Keyword Header */
932			if (off == alloc) {
933				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
934				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
935				    M_DEVBUF, M_WAITOK | M_ZERO);
936			}
937			cfg->vpd.vpd_ros[off].keyword[0] = byte;
938			if (vpd_nextbyte(&vrs, &byte2)) {
939				state = -2;
940				break;
941			}
942			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
943			if (vpd_nextbyte(&vrs, &byte2)) {
944				state = -2;
945				break;
946			}
947			dflen = byte2;
948			if (dflen == 0 &&
949			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
950			    2) == 0) {
951				/*
952				 * if this happens, we can't trust the rest
953				 * of the VPD.
954				 */
955				pci_printf(cfg, "bad keyword length: %d\n",
956				    dflen);
957				cksumvalid = 0;
958				state = -1;
959				break;
960			} else if (dflen == 0) {
961				cfg->vpd.vpd_ros[off].value = malloc(1 *
962				    sizeof(*cfg->vpd.vpd_ros[off].value),
963				    M_DEVBUF, M_WAITOK);
964				cfg->vpd.vpd_ros[off].value[0] = '\x00';
965			} else
966				cfg->vpd.vpd_ros[off].value = malloc(
967				    (dflen + 1) *
968				    sizeof(*cfg->vpd.vpd_ros[off].value),
969				    M_DEVBUF, M_WAITOK);
970			remain -= 3;
971			i = 0;
972			/* keep in sync w/ state 3's transistions */
973			if (dflen == 0 && remain == 0)
974				state = 0;
975			else if (dflen == 0)
976				state = 2;
977			else
978				state = 3;
979			break;
980
981		case 3:	/* VPD-R Keyword Value */
982			cfg->vpd.vpd_ros[off].value[i++] = byte;
983			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
984			    "RV", 2) == 0 && cksumvalid == -1) {
985				if (vrs.cksum == 0)
986					cksumvalid = 1;
987				else {
988					if (bootverbose)
989						pci_printf(cfg,
990					    "bad VPD cksum, remain %hhu\n",
991						    vrs.cksum);
992					cksumvalid = 0;
993					state = -1;
994					break;
995				}
996			}
997			dflen--;
998			remain--;
999			/* keep in sync w/ state 2's transistions */
1000			if (dflen == 0)
1001				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1002			if (dflen == 0 && remain == 0) {
1003				cfg->vpd.vpd_rocnt = off;
1004				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1005				    off * sizeof(*cfg->vpd.vpd_ros),
1006				    M_DEVBUF, M_WAITOK | M_ZERO);
1007				state = 0;
1008			} else if (dflen == 0)
1009				state = 2;
1010			break;
1011
1012		case 4:
1013			remain--;
1014			if (remain == 0)
1015				state = 0;
1016			break;
1017
1018		case 5:	/* VPD-W Keyword Header */
1019			if (off == alloc) {
1020				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1021				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1022				    M_DEVBUF, M_WAITOK | M_ZERO);
1023			}
1024			cfg->vpd.vpd_w[off].keyword[0] = byte;
1025			if (vpd_nextbyte(&vrs, &byte2)) {
1026				state = -2;
1027				break;
1028			}
1029			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1030			if (vpd_nextbyte(&vrs, &byte2)) {
1031				state = -2;
1032				break;
1033			}
1034			cfg->vpd.vpd_w[off].len = dflen = byte2;
1035			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1036			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1037			    sizeof(*cfg->vpd.vpd_w[off].value),
1038			    M_DEVBUF, M_WAITOK);
1039			remain -= 3;
1040			i = 0;
		/* keep in sync w/ state 6's transitions */
1042			if (dflen == 0 && remain == 0)
1043				state = 0;
1044			else if (dflen == 0)
1045				state = 5;
1046			else
1047				state = 6;
1048			break;
1049
1050		case 6:	/* VPD-W Keyword Value */
1051			cfg->vpd.vpd_w[off].value[i++] = byte;
1052			dflen--;
1053			remain--;
		/* keep in sync w/ state 5's transitions */
1055			if (dflen == 0)
1056				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1057			if (dflen == 0 && remain == 0) {
1058				cfg->vpd.vpd_wcnt = off;
1059				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1060				    off * sizeof(*cfg->vpd.vpd_w),
1061				    M_DEVBUF, M_WAITOK | M_ZERO);
1062				state = 0;
1063			} else if (dflen == 0)
1064				state = 5;
1065			break;
1066
1067		default:
1068			pci_printf(cfg, "invalid state: %d\n", state);
1069			state = -1;
1070			break;
1071		}
1072	}
1073
1074	if (cksumvalid == 0 || state < -1) {
1075		/* read-only data bad, clean up */
1076		if (cfg->vpd.vpd_ros != NULL) {
1077			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1078				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1079			free(cfg->vpd.vpd_ros, M_DEVBUF);
1080			cfg->vpd.vpd_ros = NULL;
1081		}
1082	}
1083	if (state < -1) {
1084		/* I/O error, clean up */
1085		pci_printf(cfg, "failed to read VPD data.\n");
1086		if (cfg->vpd.vpd_ident != NULL) {
1087			free(cfg->vpd.vpd_ident, M_DEVBUF);
1088			cfg->vpd.vpd_ident = NULL;
1089		}
1090		if (cfg->vpd.vpd_w != NULL) {
1091			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1092				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1093			free(cfg->vpd.vpd_w, M_DEVBUF);
1094			cfg->vpd.vpd_w = NULL;
1095		}
1096	}
1097	cfg->vpd.vpd_cached = 1;
1098#undef REG
1099#undef WREG
1100}
1101
1102int
1103pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1104{
1105	struct pci_devinfo *dinfo = device_get_ivars(child);
1106	pcicfgregs *cfg = &dinfo->cfg;
1107
1108	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1109		pci_read_vpd(device_get_parent(dev), cfg);
1110
1111	*identptr = cfg->vpd.vpd_ident;
1112
1113	if (*identptr == NULL)
1114		return (ENXIO);
1115
1116	return (0);
1117}
1118
1119int
1120pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1121	const char **vptr)
1122{
1123	struct pci_devinfo *dinfo = device_get_ivars(child);
1124	pcicfgregs *cfg = &dinfo->cfg;
1125	int i;
1126
1127	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1128		pci_read_vpd(device_get_parent(dev), cfg);
1129
1130	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1131		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1132		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1133			*vptr = cfg->vpd.vpd_ros[i].value;
1134			return (0);
1135		}
1136
1137	*vptr = NULL;
1138	return (ENXIO);
1139}
1140
1141/*
1142 * Find the requested extended capability and return the offset in
1143 * configuration space via the pointer provided. The function returns
1144 * 0 on success and error code otherwise.
1145 */
1146int
1147pci_find_extcap_method(device_t dev, device_t child, int capability,
1148    int *capreg)
1149{
1150	struct pci_devinfo *dinfo = device_get_ivars(child);
1151	pcicfgregs *cfg = &dinfo->cfg;
1152	u_int32_t status;
1153	u_int8_t ptr;
1154
1155	/*
1156	 * Check the CAP_LIST bit of the PCI status register first.
1157	 */
1158	status = pci_read_config(child, PCIR_STATUS, 2);
1159	if (!(status & PCIM_STATUS_CAPPRESENT))
1160		return (ENXIO);
1161
1162	/*
1163	 * Determine the start pointer of the capabilities list.
1164	 */
1165	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1166	case PCIM_HDRTYPE_NORMAL:
1167	case PCIM_HDRTYPE_BRIDGE:
1168		ptr = PCIR_CAP_PTR;
1169		break;
1170	case PCIM_HDRTYPE_CARDBUS:
1171		ptr = PCIR_CAP_PTR_2;
1172		break;
1173	default:
1174		/* XXX: panic? */
1175		return (ENXIO);		/* no extended capabilities support */
1176	}
1177	ptr = pci_read_config(child, ptr, 1);
1178
1179	/*
1180	 * Traverse the capabilities list.
1181	 */
1182	while (ptr != 0) {
1183		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1184			if (capreg != NULL)
1185				*capreg = ptr;
1186			return (0);
1187		}
1188		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1189	}
1190
1191	return (ENOENT);
1192}
1193
1194/*
1195 * Support for MSI-X message interrupts.
1196 */
/*
 * Program one MSI-X table entry: write the 64-bit message address
 * (low word, then high word) and the message data for vector 'index'.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each MSI-X table entry is 16 bytes wide. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1213
1214void
1215pci_mask_msix(device_t dev, u_int index)
1216{
1217	struct pci_devinfo *dinfo = device_get_ivars(dev);
1218	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1219	uint32_t offset, val;
1220
1221	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1222	offset = msix->msix_table_offset + index * 16 + 12;
1223	val = bus_read_4(msix->msix_table_res, offset);
1224	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1225		val |= PCIM_MSIX_VCTRL_MASK;
1226		bus_write_4(msix->msix_table_res, offset, val);
1227	}
1228}
1229
1230void
1231pci_unmask_msix(device_t dev, u_int index)
1232{
1233	struct pci_devinfo *dinfo = device_get_ivars(dev);
1234	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1235	uint32_t offset, val;
1236
1237	KASSERT(msix->msix_table_len > index, ("bogus index"));
1238	offset = msix->msix_table_offset + index * 16 + 12;
1239	val = bus_read_4(msix->msix_table_res, offset);
1240	if (val & PCIM_MSIX_VCTRL_MASK) {
1241		val &= ~PCIM_MSIX_VCTRL_MASK;
1242		bus_write_4(msix->msix_table_res, offset, val);
1243	}
1244}
1245
1246int
1247pci_pending_msix(device_t dev, u_int index)
1248{
1249	struct pci_devinfo *dinfo = device_get_ivars(dev);
1250	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1251	uint32_t offset, bit;
1252
1253	KASSERT(msix->msix_table_len > index, ("bogus index"));
1254	offset = msix->msix_pba_offset + (index / 32) * 4;
1255	bit = 1 << index % 32;
1256	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1257}
1258
1259/*
1260 * Restore MSI-X registers and table during resume.  If MSI-X is
1261 * enabled then walk the virtual table to restore the actual MSI-X
1262 * table.
1263 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1291
1292/*
1293 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1294 * returned in *count.  After this function returns, each message will be
1295 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1296 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have activated the memory BAR(s) containing the MSI-X table
	 * and PBA before asking for messages.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	/* The PBA may live in a different BAR than the table. */
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	/*
	 * Allocate messages one at a time; a partial allocation is
	 * accepted as long as at least one message succeeds.
	 */
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	/* Initially, message i uses vector i + 1 (a 1:1 identity mapping). */
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1431
1432/*
1433 * By default, pci_alloc_msix() will assign the allocated IRQ
1434 * resources consecutively to the first N messages in the MSI-X table.
1435 * However, device drivers may want to use different layouts if they
1436 * either receive fewer messages than they asked for, or they wish to
1437 * populate the MSI-X table sparsely.  This method allows the driver
1438 * to specify what layout it wants.  It must be called after a
1439 * successful pci_alloc_msix() but before any of the associated
1440 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1441 *
1442 * The 'vectors' array contains 'count' message vectors.  The array
1443 * maps directly to the MSI-X table in that index 0 in the array
1444 * specifies the vector for the first message in the MSI-X table, etc.
1445 * The vector value in each array index can either be 0 to indicate
1446 * that no vector should be assigned to a message slot, or it can be a
1447 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1449 * vector (IRQ) to be used for the corresponding message.
1450 *
1451 * On successful return, each message with a non-zero vector will have
1452 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1453 * 1.  Additionally, if any of the IRQs allocated via the previous
1454 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1455 * will be freed back to the system automatically.
1456 *
1457 * For example, suppose a driver has a MSI-X table with 6 messages and
1458 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1459 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1460 * C.  After the call to pci_alloc_msix(), the device will be setup to
1461 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1463 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1464 * be freed back to the system.  This device will also have valid
1465 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1466 *
1467 * In any case, the SYS_RES_IRQ rid X will always map to the message
1468 * at MSI-X table index X - 1 and will only be valid if a vector is
1469 * assigned to that table entry.
1470 */
1471int
1472pci_remap_msix_method(device_t dev, device_t child, int count,
1473    const u_int *vectors)
1474{
1475	struct pci_devinfo *dinfo = device_get_ivars(child);
1476	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1477	struct resource_list_entry *rle;
1478	int i, irq, j, *used;
1479
1480	/*
1481	 * Have to have at least one message in the table but the
1482	 * table can't be bigger than the actual MSI-X table in the
1483	 * device.
1484	 */
1485	if (count == 0 || count > msix->msix_msgnum)
1486		return (EINVAL);
1487
1488	/* Sanity check the vectors. */
1489	for (i = 0; i < count; i++)
1490		if (vectors[i] > msix->msix_alloc)
1491			return (EINVAL);
1492
1493	/*
1494	 * Make sure there aren't any holes in the vectors to be used.
1495	 * It's a big pain to support it, and it doesn't really make
1496	 * sense anyway.  Also, at least one vector must be used.
1497	 */
1498	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1499	    M_ZERO);
1500	for (i = 0; i < count; i++)
1501		if (vectors[i] != 0)
1502			used[vectors[i] - 1] = 1;
1503	for (i = 0; i < msix->msix_alloc - 1; i++)
1504		if (used[i] == 0 && used[i + 1] == 1) {
1505			free(used, M_DEVBUF);
1506			return (EINVAL);
1507		}
1508	if (used[0] != 1) {
1509		free(used, M_DEVBUF);
1510		return (EINVAL);
1511	}
1512
1513	/* Make sure none of the resources are allocated. */
1514	for (i = 0; i < msix->msix_table_len; i++) {
1515		if (msix->msix_table[i].mte_vector == 0)
1516			continue;
1517		if (msix->msix_table[i].mte_handlers > 0)
1518			return (EBUSY);
1519		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1520		KASSERT(rle != NULL, ("missing resource"));
1521		if (rle->res != NULL)
1522			return (EBUSY);
1523	}
1524
1525	/* Free the existing resource list entries. */
1526	for (i = 0; i < msix->msix_table_len; i++) {
1527		if (msix->msix_table[i].mte_vector == 0)
1528			continue;
1529		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1530	}
1531
1532	/*
1533	 * Build the new virtual table keeping track of which vectors are
1534	 * used.
1535	 */
1536	free(msix->msix_table, M_DEVBUF);
1537	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1538	    M_DEVBUF, M_WAITOK | M_ZERO);
1539	for (i = 0; i < count; i++)
1540		msix->msix_table[i].mte_vector = vectors[i];
1541	msix->msix_table_len = count;
1542
1543	/* Free any unused IRQs and resize the vectors array if necessary. */
1544	j = msix->msix_alloc - 1;
1545	if (used[j] == 0) {
1546		struct msix_vector *vec;
1547
1548		while (used[j] == 0) {
1549			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1550			    msix->msix_vectors[j].mv_irq);
1551			j--;
1552		}
1553		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1554		    M_WAITOK);
1555		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1556		    (j + 1));
1557		free(msix->msix_vectors, M_DEVBUF);
1558		msix->msix_vectors = vec;
1559		msix->msix_alloc = j + 1;
1560	}
1561	free(used, M_DEVBUF);
1562
1563	/* Map the IRQs onto the rids. */
1564	for (i = 0; i < count; i++) {
1565		if (vectors[i] == 0)
1566			continue;
1567		irq = msix->msix_vectors[vectors[i]].mv_irq;
1568		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1569		    irq, 1);
1570	}
1571
1572	if (bootverbose) {
1573		device_printf(child, "Remapped MSI-X IRQs as: ");
1574		for (i = 0; i < count; i++) {
1575			if (i != 0)
1576				printf(", ");
1577			if (vectors[i] == 0)
1578				printf("---");
1579			else
1580				printf("%d",
1581				    msix->msix_vectors[vectors[i]].mv_irq);
1582		}
1583		printf("\n");
1584	}
1585
1586	return (0);
1587}
1588
/*
 * Release all allocated MSI-X messages for 'child': verify that no
 * message still has a handler or an allocated SYS_RES_IRQ resource,
 * disable MSI-X in the control register, delete the resource list
 * entries, and hand the IRQs back to the parent bridge.  Returns
 * ENODEV if nothing is allocated and EBUSY if any message is in use.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1635
1636/*
1637 * Return the max supported MSI-X messages this device supports.
1638 * Basically, assuming the MD code can alloc messages, this function
1639 * should return the maximum value that pci_alloc_msix() can return.
1640 * Thus, it is subject to the tunables, etc.
1641 */
1642int
1643pci_msix_count_method(device_t dev, device_t child)
1644{
1645	struct pci_devinfo *dinfo = device_get_ivars(child);
1646	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1647
1648	if (pci_do_msix && msix->msix_location != 0)
1649		return (msix->msix_msgnum);
1650	return (0);
1651}
1652
1653/*
1654 * HyperTransport MSI mapping control
1655 */
void
pci_ht_map_msi(device_t dev, uint64_t addr)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_ht *ht = &dinfo->cfg.ht;

	/* Nothing to do without an HT MSI mapping capability. */
	if (!ht->ht_msimap)
		return;

	/*
	 * Only enable the mapping when the MSI address falls within the
	 * mapping window (the upper bits above the 1MB boundary match).
	 */
	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
	    ht->ht_msiaddr >> 20 == addr >> 20) {
		/* Enable MSI -> HT mapping. */
		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}

	/* An address of 0 requests that the mapping be turned off. */
	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
		/* Disable MSI -> HT mapping. */
		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
		    ht->ht_msictrl, 2);
	}
}
1680
1681int
1682pci_get_max_read_req(device_t dev)
1683{
1684	int cap;
1685	uint16_t val;
1686
1687	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1688		return (0);
1689	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1690	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1691	val >>= 12;
1692	return (1 << (val + 7));
1693}
1694
1695int
1696pci_set_max_read_req(device_t dev, int size)
1697{
1698	int cap;
1699	uint16_t val;
1700
1701	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1702		return (0);
1703	if (size < 128)
1704		size = 128;
1705	if (size > 4096)
1706		size = 4096;
1707	size = (1 << (fls(size) - 1));
1708	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1709	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1710	val |= (fls(size) - 8) << 12;
1711	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1712	return (size);
1713}
1714
1715/*
1716 * Support for MSI message signalled interrupts.
1717 */
/*
 * Program and enable MSI for 'dev': write the message address and data
 * registers, then set the enable bit in the MSI control register.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* The data register's offset depends on 64-bit address support. */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1744
/*
 * Disable MSI for 'dev': tear down any MSI -> HT mapping first, then
 * clear the enable bit in the MSI control register.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1759
1760/*
1761 * Restore MSI registers during resume.  If MSI is enabled then
1762 * restore the data and address registers in addition to the control
1763 * register.
1764 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register's offset depends on 64-bit support. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1790
1791static int
1792pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1793{
1794	struct pci_devinfo *dinfo = device_get_ivars(dev);
1795	pcicfgregs *cfg = &dinfo->cfg;
1796	struct resource_list_entry *rle;
1797	struct msix_table_entry *mte;
1798	struct msix_vector *mv;
1799	uint64_t addr;
1800	uint32_t data;
1801	int error, i, j;
1802
1803	/*
1804	 * Handle MSI first.  We try to find this IRQ among our list
1805	 * of MSI IRQs.  If we find it, we request updated address and
1806	 * data registers and apply the results.
1807	 */
1808	if (cfg->msi.msi_alloc > 0) {
1809
1810		/* If we don't have any active handlers, nothing to do. */
1811		if (cfg->msi.msi_handlers == 0)
1812			return (0);
1813		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1814			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1815			    i + 1);
1816			if (rle->start == irq) {
1817				error = PCIB_MAP_MSI(device_get_parent(bus),
1818				    dev, irq, &addr, &data);
1819				if (error)
1820					return (error);
1821				pci_disable_msi(dev);
1822				dinfo->cfg.msi.msi_addr = addr;
1823				dinfo->cfg.msi.msi_data = data;
1824				pci_enable_msi(dev, addr, data);
1825				return (0);
1826			}
1827		}
1828		return (ENOENT);
1829	}
1830
1831	/*
1832	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1833	 * we request the updated mapping info.  If that works, we go
1834	 * through all the slots that use this IRQ and update them.
1835	 */
1836	if (cfg->msix.msix_alloc > 0) {
1837		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1838			mv = &cfg->msix.msix_vectors[i];
1839			if (mv->mv_irq == irq) {
1840				error = PCIB_MAP_MSI(device_get_parent(bus),
1841				    dev, irq, &addr, &data);
1842				if (error)
1843					return (error);
1844				mv->mv_address = addr;
1845				mv->mv_data = data;
1846				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1847					mte = &cfg->msix.msix_table[j];
1848					if (mte->mte_vector != i + 1)
1849						continue;
1850					if (mte->mte_handlers == 0)
1851						continue;
1852					pci_mask_msix(dev, j);
1853					pci_enable_msix(dev, j, addr, data);
1854					pci_unmask_msix(dev, j);
1855				}
1856			}
1857		}
1858		return (ENOENT);
1859	}
1860
1861	return (ENOENT);
1862}
1863
1864/*
1865 * Returns true if the specified device is blacklisted because MSI
1866 * doesn't work.
1867 */
1868int
1869pci_msi_device_blacklisted(device_t dev)
1870{
1871	const struct pci_quirk *q;
1872
1873	if (!pci_honor_msi_blacklist)
1874		return (0);
1875
1876	for (q = &pci_quirks[0]; q->devid; q++) {
1877		if (q->devid == pci_get_devid(dev) &&
1878		    q->type == PCI_QUIRK_DISABLE_MSI)
1879			return (1);
1880	}
1881	return (0);
1882}
1883
1884/*
1885 * Returns true if a specified chipset supports MSI when it is
1886 * emulated hardware in a virtual machine.
1887 */
1888static int
1889pci_msi_vm_chipset(device_t dev)
1890{
1891	const struct pci_quirk *q;
1892
1893	for (q = &pci_quirks[0]; q->devid; q++) {
1894		if (q->devid == pci_get_devid(dev) &&
1895		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1896			return (1);
1897	}
1898	return (0);
1899}
1900
1901/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1903 * we just check for blacklisted chipsets as represented by the
1904 * host-PCI bridge at device 0:0:0.  In the future, it may become
1905 * necessary to check other system attributes, such as the kenv values
1906 * that give the motherboard manufacturer and model number.
1907 */
1908static int
1909pci_msi_blacklisted(void)
1910{
1911	device_t dev;
1912
1913	if (!pci_honor_msi_blacklist)
1914		return (0);
1915
1916	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1917	if (!(pcie_chipset || pcix_chipset)) {
1918		if (vm_guest != VM_GUEST_NO) {
1919			dev = pci_find_bsf(0, 0, 0);
1920			if (dev != NULL)
1921				return (pci_msi_vm_chipset(dev) == 0);
1922		}
1923		return (1);
1924	}
1925
1926	dev = pci_find_bsf(0, 0, 0);
1927	if (dev != NULL)
1928		return (pci_msi_device_blacklisted(dev));
1929	return (0);
1930}
1931
1932/*
1933 * Attempt to allocate *count MSI messages.  The actual number allocated is
1934 * returned in *count.  After this function returns, each message will be
1935 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1936 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Repeatedly halve the request until the parent bridge can
	 * satisfy it (or fail outright at a single message).
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field encodes log2(count) in bits 6:4. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2055
2056/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers for the release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2104
2105/*
2106 * Return the max supported MSI messages this device supports.
2107 * Basically, assuming the MD code can alloc messages, this function
2108 * should return the maximum value that pci_alloc_msi() can return.
2109 * Thus, it is subject to the tunables, etc.
2110 */
2111int
2112pci_msi_count_method(device_t dev, device_t child)
2113{
2114	struct pci_devinfo *dinfo = device_get_ivars(child);
2115	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2116
2117	if (pci_do_msi && msi->msi_location != 0)
2118		return (msi->msi_msgnum);
2119	return (0);
2120}
2121
2122/* free pcicfgregs structure and all depending data structures */
2123
2124int
2125pci_freecfg(struct pci_devinfo *dinfo)
2126{
2127	struct devlist *devlist_head;
2128	struct pci_map *pm, *next;
2129	int i;
2130
2131	devlist_head = &pci_devq;
2132
2133	if (dinfo->cfg.vpd.vpd_reg) {
2134		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2135		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2136			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2137		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2138		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2139			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2140		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2141	}
2142	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2143		free(pm, M_DEVBUF);
2144	}
2145	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2146	free(dinfo, M_DEVBUF);
2147
2148	/* increment the generation count */
2149	pci_generation++;
2150
2151	/* we're losing one device */
2152	pci_numdevs--;
2153	return (0);
2154}
2155
2156/*
2157 * PCI power manangement
2158 */
2159int
2160pci_set_powerstate_method(device_t dev, device_t child, int state)
2161{
2162	struct pci_devinfo *dinfo = device_get_ivars(child);
2163	pcicfgregs *cfg = &dinfo->cfg;
2164	uint16_t status;
2165	int result, oldstate, highest, delay;
2166
2167	if (cfg->pp.pp_cap == 0)
2168		return (EOPNOTSUPP);
2169
2170	/*
2171	 * Optimize a no state change request away.  While it would be OK to
2172	 * write to the hardware in theory, some devices have shown odd
2173	 * behavior when going from D3 -> D3.
2174	 */
2175	oldstate = pci_get_powerstate(child);
2176	if (oldstate == state)
2177		return (0);
2178
2179	/*
2180	 * The PCI power management specification states that after a state
2181	 * transition between PCI power states, system software must
2182	 * guarantee a minimal delay before the function accesses the device.
2183	 * Compute the worst case delay that we need to guarantee before we
2184	 * access the device.  Many devices will be responsive much more
2185	 * quickly than this delay, but there are some that don't respond
2186	 * instantly to state changes.  Transitions to/from D3 state require
2187	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2188	 * is done below with DELAY rather than a sleeper function because
2189	 * this function can be called from contexts where we cannot sleep.
2190	 */
2191	highest = (oldstate > state) ? oldstate : state;
2192	if (highest == PCI_POWERSTATE_D3)
2193	    delay = 10000;
2194	else if (highest == PCI_POWERSTATE_D2)
2195	    delay = 200;
2196	else
2197	    delay = 0;
2198	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2199	    & ~PCIM_PSTAT_DMASK;
2200	result = 0;
2201	switch (state) {
2202	case PCI_POWERSTATE_D0:
2203		status |= PCIM_PSTAT_D0;
2204		break;
2205	case PCI_POWERSTATE_D1:
2206		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2207			return (EOPNOTSUPP);
2208		status |= PCIM_PSTAT_D1;
2209		break;
2210	case PCI_POWERSTATE_D2:
2211		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2212			return (EOPNOTSUPP);
2213		status |= PCIM_PSTAT_D2;
2214		break;
2215	case PCI_POWERSTATE_D3:
2216		status |= PCIM_PSTAT_D3;
2217		break;
2218	default:
2219		return (EINVAL);
2220	}
2221
2222	if (bootverbose)
2223		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2224		    state);
2225
2226	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2227	if (delay)
2228		DELAY(delay);
2229	return (0);
2230}
2231
2232int
2233pci_get_powerstate_method(device_t dev, device_t child)
2234{
2235	struct pci_devinfo *dinfo = device_get_ivars(child);
2236	pcicfgregs *cfg = &dinfo->cfg;
2237	uint16_t status;
2238	int result;
2239
2240	if (cfg->pp.pp_cap != 0) {
2241		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2242		switch (status & PCIM_PSTAT_DMASK) {
2243		case PCIM_PSTAT_D0:
2244			result = PCI_POWERSTATE_D0;
2245			break;
2246		case PCIM_PSTAT_D1:
2247			result = PCI_POWERSTATE_D1;
2248			break;
2249		case PCIM_PSTAT_D2:
2250			result = PCI_POWERSTATE_D2;
2251			break;
2252		case PCIM_PSTAT_D3:
2253			result = PCI_POWERSTATE_D3;
2254			break;
2255		default:
2256			result = PCI_POWERSTATE_UNKNOWN;
2257			break;
2258		}
2259	} else {
2260		/* No support, device is always at D0 */
2261		result = PCI_POWERSTATE_D0;
2262	}
2263	return (result);
2264}
2265
2266/*
2267 * Some convenience functions for PCI device drivers.
2268 */
2269
2270static __inline void
2271pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2272{
2273	uint16_t	command;
2274
2275	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2276	command |= bit;
2277	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2278}
2279
2280static __inline void
2281pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2282{
2283	uint16_t	command;
2284
2285	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2286	command &= ~bit;
2287	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2288}
2289
2290int
2291pci_enable_busmaster_method(device_t dev, device_t child)
2292{
2293	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2294	return (0);
2295}
2296
2297int
2298pci_disable_busmaster_method(device_t dev, device_t child)
2299{
2300	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2301	return (0);
2302}
2303
2304int
2305pci_enable_io_method(device_t dev, device_t child, int space)
2306{
2307	uint16_t bit;
2308
2309	switch(space) {
2310	case SYS_RES_IOPORT:
2311		bit = PCIM_CMD_PORTEN;
2312		break;
2313	case SYS_RES_MEMORY:
2314		bit = PCIM_CMD_MEMEN;
2315		break;
2316	default:
2317		return (EINVAL);
2318	}
2319	pci_set_command_bit(dev, child, bit);
2320	return (0);
2321}
2322
2323int
2324pci_disable_io_method(device_t dev, device_t child, int space)
2325{
2326	uint16_t bit;
2327
2328	switch(space) {
2329	case SYS_RES_IOPORT:
2330		bit = PCIM_CMD_PORTEN;
2331		break;
2332	case SYS_RES_MEMORY:
2333		bit = PCIM_CMD_MEMEN;
2334		break;
2335	default:
2336		return (EINVAL);
2337	}
2338	pci_clear_command_bit(dev, child, bit);
2339	return (0);
2340}
2341
2342/*
2343 * New style pci driver.  Parent device is either a pci-host-bridge or a
2344 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2345 */
2346
/*
 * Dump a verbose description of a newly found device to the console.
 * Only prints when booting verbose; covers identity, location, class,
 * command/status/timing registers, interrupt pin, and the power
 * management, MSI and MSI-X capabilities when present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Current power state comes from the live PMCSR. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2403
2404static int
2405pci_porten(device_t dev)
2406{
2407	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2408}
2409
2410static int
2411pci_memen(device_t dev)
2412{
2413	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2414}
2415
/*
 * Read a BAR's current value into *mapp and its sizing probe value
 * (all 1's written, then read back) into *testvalp, restoring the
 * original BAR contents afterwards.  Handles 64-bit memory BARs and
 * the expansion ROM BAR.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the enable bit (bit 0) clear while sizing. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds its real value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2479
2480static void
2481pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2482{
2483	struct pci_devinfo *dinfo;
2484	int ln2range;
2485
2486	/* The device ROM BAR is always a 32-bit memory BAR. */
2487	dinfo = device_get_ivars(dev);
2488	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2489		ln2range = 32;
2490	else
2491		ln2range = pci_maprange(pm->pm_value);
2492	pci_write_config(dev, pm->pm_reg, base, 4);
2493	if (ln2range == 64)
2494		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2495	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2496	if (ln2range == 64)
2497		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2498		    pm->pm_reg + 4, 4) << 32;
2499}
2500
2501struct pci_map *
2502pci_find_bar(device_t dev, int reg)
2503{
2504	struct pci_devinfo *dinfo;
2505	struct pci_map *pm;
2506
2507	dinfo = device_get_ivars(dev);
2508	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2509		if (pm->pm_reg == reg)
2510			return (pm);
2511	}
2512	return (NULL);
2513}
2514
2515int
2516pci_bar_enabled(device_t dev, struct pci_map *pm)
2517{
2518	struct pci_devinfo *dinfo;
2519	uint16_t cmd;
2520
2521	dinfo = device_get_ivars(dev);
2522	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2523	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2524		return (0);
2525	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2526	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2527		return ((cmd & PCIM_CMD_MEMEN) != 0);
2528	else
2529		return ((cmd & PCIM_CMD_PORTEN) != 0);
2530}
2531
/*
 * Record a BAR in the device's map list, kept sorted by config register
 * offset.  'value' is the raw BAR contents, 'size' the log2 of the BAR
 * length.  Returns the new entry.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Walk the list looking at each entry's successor: stop at the
	 * last entry whose successor is absent or sorts after the new
	 * BAR, so insertion after 'prev' keeps the list ordered.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev is NULL only when the list was empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2556
2557static void
2558pci_restore_bars(device_t dev)
2559{
2560	struct pci_devinfo *dinfo;
2561	struct pci_map *pm;
2562	int ln2range;
2563
2564	dinfo = device_get_ivars(dev);
2565	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2566		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2567			ln2range = 32;
2568		else
2569			ln2range = pci_maprange(pm->pm_value);
2570		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2571		if (ln2range == 64)
2572			pci_write_config(dev, pm->pm_reg + 4,
2573			    pm->pm_value >> 32, 4);
2574	}
2575}
2576
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 *
 * Probes the BAR at config offset 'reg', records it, adds a matching
 * entry to resource list 'rl', and tries to reserve the range from the
 * parent bus.  'force' allows entries with a zero base (or all-ones
 * contents) to be added anyway; 'prefetch' requests RF_PREFETCHABLE.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR length; see 'count' below. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Skip BARs whose address does not fit in this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/*
	 * A zero base (where allowed) or a base equal to the sizing probe
	 * result means the BIOS never assigned an address: let the parent
	 * pick any range.  Otherwise request the exact programmed range.
	 */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	/* Program the BAR with whatever address was finally chosen (or 0). */
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2725
2726/*
2727 * For ATA devices we need to decide early what addressing mode to use.
2728 * Legacy demands that the primary and secondary ATA ports sits on the
2729 * same addresses that old ISA hardware did. This dictates that we use
2730 * those addresses and ignore the BAR's if we cannot set PCI native
2731 * addressing mode.
2732 */
2733static void
2734pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2735    uint32_t prefetchmask)
2736{
2737	struct resource *r;
2738	int rid, type, progif;
2739#if 0
2740	/* if this device supports PCI native addressing use it */
2741	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2742	if ((progif & 0x8a) == 0x8a) {
2743		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2744		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2745			printf("Trying ATA native PCI addressing mode\n");
2746			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2747		}
2748	}
2749#endif
2750	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2751	type = SYS_RES_IOPORT;
2752	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2753		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2754		    prefetchmask & (1 << 0));
2755		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2756		    prefetchmask & (1 << 1));
2757	} else {
2758		rid = PCIR_BAR(0);
2759		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2760		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2761		    0x1f7, 8, 0);
2762		rid = PCIR_BAR(1);
2763		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2764		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2765		    0x3f6, 1, 0);
2766	}
2767	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2768		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2769		    prefetchmask & (1 << 2));
2770		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2771		    prefetchmask & (1 << 3));
2772	} else {
2773		rid = PCIR_BAR(2);
2774		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2775		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2776		    0x177, 8, 0);
2777		rid = PCIR_BAR(3);
2778		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2779		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2780		    0x376, 1, 0);
2781	}
2782	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2783	    prefetchmask & (1 << 4));
2784	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2785	    prefetchmask & (1 << 5));
2786}
2787
/*
 * Decide which IRQ this device's INTx pin should use and record it as
 * SYS_RES_IRQ rid 0.  Precedence: user tunable, then (depending on
 * 'force_route') either the bus's interrupt routing or the intline
 * register left by the firmware.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable.  Note the tunable
	 * name is keyed on domain/bus/slot/pin; the function number is
	 * not part of it.  Only values 1..254 are accepted.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2835
/*
 * Perform early OHCI takeover from SMM: if the BIOS (via SMM) owns the
 * controller, request an ownership change, wait up to ~100ms for it to
 * be granted, reset the controller if the SMM does not respond, and
 * disable controller interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means an SMM driver currently owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for the SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2872
2873/* Perform early UHCI takeover from SMM. */
2874static void
2875uhci_early_takeover(device_t self)
2876{
2877	struct resource *res;
2878	int rid;
2879
2880	/*
2881	 * Set the PIRQD enable bit and switch off all the others. We don't
2882	 * want legacy support to interfere with us XXX Does this also mean
2883	 * that the BIOS won't touch the keyboard anymore if it is connected
2884	 * to the ports of the root hub?
2885	 */
2886	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2887
2888	/* Disable interrupts */
2889	rid = PCI_UHCI_BASE_REG;
2890	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2891	if (res != NULL) {
2892		bus_write_2(res, UHCI_INTR, 0);
2893		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2894	}
2895}
2896
/*
 * Perform early EHCI takeover from SMM: walk the extended capability
 * list in config space looking for the legacy-support capability,
 * claim the OS ownership semaphore, wait up to ~100ms for the BIOS to
 * release its semaphore, then disable controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* EHCI extended capabilities live in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Non-zero BIOS semaphore: the BIOS claims ownership. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2952
/*
 * Perform early XHCI takeover from SMM: walk the extended capability
 * list in MMIO space looking for the USB legacy-support capability,
 * claim the OS ownership semaphore, wait up to 5 seconds for the BIOS
 * to release its semaphore, then stop the controller.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Sentinel so XHCI_XECP_NEXT(eec) is non-zero on the first test. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		/* xHCI extended capabilities live in MMIO, not config space. */
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Non-zero BIOS semaphore: the BIOS claims ownership. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read status back to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3014
/*
 * Populate the resource list for a newly discovered device: probe its
 * BARs (with special handling for legacy ATA controllers and quirked
 * devices), assign its INTx interrupt, and perform early takeover of
 * USB controllers from SMM when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	const struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2, skipping the high half
		   of 64-bit BARs. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3071
/*
 * Scan every slot/function on bus 'busno' in 'domain' and add a child
 * device for each function that responds.  'dinfo_size' lets subclassed
 * buses allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Function 0 determines whether the slot is populated. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function flag: probe all functions, else just 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3104
3105void
3106pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3107{
3108	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3109	device_set_ivars(dinfo->cfg.dev, dinfo);
3110	resource_list_init(&dinfo->resources);
3111	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3112	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3113	pci_print_verbose(dinfo);
3114	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3115}
3116
/*
 * Bus probe method: always matches, but at generic priority so a more
 * specific (subclassed) PCI bus driver can claim the device instead.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3126
3127static int
3128pci_attach(device_t dev)
3129{
3130	int busno, domain;
3131
3132	/*
3133	 * Since there can be multiple independantly numbered PCI
3134	 * busses on systems with multiple PCI domains, we can't use
3135	 * the unit number to decide which bus we are probing. We ask
3136	 * the parent pcib what our domain and bus numbers are.
3137	 */
3138	domain = pcib_get_domain(dev);
3139	busno = pcib_get_bus(dev);
3140	if (bootverbose)
3141		device_printf(dev, "domain=%d, physical bus=%d\n",
3142		    domain, busno);
3143	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3144	return (bus_generic_attach(dev));
3145}
3146
3147static void
3148pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3149    int state)
3150{
3151	device_t child, pcib;
3152	struct pci_devinfo *dinfo;
3153	int dstate, i;
3154
3155	/*
3156	 * Set the device to the given state.  If the firmware suggests
3157	 * a different power state, use it instead.  If power management
3158	 * is not present, the firmware is responsible for managing
3159	 * device power.  Skip children who aren't attached since they
3160	 * are handled separately.
3161	 */
3162	pcib = device_get_parent(dev);
3163	for (i = 0; i < numdevs; i++) {
3164		child = devlist[i];
3165		dinfo = device_get_ivars(child);
3166		dstate = state;
3167		if (device_is_attached(child) &&
3168		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3169			pci_set_powerstate(child, dstate);
3170	}
3171}
3172
3173int
3174pci_suspend(device_t dev)
3175{
3176	device_t child, *devlist;
3177	struct pci_devinfo *dinfo;
3178	int error, i, numdevs;
3179
3180	/*
3181	 * Save the PCI configuration space for each child and set the
3182	 * device in the appropriate power state for this sleep state.
3183	 */
3184	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3185		return (error);
3186	for (i = 0; i < numdevs; i++) {
3187		child = devlist[i];
3188		dinfo = device_get_ivars(child);
3189		pci_cfg_save(child, dinfo, 0);
3190	}
3191
3192	/* Suspend devices before potentially powering them down. */
3193	error = bus_generic_suspend(dev);
3194	if (error) {
3195		free(devlist, M_TEMP);
3196		return (error);
3197	}
3198	if (pci_do_power_suspend)
3199		pci_set_power_children(dev, devlist, numdevs,
3200		    PCI_POWERSTATE_D3);
3201	free(devlist, M_TEMP);
3202	return (0);
3203}
3204
3205int
3206pci_resume(device_t dev)
3207{
3208	device_t child, *devlist;
3209	struct pci_devinfo *dinfo;
3210	int error, i, numdevs;
3211
3212	/*
3213	 * Set each child to D0 and restore its PCI configuration space.
3214	 */
3215	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3216		return (error);
3217	if (pci_do_power_resume)
3218		pci_set_power_children(dev, devlist, numdevs,
3219		    PCI_POWERSTATE_D0);
3220
3221	/* Now the device is powered up, restore its config space. */
3222	for (i = 0; i < numdevs; i++) {
3223		child = devlist[i];
3224		dinfo = device_get_ivars(child);
3225
3226		pci_cfg_restore(child, dinfo);
3227		if (!device_is_attached(child))
3228			pci_cfg_save(child, dinfo, 1);
3229	}
3230
3231	/*
3232	 * Resume critical devices first, then everything else later.
3233	 */
3234	for (i = 0; i < numdevs; i++) {
3235		child = devlist[i];
3236		switch (pci_get_class(child)) {
3237		case PCIC_DISPLAY:
3238		case PCIC_MEMORY:
3239		case PCIC_BRIDGE:
3240		case PCIC_BASEPERIPH:
3241			DEVICE_RESUME(child);
3242			break;
3243		}
3244	}
3245	for (i = 0; i < numdevs; i++) {
3246		child = devlist[i];
3247		switch (pci_get_class(child)) {
3248		case PCIC_DISPLAY:
3249		case PCIC_MEMORY:
3250		case PCIC_BRIDGE:
3251		case PCIC_BASEPERIPH:
3252			break;
3253		default:
3254			DEVICE_RESUME(child);
3255		}
3256	}
3257	free(devlist, M_TEMP);
3258	return (0);
3259}
3260
/*
 * Locate a PCI vendor/device description database preloaded by the
 * loader (type "pci_vendor_data") and publish its address and size
 * for use by pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database with a newline so the
			 * parser cannot run off the end of a corrupt file.
			 * NOTE(review): this writes one byte past 'sz' —
			 * presumably the preload area is padded; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3280
/*
 * Bus method invoked when a new driver is registered: give the driver
 * a chance to identify new children, then re-probe every child that
 * does not yet have a driver attached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	/* Let the new driver create any child devices it knows about. */
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Restore config state before probing. */
		pci_cfg_restore(child, dinfo);
		/* If nothing attached, re-save state (setstate arg 1). */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3309
/*
 * Bus setup_intr method.  After establishing the handler generically,
 * perform the PCI-specific bookkeeping for direct children: rid 0 is
 * the legacy INTx interrupt (ensure INTx is enabled in the command
 * register); any other rid is an MSI or MSI-X vector, which must be
 * mapped through the parent bridge, programmed into the device, and
 * reference-counted so it is only enabled/unmasked once.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Only enable MSI when the first handler arrives. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first use only. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3401
3402int
3403pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3404    void *cookie)
3405{
3406	struct msix_table_entry *mte;
3407	struct resource_list_entry *rle;
3408	struct pci_devinfo *dinfo;
3409	int error, rid;
3410
3411	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3412		return (EINVAL);
3413
3414	/* If this isn't a direct child, just bail out */
3415	if (device_get_parent(child) != dev)
3416		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3417
3418	rid = rman_get_rid(irq);
3419	if (rid == 0) {
3420		/* Mask INTx */
3421		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3422	} else {
3423		/*
3424		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3425		 * decrement the appropriate handlers count and mask the
3426		 * MSI-X message, or disable MSI messages if the count
3427		 * drops to 0.
3428		 */
3429		dinfo = device_get_ivars(child);
3430		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3431		if (rle->res != irq)
3432			return (EINVAL);
3433		if (dinfo->cfg.msi.msi_alloc > 0) {
3434			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3435			    ("MSI-X index too high"));
3436			if (dinfo->cfg.msi.msi_handlers == 0)
3437				return (EINVAL);
3438			dinfo->cfg.msi.msi_handlers--;
3439			if (dinfo->cfg.msi.msi_handlers == 0)
3440				pci_disable_msi(child);
3441		} else {
3442			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3443			    ("No MSI or MSI-X interrupts allocated"));
3444			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3445			    ("MSI-X index too high"));
3446			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3447			if (mte->mte_handlers == 0)
3448				return (EINVAL);
3449			mte->mte_handlers--;
3450			if (mte->mte_handlers == 0)
3451				pci_mask_msix(child, rid - 1);
3452		}
3453	}
3454	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3455	if (rid > 0)
3456		KASSERT(error == 0,
3457		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3458	return (error);
3459}
3460
3461int
3462pci_print_child(device_t dev, device_t child)
3463{
3464	struct pci_devinfo *dinfo;
3465	struct resource_list *rl;
3466	int retval = 0;
3467
3468	dinfo = device_get_ivars(child);
3469	rl = &dinfo->resources;
3470
3471	retval += bus_print_child_header(dev, child);
3472
3473	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3474	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3475	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3476	if (device_get_flags(dev))
3477		retval += printf(" flags %#x", device_get_flags(dev));
3478
3479	retval += printf(" at device %d.%d", pci_get_slot(child),
3480	    pci_get_function(child));
3481
3482	retval += bus_print_child_footer(dev, child);
3483
3484	return (retval);
3485}
3486
/*
 * Table mapping PCI class/subclass codes to human-readable names, used
 * by pci_probe_nomatch() to describe devices with no attached driver.
 * A subclass of -1 supplies the generic description for the whole
 * class; the table is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3578
3579void
3580pci_probe_nomatch(device_t dev, device_t child)
3581{
3582	int	i;
3583	char	*cp, *scp, *device;
3584
3585	/*
3586	 * Look for a listing for this device in a loaded device database.
3587	 */
3588	if ((device = pci_describe_device(child)) != NULL) {
3589		device_printf(dev, "<%s>", device);
3590		free(device, M_DEVBUF);
3591	} else {
3592		/*
3593		 * Scan the class/subclass descriptions for a general
3594		 * description.
3595		 */
3596		cp = "unknown";
3597		scp = NULL;
3598		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3599			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3600				if (pci_nomatch_tab[i].subclass == -1) {
3601					cp = pci_nomatch_tab[i].desc;
3602				} else if (pci_nomatch_tab[i].subclass ==
3603				    pci_get_subclass(child)) {
3604					scp = pci_nomatch_tab[i].desc;
3605				}
3606			}
3607		}
3608		device_printf(dev, "<%s%s%s>",
3609		    cp ? cp : "",
3610		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3611		    scp ? scp : "");
3612	}
3613	printf(" at device %d.%d (no driver attached)\n",
3614	    pci_get_slot(child), pci_get_function(child));
3615	pci_cfg_save(child, device_get_ivars(child), 1);
3616	return;
3617}
3618
3619/*
3620 * Parse the PCI device database, if loaded, and return a pointer to a
3621 * description of the device.
3622 *
3623 * The database is flat text formatted as follows:
3624 *
3625 * Any line not in a valid format is ignored.
3626 * Lines are terminated with newline '\n' characters.
3627 *
3628 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3629 * the vendor name.
3630 *
3631 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3632 * - devices cannot be listed without a corresponding VENDOR line.
3633 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3634 * another TAB, then the device name.
3635 */
3636
3637/*
3638 * Assuming (ptr) points to the beginning of a line in the database,
3639 * return the vendor or device and description of the next entry.
3640 * The value of (vendor) or (device) inappropriate for the entry type
3641 * is set to -1.  Returns nonzero at the end of the database.
3642 *
 * Note that this is not fully robust in the face of corrupt data;
3644 * we attempt to safeguard against this by spamming the end of the
3645 * database with a newline when we initialise.
3646 */
3647static int
3648pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3649{
3650	char	*cp = *ptr;
3651	int	left;
3652
3653	*device = -1;
3654	*vendor = -1;
3655	**desc = '\0';
3656	for (;;) {
3657		left = pci_vendordata_size - (cp - pci_vendordata);
3658		if (left <= 0) {
3659			*ptr = cp;
3660			return(1);
3661		}
3662
3663		/* vendor entry? */
3664		if (*cp != '\t' &&
3665		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3666			break;
3667		/* device entry? */
3668		if (*cp == '\t' &&
3669		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3670			break;
3671
3672		/* skip to next line */
3673		while (*cp != '\n' && left > 0) {
3674			cp++;
3675			left--;
3676		}
3677		if (*cp == '\n') {
3678			cp++;
3679			left--;
3680		}
3681	}
3682	/* skip to next line */
3683	while (*cp != '\n' && left > 0) {
3684		cp++;
3685		left--;
3686	}
3687	if (*cp == '\n' && left > 0)
3688		cp++;
3689	*ptr = cp;
3690	return(0);
3691}
3692
/*
 * Look up a device in the preloaded vendor database and return a
 * malloc'd "vendor, device" description string, or NULL if the
 * database is absent, allocation fails, or the vendor is unknown.
 * The caller is responsible for freeing the result (M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers for the parser's description field. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor entry ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: fall back to the raw hex device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3745
/*
 * Bus read_ivar method: return the requested instance variable from
 * the child's cached config registers.  Returns 0 on success, EINVAL
 * for PCI_IVAR_ETHADDR (unsupported here), or ENOENT for an unknown
 * ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3828
3829int
3830pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3831{
3832	struct pci_devinfo *dinfo;
3833
3834	dinfo = device_get_ivars(child);
3835
3836	switch (which) {
3837	case PCI_IVAR_INTPIN:
3838		dinfo->cfg.intpin = value;
3839		return (0);
3840	case PCI_IVAR_ETHADDR:
3841	case PCI_IVAR_SUBVENDOR:
3842	case PCI_IVAR_SUBDEVICE:
3843	case PCI_IVAR_VENDOR:
3844	case PCI_IVAR_DEVICE:
3845	case PCI_IVAR_DEVID:
3846	case PCI_IVAR_CLASS:
3847	case PCI_IVAR_SUBCLASS:
3848	case PCI_IVAR_PROGIF:
3849	case PCI_IVAR_REVID:
3850	case PCI_IVAR_IRQ:
3851	case PCI_IVAR_DOMAIN:
3852	case PCI_IVAR_BUS:
3853	case PCI_IVAR_SLOT:
3854	case PCI_IVAR_FUNCTION:
3855		return (EINVAL);	/* disallow for now */
3856
3857	default:
3858		return (ENOENT);
3859	}
3860}
3861
3862#include "opt_ddb.h"
3863#ifdef DDB
3864#include <ddb/ddb.h>
3865#include <sys/cons.h>
3866
3867/*
3868 * List resources based on pci map registers, used for within ddb
3869 */
3870
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (name/unit, location, class,
 * subsystem and device ids, revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3910#endif /* DDB */
3911
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it.  Sizes the BAR (probing the hardware if it was not
 * seen before), validates the requested resource type against the BAR
 * type, allocates a suitably sized and aligned range from the parent,
 * records it on the child's resource list as RLE_RESERVED, and
 * programs the BAR with the assigned address.  Returns the resource
 * or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation on the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4017
/*
 * Bus alloc_resource method.  For direct children, perform lazy PCI
 * resource allocation: route an interrupt on demand for rid 0 IRQ
 * requests (unless MSI/MSI-X is in use), and reserve BAR-backed
 * memory/port ranges via pci_reserve_map() before handing the request
 * to the resource list.  Indirect children are passed straight up.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-allocation of) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4088
4089int
4090pci_activate_resource(device_t dev, device_t child, int type, int rid,
4091    struct resource *r)
4092{
4093	struct pci_devinfo *dinfo;
4094	int error;
4095
4096	error = bus_generic_activate_resource(dev, child, type, rid, r);
4097	if (error)
4098		return (error);
4099
4100	/* Enable decoding in the command register when activating BARs. */
4101	if (device_get_parent(child) == dev) {
4102		/* Device ROMs need their decoding explicitly enabled. */
4103		dinfo = device_get_ivars(child);
4104		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4105			pci_write_bar(child, pci_find_bar(child, rid),
4106			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4107		switch (type) {
4108		case SYS_RES_IOPORT:
4109		case SYS_RES_MEMORY:
4110			error = PCI_ENABLE_IO(dev, child, type);
4111			break;
4112		}
4113	}
4114	return (error);
4115}
4116
4117int
4118pci_deactivate_resource(device_t dev, device_t child, int type,
4119    int rid, struct resource *r)
4120{
4121	struct pci_devinfo *dinfo;
4122	int error;
4123
4124	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4125	if (error)
4126		return (error);
4127
4128	/* Disable decoding for device ROMs. */
4129	if (device_get_parent(child) == dev) {
4130		dinfo = device_get_ivars(child);
4131		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4132			pci_write_bar(child, pci_find_bar(child, rid),
4133			    rman_get_start(r));
4134	}
4135	return (0);
4136}
4137
/*
 * Detach and destroy a child PCI device, releasing everything on its
 * resource list and freeing its config-space bookkeeping.  Callers use
 * this when a device disappears (e.g. removal or bus rescan).
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/* Give any attached driver a chance to release its resources. */
	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * The detach above should have released any
			 * resource a driver still held; complain and
			 * forcibly release anything still active or busy
			 * so the unreserve below can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	/* Remove the device from the tree and free its devinfo. */
	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4177
/*
 * Delete the (type, rid) entry from a child's resource list.  If the
 * entry has a reserved resource backing it, the resource is unreserved
 * first; for BAR resources the BAR is cleared so the device stops
 * decoding the range before it is released.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		/* Refuse to delete a resource a driver still holds. */
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4220
4221struct resource_list *
4222pci_get_resource_list (device_t dev, device_t child)
4223{
4224	struct pci_devinfo *dinfo = device_get_ivars(child);
4225
4226	return (&dinfo->resources);
4227}
4228
4229uint32_t
4230pci_read_config_method(device_t dev, device_t child, int reg, int width)
4231{
4232	struct pci_devinfo *dinfo = device_get_ivars(child);
4233	pcicfgregs *cfg = &dinfo->cfg;
4234
4235	return (PCIB_READ_CONFIG(device_get_parent(dev),
4236	    cfg->bus, cfg->slot, cfg->func, reg, width));
4237}
4238
4239void
4240pci_write_config_method(device_t dev, device_t child, int reg,
4241    uint32_t val, int width)
4242{
4243	struct pci_devinfo *dinfo = device_get_ivars(child);
4244	pcicfgregs *cfg = &dinfo->cfg;
4245
4246	PCIB_WRITE_CONFIG(device_get_parent(dev),
4247	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4248}
4249
4250int
4251pci_child_location_str_method(device_t dev, device_t child, char *buf,
4252    size_t buflen)
4253{
4254
4255	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4256	    pci_get_function(child));
4257	return (0);
4258}
4259
4260int
4261pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4262    size_t buflen)
4263{
4264	struct pci_devinfo *dinfo;
4265	pcicfgregs *cfg;
4266
4267	dinfo = device_get_ivars(child);
4268	cfg = &dinfo->cfg;
4269	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4270	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4271	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4272	    cfg->progif);
4273	return (0);
4274}
4275
4276int
4277pci_assign_interrupt_method(device_t dev, device_t child)
4278{
4279	struct pci_devinfo *dinfo = device_get_ivars(child);
4280	pcicfgregs *cfg = &dinfo->cfg;
4281
4282	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4283	    cfg->intpin));
4284}
4285
4286static int
4287pci_modevent(module_t mod, int what, void *arg)
4288{
4289	static struct cdev *pci_cdev;
4290
4291	switch (what) {
4292	case MOD_LOAD:
4293		STAILQ_INIT(&pci_devq);
4294		pci_generation = 0;
4295		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4296		    "pci");
4297		pci_load_vendor_data();
4298		break;
4299
4300	case MOD_UNLOAD:
4301		destroy_dev(pci_cdev);
4302		break;
4303	}
4304
4305	return (0);
4306}
4307
/*
 * Restore the writable portion of a type 0 device's config header from
 * the cached copy in dinfo, typically after a suspend/resume cycle or a
 * power-state transition clobbered it.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Rewrite the BARs and the cached header registers. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4349
/*
 * Snapshot the writable portion of a type 0 device's config header into
 * dinfo (so pci_cfg_restore() can put it back later) and, when setstate
 * is non-zero, optionally power the device down to D3 according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Policy tunable: which classes of driverless devices to power down. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4429