/* pci.c revision 169037 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 169037 2007-04-25 14:45:46Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static const char	*pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static int		pci_msi_blacklisted(void);
105
/*
 * Method dispatch table for the PCI bus driver: maps the generic device,
 * bus, and PCI kobj interfaces onto this file's implementations (or the
 * bus_generic_* defaults where no PCI-specific behavior is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
157
/* Define the "pci" driver class and attach it below pcib (bridge) devices. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database; see pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
166
167
/* Per-device quirk entry, keyed by the 32-bit device+vendor ID word. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-type-specific argument (e.g. map reg offset) */
	int	arg2;
};
176
/* Table of known-broken devices; terminated by a zero devid entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
211
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices plus bookkeeping counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set by pci_read_extcap() when a PCIe root port / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267
268/* Find a device_t by bus/slot/function */
269
270device_t
271pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272{
273	struct pci_devinfo *dinfo;
274
275	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276		if ((dinfo->cfg.bus == bus) &&
277		    (dinfo->cfg.slot == slot) &&
278		    (dinfo->cfg.func == func)) {
279			return (dinfo->cfg.dev);
280		}
281	}
282
283	return (NULL);
284}
285
286/* Find a device_t by vendor/device ID */
287
288device_t
289pci_find_device(uint16_t vendor, uint16_t device)
290{
291	struct pci_devinfo *dinfo;
292
293	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294		if ((dinfo->cfg.vendor == vendor) &&
295		    (dinfo->cfg.device == device)) {
296			return (dinfo->cfg.dev);
297		}
298	}
299
300	return (NULL);
301}
302
303/* return base address of memory or port map */
304
305static uint32_t
306pci_mapbase(uint32_t mapreg)
307{
308
309	if (PCI_BAR_MEM(mapreg))
310		return (mapreg & PCIM_BAR_MEM_BASE);
311	else
312		return (mapreg & PCIM_BAR_IO_BASE);
313}
314
315/* return map type of memory or port map */
316
317static const char *
318pci_maptype(unsigned mapreg)
319{
320
321	if (PCI_BAR_IO(mapreg))
322		return ("I/O Port");
323	if (mapreg & PCIM_BAR_MEM_PREFETCH)
324		return ("Prefetchable Memory");
325	return ("Memory");
326}
327
328/* return log2 of map size decoded for memory or port map */
329
330static int
331pci_mapsize(uint32_t testval)
332{
333	int ln2size;
334
335	testval = pci_mapbase(testval);
336	ln2size = 0;
337	if (testval != 0) {
338		while ((testval & 1) == 0)
339		{
340			ln2size++;
341			testval >>= 1;
342		}
343	}
344	return (ln2size);
345}
346
347/* return log2 of address range supported by map register */
348
349static int
350pci_maprange(unsigned mapreg)
351{
352	int ln2range = 0;
353
354	if (PCI_BAR_IO(mapreg))
355		ln2range = 32;
356	else
357		switch (mapreg & PCIM_BAR_MEM_TYPE) {
358		case PCIM_BAR_MEM_32:
359			ln2range = 32;
360			break;
361		case PCIM_BAR_MEM_1MB:
362			ln2range = 20;
363			break;
364		case PCIM_BAR_MEM_64:
365			ln2range = 64;
366			break;
367		}
368	return (ln2range);
369}
370
371/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
372
373static void
374pci_fixancient(pcicfgregs *cfg)
375{
376	if (cfg->hdrtype != 0)
377		return;
378
379	/* PCI to PCI bridges use header type 1 */
380	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
381		cfg->hdrtype = 1;
382}
383
384/* extract header type specific config data */
385
386static void
387pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
388{
389#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
390	switch (cfg->hdrtype) {
391	case 0:
392		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
393		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
394		cfg->nummaps	    = PCI_MAXMAPS_0;
395		break;
396	case 1:
397		cfg->nummaps	    = PCI_MAXMAPS_1;
398		break;
399	case 2:
400		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
401		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
402		cfg->nummaps	    = PCI_MAXMAPS_2;
403		break;
404	}
405#undef REG
406}
407
408/* read configuration header into pcicfgregs structure */
409struct pci_devinfo *
410pci_read_device(device_t pcib, int b, int s, int f, size_t size)
411{
412#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
413	pcicfgregs *cfg = NULL;
414	struct pci_devinfo *devlist_entry;
415	struct devlist *devlist_head;
416
417	devlist_head = &pci_devq;
418
419	devlist_entry = NULL;
420
421	if (REG(PCIR_DEVVENDOR, 4) != -1) {
422		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
423		if (devlist_entry == NULL)
424			return (NULL);
425
426		cfg = &devlist_entry->cfg;
427
428		cfg->bus		= b;
429		cfg->slot		= s;
430		cfg->func		= f;
431		cfg->vendor		= REG(PCIR_VENDOR, 2);
432		cfg->device		= REG(PCIR_DEVICE, 2);
433		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
434		cfg->statreg		= REG(PCIR_STATUS, 2);
435		cfg->baseclass		= REG(PCIR_CLASS, 1);
436		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
437		cfg->progif		= REG(PCIR_PROGIF, 1);
438		cfg->revid		= REG(PCIR_REVID, 1);
439		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
440		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
441		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
442		cfg->intpin		= REG(PCIR_INTPIN, 1);
443		cfg->intline		= REG(PCIR_INTLINE, 1);
444
445		cfg->mingnt		= REG(PCIR_MINGNT, 1);
446		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
447
448		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
449		cfg->hdrtype		&= ~PCIM_MFDEV;
450
451		pci_fixancient(cfg);
452		pci_hdrtypedata(pcib, b, s, f, cfg);
453
454		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
455			pci_read_extcap(pcib, cfg);
456
457		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
458
459		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
460		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
461		devlist_entry->conf.pc_sel.pc_func = cfg->func;
462		devlist_entry->conf.pc_hdr = cfg->hdrtype;
463
464		devlist_entry->conf.pc_subvendor = cfg->subvendor;
465		devlist_entry->conf.pc_subdevice = cfg->subdevice;
466		devlist_entry->conf.pc_vendor = cfg->vendor;
467		devlist_entry->conf.pc_device = cfg->device;
468
469		devlist_entry->conf.pc_class = cfg->baseclass;
470		devlist_entry->conf.pc_subclass = cfg->subclass;
471		devlist_entry->conf.pc_progif = cfg->progif;
472		devlist_entry->conf.pc_revid = cfg->revid;
473
474		pci_numdevs++;
475		pci_generation++;
476	}
477	return (devlist_entry);
478#undef REG
479}
480
481static void
482pci_read_extcap(device_t pcib, pcicfgregs *cfg)
483{
484#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
485#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
486#if defined(__i386__) || defined(__amd64__)
487	uint64_t addr;
488#endif
489	uint32_t val;
490	int	ptr, nextptr, ptrptr;
491
492	switch (cfg->hdrtype & PCIM_HDRTYPE) {
493	case 0:
494	case 1:
495		ptrptr = PCIR_CAP_PTR;
496		break;
497	case 2:
498		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
499		break;
500	default:
501		return;		/* no extended capabilities support */
502	}
503	nextptr = REG(ptrptr, 1);	/* sanity check? */
504
505	/*
506	 * Read capability entries.
507	 */
508	while (nextptr != 0) {
509		/* Sanity check */
510		if (nextptr > 255) {
511			printf("illegal PCI extended capability offset %d\n",
512			    nextptr);
513			return;
514		}
515		/* Find the next entry */
516		ptr = nextptr;
517		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
518
519		/* Process this entry */
520		switch (REG(ptr + PCICAP_ID, 1)) {
521		case PCIY_PMG:		/* PCI power management */
522			if (cfg->pp.pp_cap == 0) {
523				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
524				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
525				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
526				if ((nextptr - ptr) > PCIR_POWER_DATA)
527					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
528			}
529			break;
530#if defined(__i386__) || defined(__amd64__)
531		case PCIY_HT:		/* HyperTransport */
532			/* Determine HT-specific capability type. */
533			val = REG(ptr + PCIR_HT_COMMAND, 2);
534			switch (val & PCIM_HTCMD_CAP_MASK) {
535			case PCIM_HTCAP_MSI_MAPPING:
536				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
537					/* Sanity check the mapping window. */
538					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
539					    4);
540					addr <<= 32;
541					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
542					    4);
543					if (addr != MSI_INTEL_ADDR_BASE)
544						device_printf(pcib,
545		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546						    cfg->bus, cfg->slot,
547						    cfg->func, (long long)addr);
548				}
549
550				/* Enable MSI -> HT mapping. */
551				val |= PCIM_HTCMD_MSI_ENABLE;
552				WREG(ptr + PCIR_HT_COMMAND, val, 2);
553				break;
554			}
555			break;
556#endif
557		case PCIY_MSI:		/* PCI MSI */
558			cfg->msi.msi_location = ptr;
559			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
560			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
561						     PCIM_MSICTRL_MMC_MASK)>>1);
562			break;
563		case PCIY_MSIX:		/* PCI MSI-X */
564			cfg->msix.msix_location = ptr;
565			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
566			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
567			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
568			val = REG(ptr + PCIR_MSIX_TABLE, 4);
569			cfg->msix.msix_table_bar = PCIR_BAR(val &
570			    PCIM_MSIX_BIR_MASK);
571			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
572			val = REG(ptr + PCIR_MSIX_PBA, 4);
573			cfg->msix.msix_pba_bar = PCIR_BAR(val &
574			    PCIM_MSIX_BIR_MASK);
575			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
576			break;
577		case PCIY_VPD:		/* PCI Vital Product Data */
578			cfg->vpd.vpd_reg = ptr;
579			break;
580		case PCIY_SUBVENDOR:
581			/* Should always be true. */
582			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
583				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
584				cfg->subvendor = val & 0xffff;
585				cfg->subdevice = val >> 16;
586			}
587			break;
588		case PCIY_PCIX:		/* PCI-X */
589			/*
590			 * Assume we have a PCI-X chipset if we have
591			 * at least one PCI-PCI bridge with a PCI-X
592			 * capability.  Note that some systems with
593			 * PCI-express or HT chipsets might match on
594			 * this check as well.
595			 */
596			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
597				pcix_chipset = 1;
598			break;
599		case PCIY_EXPRESS:	/* PCI-express */
600			/*
601			 * Assume we have a PCI-express chipset if we have
602			 * at least one PCI-express root port.
603			 */
604			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
605			if ((val & PCIM_EXP_FLAGS_TYPE) ==
606			    PCIM_EXP_TYPE_ROOT_PORT)
607				pcie_chipset = 1;
608			break;
609		default:
610			break;
611		}
612	}
613/* REG and WREG use carry through to next functions */
614}
615
616/*
617 * PCI Vital Product Data
618 */
619static uint32_t
620pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
621{
622
623	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
624
625	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
626	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
627		DELAY(1);	/* limit looping */
628
629	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
630}
631
#if 0
/*
 * Write one 4-byte-aligned dword to the device's VPD storage (currently
 * unused; kept for reference).  The flag bit clears when the write
 * completes.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
646
/* Cursor state for streaming bytes out of the 32-bit-wide VPD registers. */
struct vpd_readstate {
	device_t	pcib;		/* bridge to issue config cycles on */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* VPD offset of the next dword */
	uint8_t		cksum;		/* running byte sum for "RV" check */
};
655
656static uint8_t
657vpd_nextbyte(struct vpd_readstate *vrs)
658{
659	uint8_t byte;
660
661	if (vrs->bytesinval == 0) {
662		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
663		    vrs->off));
664		vrs->off += 4;
665		byte = vrs->val & 0xff;
666		vrs->bytesinval = 3;
667	} else {
668		vrs->val = vrs->val >> 8;
669		byte = vrs->val & 0xff;
670		vrs->bytesinval--;
671	}
672
673	vrs->cksum += byte;
674	return (byte);
675}
676
/*
 * Parse the device's Vital Product Data into cfg->vpd.
 *
 * VPD (PCI 2.2, appendix I) is a stream of small resource descriptors:
 * an identifier string, a read-only ("VPD-R") keyword list whose "RV"
 * keyword carries a checksum covering everything up to and including it,
 * and an optional read/write ("VPD-W") keyword list, terminated by an
 * "End" tag.  This routine walks the stream with a state machine,
 * populating cfg->vpd.vpd_ident, vpd_ros[] and vpd_w[].  vpd_cached is
 * set unconditionally so the (possibly failed) parse happens only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	/* VPD support administratively disabled. */
	if (!pci_do_vpd) {
		cfg->vpd.vpd_cached = 1;
		return;
	}

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 distinguishes large vs. small resource tags. */
			if (byte & 0x80) {
				/* Large format: 16-bit little-endian length. */
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				/* VPD address register only reaches 0x7f*4. */
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small format: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;	/* initial array size; doubled on demand */
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			/* Each keyword entry: 2-char key + 1-byte length. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store an empty string. */
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword header consumed 3 bytes */
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" holds the checksum byte: the running sum of
			 * all bytes through it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: shrink array to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes until the current descriptor ends. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			/* Record where in VPD this writable field starts. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/* read-only data bad, clean up */
		/*
		 * NOTE(review): this loop frees vpd_ros[off] down to
		 * vpd_ros[1] -- index 0 is never freed, and vpd_ros[off]
		 * may not have an allocated value when the failure came
		 * from the keyword-header state.  Verify against the state
		 * machine before relying on this path.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
915
916int
917pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
918{
919	struct pci_devinfo *dinfo = device_get_ivars(child);
920	pcicfgregs *cfg = &dinfo->cfg;
921
922	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
923		pci_read_vpd(device_get_parent(dev), cfg);
924
925	*identptr = cfg->vpd.vpd_ident;
926
927	if (*identptr == NULL)
928		return (ENXIO);
929
930	return (0);
931}
932
933int
934pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
935	const char **vptr)
936{
937	struct pci_devinfo *dinfo = device_get_ivars(child);
938	pcicfgregs *cfg = &dinfo->cfg;
939	int i;
940
941	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
942		pci_read_vpd(device_get_parent(dev), cfg);
943
944	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
945		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
946		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
947			*vptr = cfg->vpd.vpd_ros[i].value;
948		}
949
950	if (i != cfg->vpd.vpd_rocnt)
951		return (0);
952
953	*vptr = NULL;
954	return (ENXIO);
955}
956
957/*
958 * Return the offset in configuration space of the requested extended
959 * capability entry or 0 if the specified capability was not found.
960 */
961int
962pci_find_extcap_method(device_t dev, device_t child, int capability,
963    int *capreg)
964{
965	struct pci_devinfo *dinfo = device_get_ivars(child);
966	pcicfgregs *cfg = &dinfo->cfg;
967	u_int32_t status;
968	u_int8_t ptr;
969
970	/*
971	 * Check the CAP_LIST bit of the PCI status register first.
972	 */
973	status = pci_read_config(child, PCIR_STATUS, 2);
974	if (!(status & PCIM_STATUS_CAPPRESENT))
975		return (ENXIO);
976
977	/*
978	 * Determine the start pointer of the capabilities list.
979	 */
980	switch (cfg->hdrtype & PCIM_HDRTYPE) {
981	case 0:
982	case 1:
983		ptr = PCIR_CAP_PTR;
984		break;
985	case 2:
986		ptr = PCIR_CAP_PTR_2;
987		break;
988	default:
989		/* XXX: panic? */
990		return (ENXIO);		/* no extended capabilities support */
991	}
992	ptr = pci_read_config(child, ptr, 1);
993
994	/*
995	 * Traverse the capabilities list.
996	 */
997	while (ptr != 0) {
998		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
999			if (capreg != NULL)
1000				*capreg = ptr;
1001			return (0);
1002		}
1003		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1004	}
1005
1006	return (ENOENT);
1007}
1008
1009/*
1010 * Support for MSI-X message interrupts.
1011 */
1012void
1013pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1014{
1015	struct pci_devinfo *dinfo = device_get_ivars(dev);
1016	pcicfgregs *cfg = &dinfo->cfg;
1017	uint32_t offset;
1018
1019	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1020	offset = cfg->msix.msix_table_offset + index * 16;
1021	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
1022	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
1023	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
1024}
1025
1026void
1027pci_mask_msix(device_t dev, u_int index)
1028{
1029	struct pci_devinfo *dinfo = device_get_ivars(dev);
1030	pcicfgregs *cfg = &dinfo->cfg;
1031	uint32_t offset, val;
1032
1033	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
1034	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1035	val = bus_read_4(cfg->msix.msix_table_res, offset);
1036	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1037		val |= PCIM_MSIX_VCTRL_MASK;
1038		bus_write_4(cfg->msix.msix_table_res, offset, val);
1039	}
1040}
1041
1042void
1043pci_unmask_msix(device_t dev, u_int index)
1044{
1045	struct pci_devinfo *dinfo = device_get_ivars(dev);
1046	pcicfgregs *cfg = &dinfo->cfg;
1047	uint32_t offset, val;
1048
1049	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1050	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1051	val = bus_read_4(cfg->msix.msix_table_res, offset);
1052	if (val & PCIM_MSIX_VCTRL_MASK) {
1053		val &= ~PCIM_MSIX_VCTRL_MASK;
1054		bus_write_4(cfg->msix.msix_table_res, offset, val);
1055	}
1056}
1057
1058int
1059pci_pending_msix(device_t dev, u_int index)
1060{
1061	struct pci_devinfo *dinfo = device_get_ivars(dev);
1062	pcicfgregs *cfg = &dinfo->cfg;
1063	uint32_t offset, bit;
1064
1065	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1066	offset = cfg->msix.msix_pba_offset + (index / 32) * 4;
1067	bit = 1 << index % 32;
1068	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1069}
1070
1071/*
1072 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1073 * returned in *count.  After this function returns, each message will be
1074 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1075 */
1076int
1077pci_alloc_msix_method(device_t dev, device_t child, int *count)
1078{
1079	struct pci_devinfo *dinfo = device_get_ivars(child);
1080	pcicfgregs *cfg = &dinfo->cfg;
1081	struct resource_list_entry *rle;
1082	int actual, error, i, irq, max;
1083
1084	/* Don't let count == 0 get us into trouble. */
1085	if (*count == 0)
1086		return (EINVAL);
1087
1088	/* If rid 0 is allocated, then fail. */
1089	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1090	if (rle != NULL && rle->res != NULL)
1091		return (ENXIO);
1092
1093	/* Already have allocated messages? */
1094	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1095		return (ENXIO);
1096
1097	/* If MSI is blacklisted for this system, fail. */
1098	if (pci_msi_blacklisted())
1099		return (ENXIO);
1100
1101	/* MSI-X capability present? */
1102	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1103		return (ENODEV);
1104
1105	/* Make sure the appropriate BARs are mapped. */
1106	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1107	    cfg->msix.msix_table_bar);
1108	if (rle == NULL || rle->res == NULL ||
1109	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1110		return (ENXIO);
1111	cfg->msix.msix_table_res = rle->res;
1112	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1113		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1114		    cfg->msix.msix_pba_bar);
1115		if (rle == NULL || rle->res == NULL ||
1116		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1117			return (ENXIO);
1118	}
1119	cfg->msix.msix_pba_res = rle->res;
1120
1121	if (bootverbose)
1122		device_printf(child,
1123		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1124		    *count, cfg->msix.msix_msgnum);
1125	max = min(*count, cfg->msix.msix_msgnum);
1126	for (i = 0; i < max; i++) {
1127		/* Allocate a message. */
1128		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1129		    &irq);
1130		if (error)
1131			break;
1132		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1133		    irq, 1);
1134	}
1135	actual = i;
1136
1137	if (bootverbose) {
1138		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1139		if (actual == 1)
1140			device_printf(child, "using IRQ %lu for MSI-X\n",
1141			    rle->start);
1142		else {
1143			int run;
1144
1145			/*
1146			 * Be fancy and try to print contiguous runs of
1147			 * IRQ values as ranges.  'irq' is the previous IRQ.
1148			 * 'run' is true if we are in a range.
1149			 */
1150			device_printf(child, "using IRQs %lu", rle->start);
1151			irq = rle->start;
1152			run = 0;
1153			for (i = 1; i < actual; i++) {
1154				rle = resource_list_find(&dinfo->resources,
1155				    SYS_RES_IRQ, i + 1);
1156
1157				/* Still in a run? */
1158				if (rle->start == irq + 1) {
1159					run = 1;
1160					irq++;
1161					continue;
1162				}
1163
1164				/* Finish previous range. */
1165				if (run) {
1166					printf("-%d", irq);
1167					run = 0;
1168				}
1169
1170				/* Start new range. */
1171				printf(",%lu", rle->start);
1172				irq = rle->start;
1173			}
1174
1175			/* Unfinished range? */
1176			if (run)
1177				printf("-%d", irq);
1178			printf(" for MSI-X\n");
1179		}
1180	}
1181
1182	/* Mask all vectors. */
1183	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1184		pci_mask_msix(child, i);
1185
1186	/* Update control register to enable MSI-X. */
1187	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1188	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1189	    cfg->msix.msix_ctrl, 2);
1190
1191	/* Update counts of alloc'd messages. */
1192	cfg->msix.msix_alloc = actual;
1193	*count = actual;
1194	return (0);
1195}
1196
1197/*
1198 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1199 * the first N messages in the MSI-X table.  However, device drivers may
1200 * want to use different layouts in the case that they do not allocate a
1201 * full table.  This method allows the driver to specify what layout it
1202 * wants.  It must be called after a successful pci_alloc_msix() but
1203 * before any of the associated SYS_RES_IRQ resources are allocated via
1204 * bus_alloc_resource().  The 'indices' array contains N (where N equals
1205 * the 'count' returned from pci_alloc_msix()) message indices.  The
1206 * indices are 1-based (meaning the first message is at index 1).  On
1207 * successful return, each of the messages in the 'indices' array will
1208 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1209 * if indices contains { 2, 4 }, then upon successful return, the 'child'
1210 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1211 */
1212int
1213pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
1214{
1215	struct pci_devinfo *dinfo = device_get_ivars(child);
1216	pcicfgregs *cfg = &dinfo->cfg;
1217	struct resource_list_entry *rle;
1218	int count, error, i, j, *irqs;
1219
1220	/* Sanity check the indices. */
1221	for (i = 0; i < cfg->msix.msix_alloc; i++)
1222		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
1223			return (EINVAL);
1224
1225	/* Check for duplicates. */
1226	for (i = 0; i < cfg->msix.msix_alloc; i++)
1227		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
1228			if (indices[i] == indices[j])
1229				return (EINVAL);
1230
1231	/* Make sure none of the resources are allocated. */
1232	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1233		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1234		if (rle == NULL)
1235			continue;
1236		if (rle->res != NULL)
1237			return (EBUSY);
1238		count++;
1239	}
1240
1241	/* Save the IRQ values and free the existing resources. */
1242	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
1243	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1244		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1245		if (rle == NULL)
1246			continue;
1247		irqs[count] = rle->start;
1248		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
1249		count++;
1250	}
1251
1252	/* Map the IRQ values to the new message indices and rids. */
1253	for (i = 0; i < cfg->msix.msix_alloc; i++) {
1254		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
1255		    irqs[i], irqs[i], 1);
1256
1257		/*
1258		 * The indices in the backend code (PCIB_* methods and the
1259		 * MI helper routines for MD code such as pci_enable_msix())
1260		 * are all zero-based.  However, the indices passed to this
1261		 * function are 1-based so that the correspond 1:1 with the
1262		 * SYS_RES_IRQ resource IDs.
1263		 */
1264		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
1265		    indices[i] - 1, irqs[i]);
1266		KASSERT(error == 0, ("Failed to remap MSI-X message"));
1267	}
1268	if (bootverbose) {
1269		if (cfg->msix.msix_alloc == 1)
1270			device_printf(child,
1271			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
1272		else {
1273			device_printf(child, "Remapped MSI-X IRQs to indices");
1274			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
1275				printf(" %d,", indices[i]);
1276			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
1277		}
1278	}
1279	free(irqs, M_TEMP);
1280
1281	return (0);
1282}
1283
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in the
 * control register, hand each message back to the parent bridge, and
 * delete the associated SYS_RES_IRQ resource list entries.  Returns
 * ENODEV if no MSI-X messages are allocated and EBUSY if any of the
 * SYS_RES_IRQ resources are still allocated by the driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  The rids may be
	 * non-contiguous after a remap, so walk rids until msix_alloc
	 * entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1326
1327/*
1328 * Return the max supported MSI-X messages this device supports.
1329 * Basically, assuming the MD code can alloc messages, this function
1330 * should return the maximum value that pci_alloc_msix() can return.
1331 * Thus, it is subject to the tunables, etc.
1332 */
1333int
1334pci_msix_count_method(device_t dev, device_t child)
1335{
1336	struct pci_devinfo *dinfo = device_get_ivars(child);
1337	pcicfgregs *cfg = &dinfo->cfg;
1338
1339	if (pci_do_msix && cfg->msix.msix_location != 0)
1340		return (cfg->msix.msix_msgnum);
1341	return (0);
1342}
1343
1344/*
1345 * Support for MSI message signalled interrupts.
1346 */
1347void
1348pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1349{
1350	struct pci_devinfo *dinfo = device_get_ivars(dev);
1351	pcicfgregs *cfg = &dinfo->cfg;
1352
1353	/* Write data and address values. */
1354	cfg->msi.msi_addr = address;
1355	cfg->msi.msi_data = data;
1356	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1357	    address & 0xffffffff, 4);
1358	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1359		pci_write_config(dev, cfg->msi.msi_location +
1360		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1361		pci_write_config(dev, cfg->msi.msi_location +
1362		    PCIR_MSI_DATA_64BIT, data, 2);
1363	} else
1364		pci_write_config(dev, cfg->msi.msi_location +
1365		    PCIR_MSI_DATA, data, 2);
1366
1367	/* Enable MSI in the control register. */
1368	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1369	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1370	    cfg->msi.msi_ctrl, 2);
1371}
1372
1373/*
1374 * Restore MSI registers during resume.  If MSI is enabled then
1375 * restore the data and address registers in addition to the control
1376 * register.
1377 */
1378static void
1379pci_resume_msi(device_t dev)
1380{
1381	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382	pcicfgregs *cfg = &dinfo->cfg;
1383	uint64_t address;
1384	uint16_t data;
1385
1386	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1387		address = cfg->msi.msi_addr;
1388		data = cfg->msi.msi_data;
1389		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1390		    address & 0xffffffff, 4);
1391		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1392			pci_write_config(dev, cfg->msi.msi_location +
1393			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1394			pci_write_config(dev, cfg->msi.msi_location +
1395			    PCIR_MSI_DATA_64BIT, data, 2);
1396		} else
1397			pci_write_config(dev, cfg->msi.msi_location +
1398			    PCIR_MSI_DATA, data, 2);
1399	}
1400	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1401	    cfg->msi.msi_ctrl, 2);
1402}
1403
1404/*
1405 * Returns true if the specified device is blacklisted because MSI
1406 * doesn't work.
1407 */
1408int
1409pci_msi_device_blacklisted(device_t dev)
1410{
1411	struct pci_quirk *q;
1412
1413	if (!pci_honor_msi_blacklist)
1414		return (0);
1415
1416	for (q = &pci_quirks[0]; q->devid; q++) {
1417		if (q->devid == pci_get_devid(dev) &&
1418		    q->type == PCI_QUIRK_DISABLE_MSI)
1419			return (1);
1420	}
1421	return (0);
1422}
1423
1424/*
1425 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1426 * we just check for blacklisted chipsets as represented by the
1427 * host-PCI bridge at device 0:0:0.  In the future, it may become
1428 * necessary to check other system attributes, such as the kenv values
1429 * that give the motherboard manufacturer and model number.
1430 */
1431static int
1432pci_msi_blacklisted(void)
1433{
1434	device_t dev;
1435
1436	if (!pci_honor_msi_blacklist)
1437		return (0);
1438
1439	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1440	if (!(pcie_chipset || pcix_chipset))
1441		return (1);
1442
1443	dev = pci_find_bsf(0, 0, 0);
1444	if (dev != NULL)
1445		return (pci_msi_device_blacklisted(dev));
1446	return (0);
1447}
1448
1449/*
1450 * Attempt to allocate *count MSI messages.  The actual number allocated is
1451 * returned in *count.  After this function returns, each message will be
1452 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1453 */
1454int
1455pci_alloc_msi_method(device_t dev, device_t child, int *count)
1456{
1457	struct pci_devinfo *dinfo = device_get_ivars(child);
1458	pcicfgregs *cfg = &dinfo->cfg;
1459	struct resource_list_entry *rle;
1460	int actual, error, i, irqs[32];
1461	uint16_t ctrl;
1462
1463	/* Don't let count == 0 get us into trouble. */
1464	if (*count == 0)
1465		return (EINVAL);
1466
1467	/* If rid 0 is allocated, then fail. */
1468	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1469	if (rle != NULL && rle->res != NULL)
1470		return (ENXIO);
1471
1472	/* Already have allocated messages? */
1473	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1474		return (ENXIO);
1475
1476	/* If MSI is blacklisted for this system, fail. */
1477	if (pci_msi_blacklisted())
1478		return (ENXIO);
1479
1480	/* MSI capability present? */
1481	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1482		return (ENODEV);
1483
1484	if (bootverbose)
1485		device_printf(child,
1486		    "attempting to allocate %d MSI vectors (%d supported)\n",
1487		    *count, cfg->msi.msi_msgnum);
1488
1489	/* Don't ask for more than the device supports. */
1490	actual = min(*count, cfg->msi.msi_msgnum);
1491
1492	/* Don't ask for more than 32 messages. */
1493	actual = min(actual, 32);
1494
1495	/* MSI requires power of 2 number of messages. */
1496	if (!powerof2(actual))
1497		return (EINVAL);
1498
1499	for (;;) {
1500		/* Try to allocate N messages. */
1501		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1502		    cfg->msi.msi_msgnum, irqs);
1503		if (error == 0)
1504			break;
1505		if (actual == 1)
1506			return (error);
1507
1508		/* Try N / 2. */
1509		actual >>= 1;
1510	}
1511
1512	/*
1513	 * We now have N actual messages mapped onto SYS_RES_IRQ
1514	 * resources in the irqs[] array, so add new resources
1515	 * starting at rid 1.
1516	 */
1517	for (i = 0; i < actual; i++)
1518		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1519		    irqs[i], irqs[i], 1);
1520
1521	if (bootverbose) {
1522		if (actual == 1)
1523			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1524		else {
1525			int run;
1526
1527			/*
1528			 * Be fancy and try to print contiguous runs
1529			 * of IRQ values as ranges.  'run' is true if
1530			 * we are in a range.
1531			 */
1532			device_printf(child, "using IRQs %d", irqs[0]);
1533			run = 0;
1534			for (i = 1; i < actual; i++) {
1535
1536				/* Still in a run? */
1537				if (irqs[i] == irqs[i - 1] + 1) {
1538					run = 1;
1539					continue;
1540				}
1541
1542				/* Finish previous range. */
1543				if (run) {
1544					printf("-%d", irqs[i - 1]);
1545					run = 0;
1546				}
1547
1548				/* Start new range. */
1549				printf(",%d", irqs[i]);
1550			}
1551
1552			/* Unfinished range? */
1553			if (run)
1554				printf("%d", irqs[actual - 1]);
1555			printf(" for MSI\n");
1556		}
1557	}
1558
1559	/* Update control register with actual count and enable MSI. */
1560	ctrl = cfg->msi.msi_ctrl;
1561	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1562	ctrl |= (ffs(actual) - 1) << 4;
1563	cfg->msi.msi_ctrl = ctrl;
1564	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1565
1566	/* Update counts of alloc'd messages. */
1567	cfg->msi.msi_alloc = actual;
1568	*count = actual;
1569	return (0);
1570}
1571
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * Returns ENODEV if nothing is allocated and EBUSY if any of the
 * SYS_RES_IRQ resources are still allocated by the driver.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first; pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated, in which case fall through to MSI.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1615
1616/*
1617 * Return the max supported MSI messages this device supports.
1618 * Basically, assuming the MD code can alloc messages, this function
1619 * should return the maximum value that pci_alloc_msi() can return.
1620 * Thus, it is subject to the tunables, etc.
1621 */
1622int
1623pci_msi_count_method(device_t dev, device_t child)
1624{
1625	struct pci_devinfo *dinfo = device_get_ivars(child);
1626	pcicfgregs *cfg = &dinfo->cfg;
1627
1628	if (pci_do_msi && cfg->msi.msi_location != 0)
1629		return (cfg->msi.msi_msgnum);
1630	return (0);
1631}
1632
1633/* free pcicfgregs structure and all depending data structures */
1634
1635int
1636pci_freecfg(struct pci_devinfo *dinfo)
1637{
1638	struct devlist *devlist_head;
1639	int i;
1640
1641	devlist_head = &pci_devq;
1642
1643	if (dinfo->cfg.vpd.vpd_reg) {
1644		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1645		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1646			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1647		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1648		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1649			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1650		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1651	}
1652	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1653	free(dinfo, M_DEVBUF);
1654
1655	/* increment the generation count */
1656	pci_generation++;
1657
1658	/* we're losing one device */
1659	pci_numdevs--;
1660	return (0);
1661}
1662
1663/*
1664 * PCI power manangement
1665 */
1666int
1667pci_set_powerstate_method(device_t dev, device_t child, int state)
1668{
1669	struct pci_devinfo *dinfo = device_get_ivars(child);
1670	pcicfgregs *cfg = &dinfo->cfg;
1671	uint16_t status;
1672	int result, oldstate, highest, delay;
1673
1674	if (cfg->pp.pp_cap == 0)
1675		return (EOPNOTSUPP);
1676
1677	/*
1678	 * Optimize a no state change request away.  While it would be OK to
1679	 * write to the hardware in theory, some devices have shown odd
1680	 * behavior when going from D3 -> D3.
1681	 */
1682	oldstate = pci_get_powerstate(child);
1683	if (oldstate == state)
1684		return (0);
1685
1686	/*
1687	 * The PCI power management specification states that after a state
1688	 * transition between PCI power states, system software must
1689	 * guarantee a minimal delay before the function accesses the device.
1690	 * Compute the worst case delay that we need to guarantee before we
1691	 * access the device.  Many devices will be responsive much more
1692	 * quickly than this delay, but there are some that don't respond
1693	 * instantly to state changes.  Transitions to/from D3 state require
1694	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1695	 * is done below with DELAY rather than a sleeper function because
1696	 * this function can be called from contexts where we cannot sleep.
1697	 */
1698	highest = (oldstate > state) ? oldstate : state;
1699	if (highest == PCI_POWERSTATE_D3)
1700	    delay = 10000;
1701	else if (highest == PCI_POWERSTATE_D2)
1702	    delay = 200;
1703	else
1704	    delay = 0;
1705	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1706	    & ~PCIM_PSTAT_DMASK;
1707	result = 0;
1708	switch (state) {
1709	case PCI_POWERSTATE_D0:
1710		status |= PCIM_PSTAT_D0;
1711		break;
1712	case PCI_POWERSTATE_D1:
1713		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1714			return (EOPNOTSUPP);
1715		status |= PCIM_PSTAT_D1;
1716		break;
1717	case PCI_POWERSTATE_D2:
1718		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1719			return (EOPNOTSUPP);
1720		status |= PCIM_PSTAT_D2;
1721		break;
1722	case PCI_POWERSTATE_D3:
1723		status |= PCIM_PSTAT_D3;
1724		break;
1725	default:
1726		return (EINVAL);
1727	}
1728
1729	if (bootverbose)
1730		printf(
1731		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1732		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1733		    oldstate, state);
1734
1735	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1736	if (delay)
1737		DELAY(delay);
1738	return (0);
1739}
1740
1741int
1742pci_get_powerstate_method(device_t dev, device_t child)
1743{
1744	struct pci_devinfo *dinfo = device_get_ivars(child);
1745	pcicfgregs *cfg = &dinfo->cfg;
1746	uint16_t status;
1747	int result;
1748
1749	if (cfg->pp.pp_cap != 0) {
1750		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1751		switch (status & PCIM_PSTAT_DMASK) {
1752		case PCIM_PSTAT_D0:
1753			result = PCI_POWERSTATE_D0;
1754			break;
1755		case PCIM_PSTAT_D1:
1756			result = PCI_POWERSTATE_D1;
1757			break;
1758		case PCIM_PSTAT_D2:
1759			result = PCI_POWERSTATE_D2;
1760			break;
1761		case PCIM_PSTAT_D3:
1762			result = PCI_POWERSTATE_D3;
1763			break;
1764		default:
1765			result = PCI_POWERSTATE_UNKNOWN;
1766			break;
1767		}
1768	} else {
1769		/* No support, device is always at D0 */
1770		result = PCI_POWERSTATE_D0;
1771	}
1772	return (result);
1773}
1774
1775/*
1776 * Some convenience functions for PCI device drivers.
1777 */
1778
1779static __inline void
1780pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1781{
1782	uint16_t	command;
1783
1784	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1785	command |= bit;
1786	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1787}
1788
1789static __inline void
1790pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1791{
1792	uint16_t	command;
1793
1794	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1795	command &= ~bit;
1796	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1797}
1798
/* Enable bus mastering in the child's PCI command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1805
/* Disable bus mastering in the child's PCI command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1812
1813int
1814pci_enable_io_method(device_t dev, device_t child, int space)
1815{
1816	uint16_t command;
1817	uint16_t bit;
1818	char *error;
1819
1820	bit = 0;
1821	error = NULL;
1822
1823	switch(space) {
1824	case SYS_RES_IOPORT:
1825		bit = PCIM_CMD_PORTEN;
1826		error = "port";
1827		break;
1828	case SYS_RES_MEMORY:
1829		bit = PCIM_CMD_MEMEN;
1830		error = "memory";
1831		break;
1832	default:
1833		return (EINVAL);
1834	}
1835	pci_set_command_bit(dev, child, bit);
1836	/* Some devices seem to need a brief stall here, what do to? */
1837	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1838	if (command & bit)
1839		return (0);
1840	device_printf(child, "failed to enable %s mapping!\n", error);
1841	return (ENXIO);
1842}
1843
1844int
1845pci_disable_io_method(device_t dev, device_t child, int space)
1846{
1847	uint16_t command;
1848	uint16_t bit;
1849	char *error;
1850
1851	bit = 0;
1852	error = NULL;
1853
1854	switch(space) {
1855	case SYS_RES_IOPORT:
1856		bit = PCIM_CMD_PORTEN;
1857		error = "port";
1858		break;
1859	case SYS_RES_MEMORY:
1860		bit = PCIM_CMD_MEMEN;
1861		error = "memory";
1862		break;
1863	default:
1864		return (EINVAL);
1865	}
1866	pci_clear_command_bit(dev, child, bit);
1867	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1868	if (command & bit) {
1869		device_printf(child, "failed to disable %s mapping!\n", error);
1870		return (ENXIO);
1871	}
1872	return (0);
1873}
1874
1875/*
1876 * New style pci driver.  Parent device is either a pci-host-bridge or a
1877 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1878 */
1879
/*
 * Print a detailed description of a discovered device when booting
 * verbosely: IDs, location, class, timing registers, interrupt routing,
 * and any power management, MSI, or MSI-X capabilities found.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based; 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Report supported power states and the current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* The table and PBA may share a BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1936
1937static int
1938pci_porten(device_t pcib, int b, int s, int f)
1939{
1940	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1941		& PCIM_CMD_PORTEN) != 0;
1942}
1943
1944static int
1945pci_memen(device_t pcib, int b, int s, int f)
1946{
1947	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1948		& PCIM_CMD_MEMEN) != 0;
1949}
1950
1951/*
1952 * Add a resource based on a pci map register. Return 1 if the map
1953 * register is a 32bit map register or 2 if it is a 64bit register.
1954 */
1955static int
1956pci_add_map(device_t pcib, device_t bus, device_t dev,
1957    int b, int s, int f, int reg, struct resource_list *rl, int force,
1958    int prefetch)
1959{
1960	uint32_t map;
1961	pci_addr_t base;
1962	pci_addr_t start, end, count;
1963	uint8_t ln2size;
1964	uint8_t ln2range;
1965	uint32_t testval;
1966	uint16_t cmd;
1967	int type;
1968	int barlen;
1969	struct resource *res;
1970
1971	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1972	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1973	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1974	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1975
1976	if (PCI_BAR_MEM(map))
1977		type = SYS_RES_MEMORY;
1978	else
1979		type = SYS_RES_IOPORT;
1980	ln2size = pci_mapsize(testval);
1981	ln2range = pci_maprange(testval);
1982	base = pci_mapbase(map);
1983	barlen = ln2range == 64 ? 2 : 1;
1984
1985	/*
1986	 * For I/O registers, if bottom bit is set, and the next bit up
1987	 * isn't clear, we know we have a BAR that doesn't conform to the
1988	 * spec, so ignore it.  Also, sanity check the size of the data
1989	 * areas to the type of memory involved.  Memory must be at least
1990	 * 16 bytes in size, while I/O ranges must be at least 4.
1991	 */
1992	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
1993		return (barlen);
1994	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1995	    (type == SYS_RES_IOPORT && ln2size < 2))
1996		return (barlen);
1997
1998	if (ln2range == 64)
1999		/* Read the other half of a 64bit map register */
2000		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2001	if (bootverbose) {
2002		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2003		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2004		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2005			printf(", port disabled\n");
2006		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2007			printf(", memory disabled\n");
2008		else
2009			printf(", enabled\n");
2010	}
2011
2012	/*
2013	 * If base is 0, then we have problems.  It is best to ignore
2014	 * such entries for the moment.  These will be allocated later if
2015	 * the driver specifically requests them.  However, some
2016	 * removable busses look better when all resources are allocated,
2017	 * so allow '0' to be overriden.
2018	 *
2019	 * Similarly treat maps whose values is the same as the test value
2020	 * read back.  These maps have had all f's written to them by the
2021	 * BIOS in an attempt to disable the resources.
2022	 */
2023	if (!force && (base == 0 || map == testval))
2024		return (barlen);
2025	if ((u_long)base != base) {
2026		device_printf(bus,
2027		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2028		return (barlen);
2029	}
2030
2031	/*
2032	 * This code theoretically does the right thing, but has
2033	 * undesirable side effects in some cases where peripherals
2034	 * respond oddly to having these bits enabled.  Let the user
2035	 * be able to turn them off (since pci_enable_io_modes is 1 by
2036	 * default).
2037	 */
2038	if (pci_enable_io_modes) {
2039		/* Turn on resources that have been left off by a lazy BIOS */
2040		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2041			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2042			cmd |= PCIM_CMD_PORTEN;
2043			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2044		}
2045		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2046			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2047			cmd |= PCIM_CMD_MEMEN;
2048			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2049		}
2050	} else {
2051		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2052			return (barlen);
2053		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2054			return (barlen);
2055	}
2056
2057	count = 1 << ln2size;
2058	if (base == 0 || base == pci_mapbase(testval)) {
2059		start = 0;	/* Let the parent deside */
2060		end = ~0ULL;
2061	} else {
2062		start = base;
2063		end = base + (1 << ln2size) - 1;
2064	}
2065	resource_list_add(rl, type, reg, start, end, count);
2066
2067	/*
2068	 * Not quite sure what to do on failure of allocating the resource
2069	 * since I can postulate several right answers.
2070	 */
2071	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2072	    prefetch ? RF_PREFETCHABLE : 0);
2073	if (res == NULL)
2074		return (barlen);
2075	start = rman_get_start(res);
2076	if ((u_long)start != start) {
2077		/* Wait a minute!  this platform can't do this address. */
2078		device_printf(bus,
2079		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2080		    b, s, f, reg, (uintmax_t)start);
2081		resource_list_release(rl, bus, dev, type, reg, res);
2082		return (barlen);
2083	}
2084	pci_write_config(dev, reg, start, 4);
2085	if (ln2range == 64)
2086		pci_write_config(dev, reg + 4, start >> 32, 4);
2087	return (barlen);
2088}
2089
2090/*
2091 * For ATA devices we need to decide early what addressing mode to use.
2092 * Legacy demands that the primary and secondary ATA ports sits on the
2093 * same addresses that old ISA hardware did. This dictates that we use
2094 * those addresses and ignore the BAR's if we cannot set PCI native
2095 * addressing mode.
2096 */
2097static void
2098pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2099    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2100{
2101	int rid, type, progif;
2102#if 0
2103	/* if this device supports PCI native addressing use it */
2104	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2105	if ((progif & 0x8a) == 0x8a) {
2106		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2107		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2108			printf("Trying ATA native PCI addressing mode\n");
2109			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2110		}
2111	}
2112#endif
2113	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2114	type = SYS_RES_IOPORT;
2115	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2116		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2117		    prefetchmask & (1 << 0));
2118		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2119		    prefetchmask & (1 << 1));
2120	} else {
2121		rid = PCIR_BAR(0);
2122		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2123		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2124		    0);
2125		rid = PCIR_BAR(1);
2126		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2127		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2128		    0);
2129	}
2130	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2131		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2132		    prefetchmask & (1 << 2));
2133		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2134		    prefetchmask & (1 << 3));
2135	} else {
2136		rid = PCIR_BAR(2);
2137		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2138		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2139		    0);
2140		rid = PCIR_BAR(3);
2141		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2142		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2143		    0);
2144	}
2145	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2146	    prefetchmask & (1 << 4));
2147	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2148	    prefetchmask & (1 << 5));
2149}
2150
2151static void
2152pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2153{
2154	struct pci_devinfo *dinfo = device_get_ivars(dev);
2155	pcicfgregs *cfg = &dinfo->cfg;
2156	char tunable_name[64];
2157	int irq;
2158
2159	/* Has to have an intpin to have an interrupt. */
2160	if (cfg->intpin == 0)
2161		return;
2162
2163	/* Let the user override the IRQ with a tunable. */
2164	irq = PCI_INVALID_IRQ;
2165	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2166	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2167	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2168		irq = PCI_INVALID_IRQ;
2169
2170	/*
2171	 * If we didn't get an IRQ via the tunable, then we either use the
2172	 * IRQ value in the intline register or we ask the bus to route an
2173	 * interrupt for us.  If force_route is true, then we only use the
2174	 * value in the intline register if the bus was unable to assign an
2175	 * IRQ.
2176	 */
2177	if (!PCI_INTERRUPT_VALID(irq)) {
2178		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2179			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2180		if (!PCI_INTERRUPT_VALID(irq))
2181			irq = cfg->intline;
2182	}
2183
2184	/* If after all that we don't have an IRQ, just bail. */
2185	if (!PCI_INTERRUPT_VALID(irq))
2186		return;
2187
2188	/* Update the config register if it changed. */
2189	if (irq != cfg->intline) {
2190		cfg->intline = irq;
2191		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2192	}
2193
2194	/* Add this IRQ as rid 0 interrupt resource. */
2195	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2196}
2197
2198void
2199pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2200{
2201	device_t pcib;
2202	struct pci_devinfo *dinfo = device_get_ivars(dev);
2203	pcicfgregs *cfg = &dinfo->cfg;
2204	struct resource_list *rl = &dinfo->resources;
2205	struct pci_quirk *q;
2206	int b, i, f, s;
2207
2208	pcib = device_get_parent(bus);
2209
2210	b = cfg->bus;
2211	s = cfg->slot;
2212	f = cfg->func;
2213
2214	/* ATA devices needs special map treatment */
2215	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2216	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2217	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2218	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2219	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2220		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2221	else
2222		for (i = 0; i < cfg->nummaps;)
2223			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2224			    rl, force, prefetchmask & (1 << i));
2225
2226	/*
2227	 * Add additional, quirked resources.
2228	 */
2229	for (q = &pci_quirks[0]; q->devid; q++) {
2230		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2231		    && q->type == PCI_QUIRK_MAP_REG)
2232			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2233			  force, 0);
2234	}
2235
2236	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2237#ifdef __PCI_REROUTE_INTERRUPT
2238		/*
2239		 * Try to re-route interrupts. Sometimes the BIOS or
2240		 * firmware may leave bogus values in these registers.
2241		 * If the re-route fails, then just stick with what we
2242		 * have.
2243		 */
2244		pci_assign_interrupt(bus, dev, 1);
2245#else
2246		pci_assign_interrupt(bus, dev, 0);
2247#endif
2248	}
2249}
2250
2251void
2252pci_add_children(device_t dev, int busno, size_t dinfo_size)
2253{
2254#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2255	device_t pcib = device_get_parent(dev);
2256	struct pci_devinfo *dinfo;
2257	int maxslots;
2258	int s, f, pcifunchigh;
2259	uint8_t hdrtype;
2260
2261	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2262	    ("dinfo_size too small"));
2263	maxslots = PCIB_MAXSLOTS(pcib);
2264	for (s = 0; s <= maxslots; s++) {
2265		pcifunchigh = 0;
2266		f = 0;
2267		DELAY(1);
2268		hdrtype = REG(PCIR_HDRTYPE, 1);
2269		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2270			continue;
2271		if (hdrtype & PCIM_MFDEV)
2272			pcifunchigh = PCI_FUNCMAX;
2273		for (f = 0; f <= pcifunchigh; f++) {
2274			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2275			if (dinfo != NULL) {
2276				pci_add_child(dev, dinfo);
2277			}
2278		}
2279	}
2280#undef REG
2281}
2282
2283void
2284pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2285{
2286	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2287	device_set_ivars(dinfo->cfg.dev, dinfo);
2288	resource_list_init(&dinfo->resources);
2289	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2290	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2291	pci_print_verbose(dinfo);
2292	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2293}
2294
2295static int
2296pci_probe(device_t dev)
2297{
2298
2299	device_set_desc(dev, "PCI bus");
2300
2301	/* Allow other subclasses to override this driver. */
2302	return (-1000);
2303}
2304
2305static int
2306pci_attach(device_t dev)
2307{
2308	int busno;
2309
2310	/*
2311	 * Since there can be multiple independantly numbered PCI
2312	 * busses on systems with multiple PCI domains, we can't use
2313	 * the unit number to decide which bus we are probing. We ask
2314	 * the parent pcib what our bus number is.
2315	 */
2316	busno = pcib_get_bus(dev);
2317	if (bootverbose)
2318		device_printf(dev, "physical bus=%d\n", busno);
2319
2320	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2321
2322	return (bus_generic_attach(dev));
2323}
2324
2325int
2326pci_suspend(device_t dev)
2327{
2328	int dstate, error, i, numdevs;
2329	device_t acpi_dev, child, *devlist;
2330	struct pci_devinfo *dinfo;
2331
2332	/*
2333	 * Save the PCI configuration space for each child and set the
2334	 * device in the appropriate power state for this sleep state.
2335	 */
2336	acpi_dev = NULL;
2337	if (pci_do_power_resume)
2338		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2339	device_get_children(dev, &devlist, &numdevs);
2340	for (i = 0; i < numdevs; i++) {
2341		child = devlist[i];
2342		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2343		pci_cfg_save(child, dinfo, 0);
2344	}
2345
2346	/* Suspend devices before potentially powering them down. */
2347	error = bus_generic_suspend(dev);
2348	if (error) {
2349		free(devlist, M_TEMP);
2350		return (error);
2351	}
2352
2353	/*
2354	 * Always set the device to D3.  If ACPI suggests a different
2355	 * power state, use it instead.  If ACPI is not present, the
2356	 * firmware is responsible for managing device power.  Skip
2357	 * children who aren't attached since they are powered down
2358	 * separately.  Only manage type 0 devices for now.
2359	 */
2360	for (i = 0; acpi_dev && i < numdevs; i++) {
2361		child = devlist[i];
2362		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2363		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2364			dstate = PCI_POWERSTATE_D3;
2365			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2366			pci_set_powerstate(child, dstate);
2367		}
2368	}
2369	free(devlist, M_TEMP);
2370	return (0);
2371}
2372
2373int
2374pci_resume(device_t dev)
2375{
2376	int i, numdevs;
2377	device_t acpi_dev, child, *devlist;
2378	struct pci_devinfo *dinfo;
2379
2380	/*
2381	 * Set each child to D0 and restore its PCI configuration space.
2382	 */
2383	acpi_dev = NULL;
2384	if (pci_do_power_resume)
2385		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2386	device_get_children(dev, &devlist, &numdevs);
2387	for (i = 0; i < numdevs; i++) {
2388		/*
2389		 * Notify ACPI we're going to D0 but ignore the result.  If
2390		 * ACPI is not present, the firmware is responsible for
2391		 * managing device power.  Only manage type 0 devices for now.
2392		 */
2393		child = devlist[i];
2394		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2395		if (acpi_dev && device_is_attached(child) &&
2396		    dinfo->cfg.hdrtype == 0) {
2397			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2398			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2399		}
2400
2401		/* Now the device is powered up, restore its config space. */
2402		pci_cfg_restore(child, dinfo);
2403	}
2404	free(devlist, M_TEMP);
2405	return (bus_generic_resume(dev));
2406}
2407
2408static void
2409pci_load_vendor_data(void)
2410{
2411	caddr_t vendordata, info;
2412
2413	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2414		info = preload_search_info(vendordata, MODINFO_ADDR);
2415		pci_vendordata = *(char **)info;
2416		info = preload_search_info(vendordata, MODINFO_SIZE);
2417		pci_vendordata_size = *(size_t *)info;
2418		/* terminate the database */
2419		pci_vendordata[pci_vendordata_size] = '\n';
2420	}
2421}
2422
2423void
2424pci_driver_added(device_t dev, driver_t *driver)
2425{
2426	int numdevs;
2427	device_t *devlist;
2428	device_t child;
2429	struct pci_devinfo *dinfo;
2430	int i;
2431
2432	if (bootverbose)
2433		device_printf(dev, "driver added\n");
2434	DEVICE_IDENTIFY(driver, dev);
2435	device_get_children(dev, &devlist, &numdevs);
2436	for (i = 0; i < numdevs; i++) {
2437		child = devlist[i];
2438		if (device_get_state(child) != DS_NOTPRESENT)
2439			continue;
2440		dinfo = device_get_ivars(child);
2441		pci_print_verbose(dinfo);
2442		if (bootverbose)
2443			printf("pci%d:%d:%d: reprobing on driver added\n",
2444			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2445		pci_cfg_restore(child, dinfo);
2446		if (device_probe_and_attach(child) != 0)
2447			pci_cfg_save(child, dinfo, 1);
2448	}
2449	free(devlist, M_TEMP);
2450}
2451
2452int
2453pci_print_child(device_t dev, device_t child)
2454{
2455	struct pci_devinfo *dinfo;
2456	struct resource_list *rl;
2457	int retval = 0;
2458
2459	dinfo = device_get_ivars(child);
2460	rl = &dinfo->resources;
2461
2462	retval += bus_print_child_header(dev, child);
2463
2464	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2465	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2466	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2467	if (device_get_flags(dev))
2468		retval += printf(" flags %#x", device_get_flags(dev));
2469
2470	retval += printf(" at device %d.%d", pci_get_slot(child),
2471	    pci_get_function(child));
2472
2473	retval += bus_print_child_footer(dev, child);
2474
2475	return (retval);
2476}
2477
/*
 * Class/subclass description table used by pci_probe_nomatch() to name
 * devices no driver claimed.  A subclass of -1 supplies the description
 * for the class as a whole; a matching subclass entry supplies a more
 * specific description printed alongside it.  The table is terminated
 * by a NULL desc entry.
 */
static struct
{
	int	class;		/* PCIC_* base class code */
	int	subclass;	/* PCIS_* subclass code, or -1 for the class */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2564
2565void
2566pci_probe_nomatch(device_t dev, device_t child)
2567{
2568	int	i;
2569	char	*cp, *scp, *device;
2570
2571	/*
2572	 * Look for a listing for this device in a loaded device database.
2573	 */
2574	if ((device = pci_describe_device(child)) != NULL) {
2575		device_printf(dev, "<%s>", device);
2576		free(device, M_DEVBUF);
2577	} else {
2578		/*
2579		 * Scan the class/subclass descriptions for a general
2580		 * description.
2581		 */
2582		cp = "unknown";
2583		scp = NULL;
2584		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2585			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2586				if (pci_nomatch_tab[i].subclass == -1) {
2587					cp = pci_nomatch_tab[i].desc;
2588				} else if (pci_nomatch_tab[i].subclass ==
2589				    pci_get_subclass(child)) {
2590					scp = pci_nomatch_tab[i].desc;
2591				}
2592			}
2593		}
2594		device_printf(dev, "<%s%s%s>",
2595		    cp ? cp : "",
2596		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2597		    scp ? scp : "");
2598	}
2599	printf(" at device %d.%d (no driver attached)\n",
2600	    pci_get_slot(child), pci_get_function(child));
2601	if (pci_do_power_nodriver)
2602		pci_cfg_save(child,
2603		    (struct pci_devinfo *) device_get_ivars(child), 1);
2604	return;
2605}
2606
2607/*
2608 * Parse the PCI device database, if loaded, and return a pointer to a
2609 * description of the device.
2610 *
2611 * The database is flat text formatted as follows:
2612 *
2613 * Any line not in a valid format is ignored.
2614 * Lines are terminated with newline '\n' characters.
2615 *
2616 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2617 * the vendor name.
2618 *
2619 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2620 * - devices cannot be listed without a corresponding VENDOR line.
2621 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2622 * another TAB, then the device name.
2623 */
2624
2625/*
2626 * Assuming (ptr) points to the beginning of a line in the database,
2627 * return the vendor or device and description of the next entry.
2628 * The value of (vendor) or (device) inappropriate for the entry type
2629 * is set to -1.  Returns nonzero at the end of the database.
2630 *
2631 * Note that this is slightly unrobust in the face of corrupt data;
2632 * we attempt to safeguard against this by spamming the end of the
2633 * database with a newline when we initialise.
2634 */
2635static int
2636pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2637{
2638	char	*cp = *ptr;
2639	int	left;
2640
2641	*device = -1;
2642	*vendor = -1;
2643	**desc = '\0';
2644	for (;;) {
2645		left = pci_vendordata_size - (cp - pci_vendordata);
2646		if (left <= 0) {
2647			*ptr = cp;
2648			return(1);
2649		}
2650
2651		/* vendor entry? */
2652		if (*cp != '\t' &&
2653		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2654			break;
2655		/* device entry? */
2656		if (*cp == '\t' &&
2657		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2658			break;
2659
2660		/* skip to next line */
2661		while (*cp != '\n' && left > 0) {
2662			cp++;
2663			left--;
2664		}
2665		if (*cp == '\n') {
2666			cp++;
2667			left--;
2668		}
2669	}
2670	/* skip to next line */
2671	while (*cp != '\n' && left > 0) {
2672		cp++;
2673		left--;
2674	}
2675	if (*cp == '\n' && left > 0)
2676		cp++;
2677	*ptr = cp;
2678	return(0);
2679}
2680
/*
 * Build a "vendor, device" description string for a device from the
 * preloaded vendor database, or NULL if no database is loaded or
 * allocation fails.  The caller owns the returned string and must
 * free it with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* vp holds the vendor name (parse_line buffers are 80 bytes) */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* dp holds the device name */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* end of database: give up on a device name */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* a new vendor entry ends this vendor's device list */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device name found: fall back to the numeric device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
2733
2734int
2735pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
2736{
2737	struct pci_devinfo *dinfo;
2738	pcicfgregs *cfg;
2739
2740	dinfo = device_get_ivars(child);
2741	cfg = &dinfo->cfg;
2742
2743	switch (which) {
2744	case PCI_IVAR_ETHADDR:
2745		/*
2746		 * The generic accessor doesn't deal with failure, so
2747		 * we set the return value, then return an error.
2748		 */
2749		*((uint8_t **) result) = NULL;
2750		return (EINVAL);
2751	case PCI_IVAR_SUBVENDOR:
2752		*result = cfg->subvendor;
2753		break;
2754	case PCI_IVAR_SUBDEVICE:
2755		*result = cfg->subdevice;
2756		break;
2757	case PCI_IVAR_VENDOR:
2758		*result = cfg->vendor;
2759		break;
2760	case PCI_IVAR_DEVICE:
2761		*result = cfg->device;
2762		break;
2763	case PCI_IVAR_DEVID:
2764		*result = (cfg->device << 16) | cfg->vendor;
2765		break;
2766	case PCI_IVAR_CLASS:
2767		*result = cfg->baseclass;
2768		break;
2769	case PCI_IVAR_SUBCLASS:
2770		*result = cfg->subclass;
2771		break;
2772	case PCI_IVAR_PROGIF:
2773		*result = cfg->progif;
2774		break;
2775	case PCI_IVAR_REVID:
2776		*result = cfg->revid;
2777		break;
2778	case PCI_IVAR_INTPIN:
2779		*result = cfg->intpin;
2780		break;
2781	case PCI_IVAR_IRQ:
2782		*result = cfg->intline;
2783		break;
2784	case PCI_IVAR_BUS:
2785		*result = cfg->bus;
2786		break;
2787	case PCI_IVAR_SLOT:
2788		*result = cfg->slot;
2789		break;
2790	case PCI_IVAR_FUNCTION:
2791		*result = cfg->func;
2792		break;
2793	case PCI_IVAR_CMDREG:
2794		*result = cfg->cmdreg;
2795		break;
2796	case PCI_IVAR_CACHELNSZ:
2797		*result = cfg->cachelnsz;
2798		break;
2799	case PCI_IVAR_MINGNT:
2800		*result = cfg->mingnt;
2801		break;
2802	case PCI_IVAR_MAXLAT:
2803		*result = cfg->maxlat;
2804		break;
2805	case PCI_IVAR_LATTIMER:
2806		*result = cfg->lattimer;
2807		break;
2808	default:
2809		return (ENOENT);
2810	}
2811	return (0);
2812}
2813
2814int
2815pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2816{
2817	struct pci_devinfo *dinfo;
2818
2819	dinfo = device_get_ivars(child);
2820
2821	switch (which) {
2822	case PCI_IVAR_INTPIN:
2823		dinfo->cfg.intpin = value;
2824		return (0);
2825	case PCI_IVAR_ETHADDR:
2826	case PCI_IVAR_SUBVENDOR:
2827	case PCI_IVAR_SUBDEVICE:
2828	case PCI_IVAR_VENDOR:
2829	case PCI_IVAR_DEVICE:
2830	case PCI_IVAR_DEVID:
2831	case PCI_IVAR_CLASS:
2832	case PCI_IVAR_SUBCLASS:
2833	case PCI_IVAR_PROGIF:
2834	case PCI_IVAR_REVID:
2835	case PCI_IVAR_IRQ:
2836	case PCI_IVAR_BUS:
2837	case PCI_IVAR_SLOT:
2838	case PCI_IVAR_FUNCTION:
2839		return (EINVAL);	/* disallow for now */
2840
2841	default:
2842		return (ENOENT);
2843	}
2844}
2845
2846
2847#include "opt_ddb.h"
2848#ifdef DDB
2849#include <ddb/ddb.h>
2850#include <sys/cons.h>
2851
2852/*
2853 * List resources based on pci map registers, used for within ddb
2854 */
2855
2856DB_SHOW_COMMAND(pciregs, db_pci_dump)
2857{
2858	struct pci_devinfo *dinfo;
2859	struct devlist *devlist_head;
2860	struct pci_conf *p;
2861	const char *name;
2862	int i, error, none_count;
2863
2864	none_count = 0;
2865	/* get the head of the device queue */
2866	devlist_head = &pci_devq;
2867
2868	/*
2869	 * Go through the list of devices and print out devices
2870	 */
2871	for (error = 0, i = 0,
2872	     dinfo = STAILQ_FIRST(devlist_head);
2873	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
2874	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
2875
2876		/* Populate pd_name and pd_unit */
2877		name = NULL;
2878		if (dinfo->cfg.dev)
2879			name = device_get_name(dinfo->cfg.dev);
2880
2881		p = &dinfo->conf;
2882		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
2883			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
2884			(name && *name) ? name : "none",
2885			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
2886			none_count++,
2887			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
2888			p->pc_sel.pc_func, (p->pc_class << 16) |
2889			(p->pc_subclass << 8) | p->pc_progif,
2890			(p->pc_subdevice << 16) | p->pc_subvendor,
2891			(p->pc_device << 16) | p->pc_vendor,
2892			p->pc_revid, p->pc_hdr);
2893	}
2894}
2895#endif /* DDB */
2896
/*
 * Lazily allocate a resource for a BAR that has no entry in the child's
 * resource list yet.  The BAR is sized by the standard probe sequence
 * (save, write all-ones, read back, restore); the statement order of
 * the config-space accesses below is therefore significant and must
 * not be changed.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;	/* saved BAR value / size probe */
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit memory BAR consumes two registers; merge the top half. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* Reject requests whose type contradicts the BAR's memory/IO bit. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned; raise the alignment if needed. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Restore (or program) the BAR, including a 64-bit BAR's high half. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2985
2986
/*
 * Bus method: allocate a resource for a child device.  For our own
 * children this performs lazy allocation: IRQs may be routed on first
 * use, BAR-backed port/memory ranges are sized and allocated on demand
 * via pci_alloc_map(), and previously reserved entries are handed back
 * (activating them here if RF_ACTIVE was requested, since we bypass
 * the normal nexus path).
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that map to real BARs are decoded here. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			/* No entry yet: size and allocate the BAR lazily. */
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list path. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3063
/*
 * Bus method: delete a resource from a direct child's resource list.
 *
 * Refuses (with a diagnostic) to delete a resource that is still owned
 * by the child or still active; otherwise releases it, removes the list
 * entry, zeroes the config register named by 'rid', and forwards the
 * deletion up to the grandparent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only manage resource lists for our immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/*
			 * Don't yank a resource the child still holds or
			 * has active; that would leave a dangling mapping.
			 */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	/* NOTE(review): this writes register 'rid' unconditionally, even
	 * when rid does not name a BAR — presumably intentional here, but
	 * worth confirming against callers. */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3099
3100struct resource_list *
3101pci_get_resource_list (device_t dev, device_t child)
3102{
3103	struct pci_devinfo *dinfo = device_get_ivars(child);
3104
3105	return (&dinfo->resources);
3106}
3107
3108uint32_t
3109pci_read_config_method(device_t dev, device_t child, int reg, int width)
3110{
3111	struct pci_devinfo *dinfo = device_get_ivars(child);
3112	pcicfgregs *cfg = &dinfo->cfg;
3113
3114	return (PCIB_READ_CONFIG(device_get_parent(dev),
3115	    cfg->bus, cfg->slot, cfg->func, reg, width));
3116}
3117
3118void
3119pci_write_config_method(device_t dev, device_t child, int reg,
3120    uint32_t val, int width)
3121{
3122	struct pci_devinfo *dinfo = device_get_ivars(child);
3123	pcicfgregs *cfg = &dinfo->cfg;
3124
3125	PCIB_WRITE_CONFIG(device_get_parent(dev),
3126	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3127}
3128
3129int
3130pci_child_location_str_method(device_t dev, device_t child, char *buf,
3131    size_t buflen)
3132{
3133
3134	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3135	    pci_get_function(child));
3136	return (0);
3137}
3138
3139int
3140pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3141    size_t buflen)
3142{
3143	struct pci_devinfo *dinfo;
3144	pcicfgregs *cfg;
3145
3146	dinfo = device_get_ivars(child);
3147	cfg = &dinfo->cfg;
3148	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3149	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3150	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3151	    cfg->progif);
3152	return (0);
3153}
3154
3155int
3156pci_assign_interrupt_method(device_t dev, device_t child)
3157{
3158	struct pci_devinfo *dinfo = device_get_ivars(child);
3159	pcicfgregs *cfg = &dinfo->cfg;
3160
3161	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3162	    cfg->intpin));
3163}
3164
3165static int
3166pci_modevent(module_t mod, int what, void *arg)
3167{
3168	static struct cdev *pci_cdev;
3169
3170	switch (what) {
3171	case MOD_LOAD:
3172		STAILQ_INIT(&pci_devq);
3173		pci_generation = 0;
3174		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3175		    "pci");
3176		pci_load_vendor_data();
3177		break;
3178
3179	case MOD_UNLOAD:
3180		destroy_dev(pci_cdev);
3181		break;
3182	}
3183
3184	return (0);
3185}
3186
/*
 * Restore a device's saved type 0 config header, e.g. after resume or a
 * power-state transition.  The device is brought to D0 first, because
 * leaving D3 resets BARs and other registers; only then are the cached
 * header values written back.  MSI state is restored last if present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back the cached BARs and the writable header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3233
/*
 * Snapshot a device's type 0 config header into 'dinfo' so that
 * pci_cfg_restore() can later write it back, then optionally power the
 * device down according to the pci_do_power_nodriver policy.
 *
 * 'setstate' nonzero requests the power-down; it is still subject to
 * the policy checks below.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Intentional fallthroughs: each level adds to the previous one. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3317