pci.c revision 166802
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 166802 2007-02-17 16:56:39Z sos $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static int		pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static int		pci_msi_blacklisted(void);
105
106static device_method_t pci_methods[] = {
107	/* Device interface */
108	DEVMETHOD(device_probe,		pci_probe),
109	DEVMETHOD(device_attach,	pci_attach),
110	DEVMETHOD(device_detach,	bus_generic_detach),
111	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
112	DEVMETHOD(device_suspend,	pci_suspend),
113	DEVMETHOD(device_resume,	pci_resume),
114
115	/* Bus interface */
116	DEVMETHOD(bus_print_child,	pci_print_child),
117	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
118	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
119	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
120	DEVMETHOD(bus_driver_added,	pci_driver_added),
121	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
122	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
123
124	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
125	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
126	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
127	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
128	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
129	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
130	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
131	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
132	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
133	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
134
135	/* PCI interface */
136	DEVMETHOD(pci_read_config,	pci_read_config_method),
137	DEVMETHOD(pci_write_config,	pci_write_config_method),
138	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
139	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
140	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
141	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
142	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
143	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
144	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
145	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
146	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
147	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
148	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
149	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
150	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
151	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
152	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
153	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
154
155	{ 0, 0 }
156};
157
158DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
159
160static devclass_t pci_devclass;
161DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
162MODULE_VERSION(pci, 1);
163
164static char	*pci_vendordata;
165static size_t	pci_vendordata_size;
166
167
/*
 * Table of known-broken devices, keyed by PCI device ID.  Matching is a
 * linear scan; the table is terminated by an all-zero entry.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

/*
 * devid packs the device ID in the upper 16 bits and the vendor ID in the
 * lower 16 (e.g. 0x....8086 entries are Intel parts).
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
211
212/* map register information */
213#define	PCI_MAPMEM	0x01	/* memory map */
214#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
215#define	PCI_MAPPORT	0x04	/* port map */
216
217struct devlist pci_devq;
218uint32_t pci_generation;
219uint32_t pci_numdevs = 0;
220static int pcie_chipset, pcix_chipset;
221
222/* sysctl vars */
223SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
224
225static int pci_enable_io_modes = 1;
226TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
227SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
228    &pci_enable_io_modes, 1,
229    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
230enable these bits correctly.  We'd like to do this all the time, but there\n\
231are some peripherals that this causes problems with.");
232
233static int pci_do_power_nodriver = 0;
234TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
235SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
236    &pci_do_power_nodriver, 0,
237  "Place a function into D3 state when no driver attaches to it.  0 means\n\
238disable.  1 means conservatively place devices into D3 state.  2 means\n\
239agressively place devices into D3 state.  3 means put absolutely everything\n\
240in D3 state.");
241
242static int pci_do_power_resume = 1;
243TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
244SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
245    &pci_do_power_resume, 1,
246  "Transition from D3 -> D0 on resume.");
247
248static int pci_do_vpd = 1;
249TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
250SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
251    "Enable support for VPD.");
252
253static int pci_do_msi = 1;
254TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
255SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
256    "Enable support for MSI interrupts");
257
258static int pci_do_msix = 1;
259TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
260SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
261    "Enable support for MSI-X interrupts");
262
263static int pci_honor_msi_blacklist = 1;
264TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
265SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
266    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267
268/* Find a device_t by bus/slot/function */
269
270device_t
271pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272{
273	struct pci_devinfo *dinfo;
274
275	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276		if ((dinfo->cfg.bus == bus) &&
277		    (dinfo->cfg.slot == slot) &&
278		    (dinfo->cfg.func == func)) {
279			return (dinfo->cfg.dev);
280		}
281	}
282
283	return (NULL);
284}
285
286/* Find a device_t by vendor/device ID */
287
288device_t
289pci_find_device(uint16_t vendor, uint16_t device)
290{
291	struct pci_devinfo *dinfo;
292
293	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294		if ((dinfo->cfg.vendor == vendor) &&
295		    (dinfo->cfg.device == device)) {
296			return (dinfo->cfg.dev);
297		}
298	}
299
300	return (NULL);
301}
302
303/* return base address of memory or port map */
304
305static uint32_t
306pci_mapbase(uint32_t mapreg)
307{
308	int mask = 0x03;
309	if ((mapreg & 0x01) == 0)
310		mask = 0x0f;
311	return (mapreg & ~mask);
312}
313
314/* return map type of memory or port map */
315
316static int
317pci_maptype(unsigned mapreg)
318{
319	static uint8_t maptype[0x10] = {
320		PCI_MAPMEM,		PCI_MAPPORT,
321		PCI_MAPMEM,		0,
322		PCI_MAPMEM,		PCI_MAPPORT,
323		0,			0,
324		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
325		PCI_MAPMEM|PCI_MAPMEMP, 0,
326		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
327		0,			0,
328	};
329
330	return maptype[mapreg & 0x0f];
331}
332
333/* return log2 of map size decoded for memory or port map */
334
335static int
336pci_mapsize(uint32_t testval)
337{
338	int ln2size;
339
340	testval = pci_mapbase(testval);
341	ln2size = 0;
342	if (testval != 0) {
343		while ((testval & 1) == 0)
344		{
345			ln2size++;
346			testval >>= 1;
347		}
348	}
349	return (ln2size);
350}
351
/*
 * Return log2 of the address range a BAR can decode, based on the low
 * three type bits: 32-bit I/O or memory, memory below 1MB (2^20), or
 * 64-bit memory.  Reserved encodings yield 0.
 */
static int
pci_maprange(unsigned mapreg)
{

	switch (mapreg & 0x07) {
	case 0x02:
		return (20);	/* memory below 1MB */
	case 0x04:
		return (64);	/* 64-bit memory BAR */
	case 0x00:
	case 0x01:
	case 0x05:
		return (32);
	default:
		return (0);	/* reserved encoding */
	}
}
373
374/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
375
376static void
377pci_fixancient(pcicfgregs *cfg)
378{
379	if (cfg->hdrtype != 0)
380		return;
381
382	/* PCI to PCI bridges use header type 1 */
383	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
384		cfg->hdrtype = 1;
385}
386
387/* extract header type specific config data */
388
389static void
390pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
391{
392#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
393	switch (cfg->hdrtype) {
394	case 0:
395		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
396		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
397		cfg->nummaps	    = PCI_MAXMAPS_0;
398		break;
399	case 1:
400		cfg->nummaps	    = PCI_MAXMAPS_1;
401		break;
402	case 2:
403		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
404		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
405		cfg->nummaps	    = PCI_MAXMAPS_2;
406		break;
407	}
408#undef REG
409}
410
/* read configuration header into pcicfgregs structure */
/*
 * Probe one bus/slot/function address.  If a device responds (the
 * vendor/device dword is not all-ones), allocate a pci_devinfo of the
 * caller-specified size, fill in its config snapshot and pciconf
 * mirror, link it onto the global device list, and return it.
 * Returns NULL when no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones reads back when no device decodes this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the snapshot into the pciconf ioctl structure. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
483
484static void
485pci_read_extcap(device_t pcib, pcicfgregs *cfg)
486{
487#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
488#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
489#if defined(__i386__) || defined(__amd64__)
490	uint64_t addr;
491#endif
492	uint32_t val;
493	int	ptr, nextptr, ptrptr;
494
495	switch (cfg->hdrtype & PCIM_HDRTYPE) {
496	case 0:
497	case 1:
498		ptrptr = PCIR_CAP_PTR;
499		break;
500	case 2:
501		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
502		break;
503	default:
504		return;		/* no extended capabilities support */
505	}
506	nextptr = REG(ptrptr, 1);	/* sanity check? */
507
508	/*
509	 * Read capability entries.
510	 */
511	while (nextptr != 0) {
512		/* Sanity check */
513		if (nextptr > 255) {
514			printf("illegal PCI extended capability offset %d\n",
515			    nextptr);
516			return;
517		}
518		/* Find the next entry */
519		ptr = nextptr;
520		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
521
522		/* Process this entry */
523		switch (REG(ptr + PCICAP_ID, 1)) {
524		case PCIY_PMG:		/* PCI power management */
525			if (cfg->pp.pp_cap == 0) {
526				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
527				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
528				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
529				if ((nextptr - ptr) > PCIR_POWER_DATA)
530					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
531			}
532			break;
533#if defined(__i386__) || defined(__amd64__)
534		case PCIY_HT:		/* HyperTransport */
535			/* Determine HT-specific capability type. */
536			val = REG(ptr + PCIR_HT_COMMAND, 2);
537			switch (val & PCIM_HTCMD_CAP_MASK) {
538			case PCIM_HTCAP_MSI_MAPPING:
539				/* Sanity check the mapping window. */
540				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
541				addr <<= 32;
542				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
543				if (addr != MSI_INTEL_ADDR_BASE)
544					device_printf(pcib,
545		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546					    cfg->bus, cfg->slot, cfg->func,
547					    (long long)addr);
548
549				/* Enable MSI -> HT mapping. */
550				val |= PCIM_HTCMD_MSI_ENABLE;
551				WREG(ptr + PCIR_HT_COMMAND, val, 2);
552				break;
553			}
554			break;
555#endif
556		case PCIY_MSI:		/* PCI MSI */
557			cfg->msi.msi_location = ptr;
558			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
559			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
560						     PCIM_MSICTRL_MMC_MASK)>>1);
561			break;
562		case PCIY_MSIX:		/* PCI MSI-X */
563			cfg->msix.msix_location = ptr;
564			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
565			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
566			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
567			val = REG(ptr + PCIR_MSIX_TABLE, 4);
568			cfg->msix.msix_table_bar = PCIR_BAR(val &
569			    PCIM_MSIX_BIR_MASK);
570			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
571			val = REG(ptr + PCIR_MSIX_PBA, 4);
572			cfg->msix.msix_pba_bar = PCIR_BAR(val &
573			    PCIM_MSIX_BIR_MASK);
574			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
575			break;
576		case PCIY_VPD:		/* PCI Vital Product Data */
577			if (pci_do_vpd) {
578				cfg->vpd.vpd_reg = ptr;
579				pci_read_vpd(pcib, cfg);
580			}
581			break;
582		case PCIY_SUBVENDOR:
583			/* Should always be true. */
584			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
585				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
586				cfg->subvendor = val & 0xffff;
587				cfg->subdevice = val >> 16;
588			}
589			break;
590		case PCIY_PCIX:		/* PCI-X */
591			/*
592			 * Assume we have a PCI-X chipset if we have
593			 * at least one PCI-PCI bridge with a PCI-X
594			 * capability.  Note that some systems with
595			 * PCI-express or HT chipsets might match on
596			 * this check as well.
597			 */
598			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
599				pcix_chipset = 1;
600			break;
601		case PCIY_EXPRESS:	/* PCI-express */
602			/*
603			 * Assume we have a PCI-express chipset if we have
604			 * at least one PCI-express root port.
605			 */
606			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
607			if ((val & PCIM_EXP_FLAGS_TYPE) ==
608			    PCIM_EXP_TYPE_ROOT_PORT)
609				pcie_chipset = 1;
610			break;
611		default:
612			break;
613		}
614	}
615/* REG and WREG use carry through to next functions */
616}
617
/*
 * PCI Vital Product Data
 */
/*
 * Read one 32-bit word of VPD data at byte offset 'reg' via the VPD
 * capability's address/data register pair.  Uses the REG/WREG macros
 * that deliberately remain defined from pci_read_extcap() above.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with bit 15 (F) clear starts a read cycle. */
	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
	/*
	 * Poll until hardware sets F to signal completion.
	 * NOTE(review): there is no iteration bound here — a hung device
	 * would spin this loop forever; confirm acceptable for this rev.
	 */
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return REG(cfg->vpd.vpd_reg + 4, 4);
}
633
#if 0
/*
 * Write one 32-bit word of VPD data at byte offset 'reg'.  Mirror of
 * pci_read_vpd_reg(): data first, then the address with bit 15 (F)
 * set, then poll for F to clear.  Currently compiled out (no callers).
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + 4, data, 4);
	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
648
/* Cursor state for streaming bytes out of a device's VPD area. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum of bytes delivered */
};
657
658static uint8_t
659vpd_nextbyte(struct vpd_readstate *vrs)
660{
661	uint8_t byte;
662
663	if (vrs->bytesinval == 0) {
664		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
665		    vrs->off));
666		vrs->off += 4;
667		byte = vrs->val & 0xff;
668		vrs->bytesinval = 3;
669	} else {
670		vrs->val = vrs->val >> 8;
671		byte = vrs->val & 0xff;
672		vrs->bytesinval--;
673	}
674
675	vrs->cksum += byte;
676	return byte;
677}
678
/*
 * Parse the device's Vital Product Data into cfg->vpd.  The VPD is a
 * stream of resource items; this is a byte-at-a-time state machine:
 *   state 0 - item header (small or large resource tag)
 *   state 1 - identifier string bytes
 *   state 2 - VPD-R (read-only) keyword header
 *   state 3 - VPD-R keyword value bytes (also validates the RV checksum)
 *   state 4 - skip bytes (unused entry type)
 *   state 5 - VPD-W (read-write) keyword header
 *   state 6 - VPD-W keyword value bytes
 * On a failed checksum the read-only data is discarded.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				/* VPD may span at most 0x7f 32-bit words. */
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array geometrically. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			/* Three header bytes consumed from the item. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the sum of all
			 * bytes from the VPD start equal zero (mod 256).
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes of an item we don't record. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/* read-only data bad, clean up */
		/*
		 * NOTE(review): this loop frees indices off..1 but never
		 * index 0, and depending on which state aborted, the entry
		 * at index 'off' may not have an allocated value — verify
		 * against the state-machine paths before changing.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
#undef REG
#undef WREG
}
911
912int
913pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
914{
915	struct pci_devinfo *dinfo = device_get_ivars(child);
916	pcicfgregs *cfg = &dinfo->cfg;
917
918	*identptr = cfg->vpd.vpd_ident;
919
920	if (*identptr == NULL)
921		return ENXIO;
922
923	return 0;
924}
925
926int
927pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
928	const char **vptr)
929{
930	struct pci_devinfo *dinfo = device_get_ivars(child);
931	pcicfgregs *cfg = &dinfo->cfg;
932	int i;
933
934	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
935		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
936		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
937			*vptr = cfg->vpd.vpd_ros[i].value;
938		}
939
940	if (i != cfg->vpd.vpd_rocnt)
941		return 0;
942
943	*vptr = NULL;
944	return ENXIO;
945}
946
947/*
948 * Return the offset in configuration space of the requested extended
949 * capability entry or 0 if the specified capability was not found.
950 */
951int
952pci_find_extcap_method(device_t dev, device_t child, int capability,
953    int *capreg)
954{
955	struct pci_devinfo *dinfo = device_get_ivars(child);
956	pcicfgregs *cfg = &dinfo->cfg;
957	u_int32_t status;
958	u_int8_t ptr;
959
960	/*
961	 * Check the CAP_LIST bit of the PCI status register first.
962	 */
963	status = pci_read_config(child, PCIR_STATUS, 2);
964	if (!(status & PCIM_STATUS_CAPPRESENT))
965		return (ENXIO);
966
967	/*
968	 * Determine the start pointer of the capabilities list.
969	 */
970	switch (cfg->hdrtype & PCIM_HDRTYPE) {
971	case 0:
972	case 1:
973		ptr = PCIR_CAP_PTR;
974		break;
975	case 2:
976		ptr = PCIR_CAP_PTR_2;
977		break;
978	default:
979		/* XXX: panic? */
980		return (ENXIO);		/* no extended capabilities support */
981	}
982	ptr = pci_read_config(child, ptr, 1);
983
984	/*
985	 * Traverse the capabilities list.
986	 */
987	while (ptr != 0) {
988		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
989			if (capreg != NULL)
990				*capreg = ptr;
991			return (0);
992		}
993		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
994	}
995
996	return (ENOENT);
997}
998
999/*
1000 * Support for MSI-X message interrupts.
1001 */
1002void
1003pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1004{
1005	struct pci_devinfo *dinfo = device_get_ivars(dev);
1006	pcicfgregs *cfg = &dinfo->cfg;
1007	uint32_t offset;
1008
1009	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1010	offset = cfg->msix.msix_table_offset + index * 16;
1011	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
1012	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
1013	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
1014}
1015
1016void
1017pci_mask_msix(device_t dev, u_int index)
1018{
1019	struct pci_devinfo *dinfo = device_get_ivars(dev);
1020	pcicfgregs *cfg = &dinfo->cfg;
1021	uint32_t offset, val;
1022
1023	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
1024	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1025	val = bus_read_4(cfg->msix.msix_table_res, offset);
1026	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1027		val |= PCIM_MSIX_VCTRL_MASK;
1028		bus_write_4(cfg->msix.msix_table_res, offset, val);
1029	}
1030}
1031
1032void
1033pci_unmask_msix(device_t dev, u_int index)
1034{
1035	struct pci_devinfo *dinfo = device_get_ivars(dev);
1036	pcicfgregs *cfg = &dinfo->cfg;
1037	uint32_t offset, val;
1038
1039	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1040	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1041	val = bus_read_4(cfg->msix.msix_table_res, offset);
1042	if (val & PCIM_MSIX_VCTRL_MASK) {
1043		val &= ~PCIM_MSIX_VCTRL_MASK;
1044		bus_write_4(cfg->msix.msix_table_res, offset, val);
1045	}
1046}
1047
1048int
1049pci_pending_msix(device_t dev, u_int index)
1050{
1051	struct pci_devinfo *dinfo = device_get_ivars(dev);
1052	pcicfgregs *cfg = &dinfo->cfg;
1053	uint32_t offset, bit;
1054
1055	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1056	offset = cfg->msix.msix_pba_offset + (index / 4) * 4;
1057	bit = 1 << index % 32;
1058	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1059}
1060
1061/*
1062 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1063 * returned in *count.  After this function returns, each message will be
1064 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1065 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s) that
	 * back the MSI-X table and Pending Bit Array.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' is still the table entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	/*
	 * Allocate messages one at a time; stop at the first failure
	 * and keep whatever we managed to get.
	 */
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
		    &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Number of messages actually obtained. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/*
	 * Mask all vectors.  Note this covers every entry in the table
	 * (msix_msgnum), not just the ones we allocated.
	 */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	*count = actual;
	return (0);
}
1186
1187/*
1188 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1189 * the first N messages in the MSI-X table.  However, device drivers may
1190 * want to use different layouts in the case that they do not allocate a
1191 * full table.  This method allows the driver to specify what layout it
1192 * wants.  It must be called after a successful pci_alloc_msix() but
1193 * before any of the associated SYS_RES_IRQ resources are allocated via
1194 * bus_alloc_resource().  The 'indices' array contains N (where N equals
1195 * the 'count' returned from pci_alloc_msix()) message indices.  The
1196 * indices are 1-based (meaning the first message is at index 1).  On
1197 * successful return, each of the messages in the 'indices' array will
1198 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1199 * if indices contains { 2, 4 }, then upon successful return, the 'child'
1200 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1201 */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/* Sanity check the indices: each must be in [1, msix_msgnum]. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/*
	 * Make sure none of the resources are allocated.  The rids may
	 * be sparse (e.g. after a previous remap), so scan upward until
	 * we have seen all msix_alloc entries.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Map the IRQ values to the new message indices and rids. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);

		/*
		 * The indices in the backend code (PCIB_* methods and the
		 * MI helper routines for MD code such as pci_enable_msix())
		 * are all zero-based.  However, the indices passed to this
		 * function are 1-based so that they correspond 1:1 with the
		 * SYS_RES_IRQ resource IDs.
		 */
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i] - 1, irqs[i]);
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1273
/*
 * Release all MSI-X messages held by 'child'.  Returns ENODEV when no
 * MSI-X messages are allocated, EBUSY if any of the SYS_RES_IRQ
 * resources are still allocated by the driver, and 0 on success.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  The rids may
	 * be sparse after a remap, so scan until msix_alloc entries
	 * have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1316
1317/*
1318 * Return the max supported MSI-X messages this device supports.
1319 * Basically, assuming the MD code can alloc messages, this function
1320 * should return the maximum value that pci_alloc_msix() can return.
1321 * Thus, it is subject to the tunables, etc.
1322 */
1323int
1324pci_msix_count_method(device_t dev, device_t child)
1325{
1326	struct pci_devinfo *dinfo = device_get_ivars(child);
1327	pcicfgregs *cfg = &dinfo->cfg;
1328
1329	if (pci_do_msix && cfg->msix.msix_location != 0)
1330		return (cfg->msix.msix_msgnum);
1331	return (0);
1332}
1333
1334/*
1335 * Support for MSI message signalled interrupts.
1336 */
/*
 * Program the MSI capability's address and data registers, caching the
 * values in the config state for use by pci_resume_msi(), then set the
 * MSI enable bit in the control register.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	/* Write data and address values. */
	cfg->msi.msi_addr = address;
	cfg->msi.msi_data = data;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * 64-bit capability: the high address dword exists and
		 * the data register sits at a different offset.
		 */
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA_64BIT, data, 2);
	} else
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA, data, 2);

	/* Enable MSI in the control register. */
	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1362
1363/*
1364 * Restore MSI registers during resume.  If MSI is enabled then
1365 * restore the data and address registers in addition to the control
1366 * register.
1367 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint64_t address;
	uint16_t data;

	/*
	 * Only rewrite the address/data registers if MSI was enabled
	 * before suspend; the cached softc values are authoritative.
	 */
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = cfg->msi.msi_addr;
		data = cfg->msi.msi_data;
		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA, data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1393
1394/*
1395 * Returns true if the specified device is blacklisted because MSI
1396 * doesn't work.
1397 */
1398int
1399pci_msi_device_blacklisted(device_t dev)
1400{
1401	struct pci_quirk *q;
1402
1403	if (!pci_honor_msi_blacklist)
1404		return (0);
1405
1406	for (q = &pci_quirks[0]; q->devid; q++) {
1407		if (q->devid == pci_get_devid(dev) &&
1408		    q->type == PCI_QUIRK_DISABLE_MSI)
1409			return (1);
1410	}
1411	return (0);
1412}
1413
1414/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1416 * we just check for blacklisted chipsets as represented by the
1417 * host-PCI bridge at device 0:0:0.  In the future, it may become
1418 * necessary to check other system attributes, such as the kenv values
1419 * that give the motherboard manufacturer and model number.
1420 */
1421static int
1422pci_msi_blacklisted(void)
1423{
1424	device_t dev;
1425
1426	if (!pci_honor_msi_blacklist)
1427		return (0);
1428
1429	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1430	if (!(pcie_chipset || pcix_chipset))
1431		return (1);
1432
1433	dev = pci_find_bsf(0, 0, 0);
1434	if (dev != NULL)
1435		return (pci_msi_device_blacklisted(dev));
1436	return (0);
1437}
1438
1439/*
1440 * Attempt to allocate *count MSI messages.  The actual number allocated is
1441 * returned in *count.  After this function returns, each message will be
1442 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1443 */
1444int
1445pci_alloc_msi_method(device_t dev, device_t child, int *count)
1446{
1447	struct pci_devinfo *dinfo = device_get_ivars(child);
1448	pcicfgregs *cfg = &dinfo->cfg;
1449	struct resource_list_entry *rle;
1450	int actual, error, i, irqs[32];
1451	uint16_t ctrl;
1452
1453	/* Don't let count == 0 get us into trouble. */
1454	if (*count == 0)
1455		return (EINVAL);
1456
1457	/* If rid 0 is allocated, then fail. */
1458	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1459	if (rle != NULL && rle->res != NULL)
1460		return (ENXIO);
1461
1462	/* Already have allocated messages? */
1463	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1464		return (ENXIO);
1465
1466	/* If MSI is blacklisted for this system, fail. */
1467	if (pci_msi_blacklisted())
1468		return (ENXIO);
1469
1470	/* MSI capability present? */
1471	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1472		return (ENODEV);
1473
1474	if (bootverbose)
1475		device_printf(child,
1476		    "attempting to allocate %d MSI vectors (%d supported)\n",
1477		    *count, cfg->msi.msi_msgnum);
1478
1479	/* Don't ask for more than the device supports. */
1480	actual = min(*count, cfg->msi.msi_msgnum);
1481
1482	/* Don't ask for more than 32 messages. */
1483	actual = min(actual, 32);
1484
1485	/* MSI requires power of 2 number of messages. */
1486	if (!powerof2(actual))
1487		return (EINVAL);
1488
1489	for (;;) {
1490		/* Try to allocate N messages. */
1491		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1492		    cfg->msi.msi_msgnum, irqs);
1493		if (error == 0)
1494			break;
1495		if (actual == 1)
1496			return (error);
1497
1498		/* Try N / 2. */
1499		actual >>= 1;
1500	}
1501
1502	/*
1503	 * We now have N actual messages mapped onto SYS_RES_IRQ
1504	 * resources in the irqs[] array, so add new resources
1505	 * starting at rid 1.
1506	 */
1507	for (i = 0; i < actual; i++)
1508		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1509		    irqs[i], irqs[i], 1);
1510
1511	if (bootverbose) {
1512		if (actual == 1)
1513			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1514		else {
1515			int run;
1516
1517			/*
1518			 * Be fancy and try to print contiguous runs
1519			 * of IRQ values as ranges.  'run' is true if
1520			 * we are in a range.
1521			 */
1522			device_printf(child, "using IRQs %d", irqs[0]);
1523			run = 0;
1524			for (i = 1; i < actual; i++) {
1525
1526				/* Still in a run? */
1527				if (irqs[i] == irqs[i - 1] + 1) {
1528					run = 1;
1529					continue;
1530				}
1531
1532				/* Finish previous range. */
1533				if (run) {
1534					printf("-%d", irqs[i - 1]);
1535					run = 0;
1536				}
1537
1538				/* Start new range. */
1539				printf(",%d", irqs[i]);
1540			}
1541
1542			/* Unfinished range? */
1543			if (run)
1544				printf("%d", irqs[actual - 1]);
1545			printf(" for MSI\n");
1546		}
1547	}
1548
1549	/* Update control register with actual count and enable MSI. */
1550	ctrl = cfg->msi.msi_ctrl;
1551	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1552	ctrl |= (ffs(actual) - 1) << 4;
1553	cfg->msi.msi_ctrl = ctrl;
1554	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1555
1556	/* Update counts of alloc'd messages. */
1557	cfg->msi.msi_alloc = actual;
1558	*count = actual;
1559	return (0);
1560}
1561
1562/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other result (success or
	 * EBUSY) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated, collecting the
	 * IRQ values as we go.  MSI rids are always dense: 1..msi_alloc.
	 */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1605
1606/*
1607 * Return the max supported MSI messages this device supports.
1608 * Basically, assuming the MD code can alloc messages, this function
1609 * should return the maximum value that pci_alloc_msi() can return.
1610 * Thus, it is subject to the tunables, etc.
1611 */
1612int
1613pci_msi_count_method(device_t dev, device_t child)
1614{
1615	struct pci_devinfo *dinfo = device_get_ivars(child);
1616	pcicfgregs *cfg = &dinfo->cfg;
1617
1618	if (pci_do_msi && cfg->msi.msi_location != 0)
1619		return (cfg->msi.msi_msgnum);
1620	return (0);
1621}
1622
1623/* free pcicfgregs structure and all depending data structures */
1624
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Free any VPD data parsed from the device's VPD capability. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
1652
1653/*
 * PCI power management
1655 */
1656int
1657pci_set_powerstate_method(device_t dev, device_t child, int state)
1658{
1659	struct pci_devinfo *dinfo = device_get_ivars(child);
1660	pcicfgregs *cfg = &dinfo->cfg;
1661	uint16_t status;
1662	int result, oldstate, highest, delay;
1663
1664	if (cfg->pp.pp_cap == 0)
1665		return (EOPNOTSUPP);
1666
1667	/*
1668	 * Optimize a no state change request away.  While it would be OK to
1669	 * write to the hardware in theory, some devices have shown odd
1670	 * behavior when going from D3 -> D3.
1671	 */
1672	oldstate = pci_get_powerstate(child);
1673	if (oldstate == state)
1674		return (0);
1675
1676	/*
1677	 * The PCI power management specification states that after a state
1678	 * transition between PCI power states, system software must
1679	 * guarantee a minimal delay before the function accesses the device.
1680	 * Compute the worst case delay that we need to guarantee before we
1681	 * access the device.  Many devices will be responsive much more
1682	 * quickly than this delay, but there are some that don't respond
1683	 * instantly to state changes.  Transitions to/from D3 state require
1684	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1685	 * is done below with DELAY rather than a sleeper function because
1686	 * this function can be called from contexts where we cannot sleep.
1687	 */
1688	highest = (oldstate > state) ? oldstate : state;
1689	if (highest == PCI_POWERSTATE_D3)
1690	    delay = 10000;
1691	else if (highest == PCI_POWERSTATE_D2)
1692	    delay = 200;
1693	else
1694	    delay = 0;
1695	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1696	    & ~PCIM_PSTAT_DMASK;
1697	result = 0;
1698	switch (state) {
1699	case PCI_POWERSTATE_D0:
1700		status |= PCIM_PSTAT_D0;
1701		break;
1702	case PCI_POWERSTATE_D1:
1703		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1704			return (EOPNOTSUPP);
1705		status |= PCIM_PSTAT_D1;
1706		break;
1707	case PCI_POWERSTATE_D2:
1708		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1709			return (EOPNOTSUPP);
1710		status |= PCIM_PSTAT_D2;
1711		break;
1712	case PCI_POWERSTATE_D3:
1713		status |= PCIM_PSTAT_D3;
1714		break;
1715	default:
1716		return (EINVAL);
1717	}
1718
1719	if (bootverbose)
1720		printf(
1721		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1722		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1723		    oldstate, state);
1724
1725	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1726	if (delay)
1727		DELAY(delay);
1728	return (0);
1729}
1730
1731int
1732pci_get_powerstate_method(device_t dev, device_t child)
1733{
1734	struct pci_devinfo *dinfo = device_get_ivars(child);
1735	pcicfgregs *cfg = &dinfo->cfg;
1736	uint16_t status;
1737	int result;
1738
1739	if (cfg->pp.pp_cap != 0) {
1740		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1741		switch (status & PCIM_PSTAT_DMASK) {
1742		case PCIM_PSTAT_D0:
1743			result = PCI_POWERSTATE_D0;
1744			break;
1745		case PCIM_PSTAT_D1:
1746			result = PCI_POWERSTATE_D1;
1747			break;
1748		case PCIM_PSTAT_D2:
1749			result = PCI_POWERSTATE_D2;
1750			break;
1751		case PCIM_PSTAT_D3:
1752			result = PCI_POWERSTATE_D3;
1753			break;
1754		default:
1755			result = PCI_POWERSTATE_UNKNOWN;
1756			break;
1757		}
1758	} else {
1759		/* No support, device is always at D0 */
1760		result = PCI_POWERSTATE_D0;
1761	}
1762	return (result);
1763}
1764
1765/*
1766 * Some convenience functions for PCI device drivers.
1767 */
1768
1769static __inline void
1770pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1771{
1772	uint16_t	command;
1773
1774	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1775	command |= bit;
1776	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1777}
1778
1779static __inline void
1780pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1781{
1782	uint16_t	command;
1783
1784	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1785	command &= ~bit;
1786	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1787}
1788
1789int
1790pci_enable_busmaster_method(device_t dev, device_t child)
1791{
1792	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1793	return (0);
1794}
1795
1796int
1797pci_disable_busmaster_method(device_t dev, device_t child)
1798{
1799	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1800	return (0);
1801}
1802
1803int
1804pci_enable_io_method(device_t dev, device_t child, int space)
1805{
1806	uint16_t command;
1807	uint16_t bit;
1808	char *error;
1809
1810	bit = 0;
1811	error = NULL;
1812
1813	switch(space) {
1814	case SYS_RES_IOPORT:
1815		bit = PCIM_CMD_PORTEN;
1816		error = "port";
1817		break;
1818	case SYS_RES_MEMORY:
1819		bit = PCIM_CMD_MEMEN;
1820		error = "memory";
1821		break;
1822	default:
1823		return (EINVAL);
1824	}
1825	pci_set_command_bit(dev, child, bit);
1826	/* Some devices seem to need a brief stall here, what do to? */
1827	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1828	if (command & bit)
1829		return (0);
1830	device_printf(child, "failed to enable %s mapping!\n", error);
1831	return (ENXIO);
1832}
1833
1834int
1835pci_disable_io_method(device_t dev, device_t child, int space)
1836{
1837	uint16_t command;
1838	uint16_t bit;
1839	char *error;
1840
1841	bit = 0;
1842	error = NULL;
1843
1844	switch(space) {
1845	case SYS_RES_IOPORT:
1846		bit = PCIM_CMD_PORTEN;
1847		error = "port";
1848		break;
1849	case SYS_RES_MEMORY:
1850		bit = PCIM_CMD_MEMEN;
1851		error = "memory";
1852		break;
1853	default:
1854		return (EINVAL);
1855	}
1856	pci_clear_command_bit(dev, child, bit);
1857	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1858	if (command & bit) {
1859		device_printf(child, "failed to disable %s mapping!\n", error);
1860		return (ENXIO);
1861	}
1862	return (0);
1863}
1864
1865/*
1866 * New style pci driver.  Parent device is either a pci-host-bridge or a
1867 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1868 */
1869
/*
 * Dump the interesting parts of a device's config header, power
 * management, VPD, MSI, and MSI-X state to the console.  Only active
 * when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{
	int i;

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* Vital Product Data, if it was parsed during probe. */
		if (cfg->vpd.vpd_reg) {
			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
				struct vpd_readonly *vrop;
				vrop = &cfg->vpd.vpd_ros[i];
				if (strncmp("CP", vrop->keyword, 2) == 0)
					printf("\tCP: id %d, BAR%d, off %#x\n",
					    vrop->value[0], vrop->value[1],
					    le16toh(
					      *(uint16_t *)&vrop->value[2]));
				else if (strncmp("RV", vrop->keyword, 2) == 0)
					printf("\tRV: %#hhx\n", vrop->value[0]);
				else
					printf("\t%.2s: %s\n", vrop->keyword,
					    vrop->value);
			}
			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
				struct vpd_write *vwp;
				vwp = &cfg->vpd.vpd_w[i];
				if (strncmp("RW", vwp->keyword, 2) != 0)
					printf("\t%.2s(%#x-%#x): %s\n",
					    vwp->keyword, vwp->start,
					    vwp->start + vwp->len, vwp->value);
			}
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1952
1953static int
1954pci_porten(device_t pcib, int b, int s, int f)
1955{
1956	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1957		& PCIM_CMD_PORTEN) != 0;
1958}
1959
1960static int
1961pci_memen(device_t pcib, int b, int s, int f)
1962{
1963	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1964		& PCIM_CMD_MEMEN) != 0;
1965}
1966
1967/*
1968 * Add a resource based on a pci map register. Return 1 if the map
1969 * register is a 32bit map register or 2 if it is a 64bit register.
1970 */
1971static int
1972pci_add_map(device_t pcib, device_t bus, device_t dev,
1973    int b, int s, int f, int reg, struct resource_list *rl, int force,
1974    int prefetch)
1975{
1976	uint32_t map;
1977	pci_addr_t base;
1978	pci_addr_t start, end, count;
1979	uint8_t ln2size;
1980	uint8_t ln2range;
1981	uint32_t testval;
1982	uint16_t cmd;
1983	int type;
1984	int barlen;
1985	struct resource *res;
1986
1987	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1988	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1989	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1990	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1991
1992	if (pci_maptype(map) & PCI_MAPMEM)
1993		type = SYS_RES_MEMORY;
1994	else
1995		type = SYS_RES_IOPORT;
1996	ln2size = pci_mapsize(testval);
1997	ln2range = pci_maprange(testval);
1998	base = pci_mapbase(map);
1999	barlen = ln2range == 64 ? 2 : 1;
2000
2001	/*
2002	 * For I/O registers, if bottom bit is set, and the next bit up
2003	 * isn't clear, we know we have a BAR that doesn't conform to the
2004	 * spec, so ignore it.  Also, sanity check the size of the data
2005	 * areas to the type of memory involved.  Memory must be at least
2006	 * 16 bytes in size, while I/O ranges must be at least 4.
2007	 */
2008	if ((testval & 0x1) == 0x1 &&
2009	    (testval & 0x2) != 0)
2010		return (barlen);
2011	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2012	    (type == SYS_RES_IOPORT && ln2size < 2))
2013		return (barlen);
2014
2015	if (ln2range == 64)
2016		/* Read the other half of a 64bit map register */
2017		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2018	if (bootverbose) {
2019		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
2020		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2021		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2022			printf(", port disabled\n");
2023		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2024			printf(", memory disabled\n");
2025		else
2026			printf(", enabled\n");
2027	}
2028
2029	/*
2030	 * If base is 0, then we have problems.  It is best to ignore
2031	 * such entries for the moment.  These will be allocated later if
2032	 * the driver specifically requests them.  However, some
2033	 * removable busses look better when all resources are allocated,
2034	 * so allow '0' to be overriden.
2035	 *
2036	 * Similarly treat maps whose values is the same as the test value
2037	 * read back.  These maps have had all f's written to them by the
2038	 * BIOS in an attempt to disable the resources.
2039	 */
2040	if (!force && (base == 0 || map == testval))
2041		return (barlen);
2042	if ((u_long)base != base) {
2043		device_printf(bus,
2044		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2045		return (barlen);
2046	}
2047
2048	/*
2049	 * This code theoretically does the right thing, but has
2050	 * undesirable side effects in some cases where peripherals
2051	 * respond oddly to having these bits enabled.  Let the user
2052	 * be able to turn them off (since pci_enable_io_modes is 1 by
2053	 * default).
2054	 */
2055	if (pci_enable_io_modes) {
2056		/* Turn on resources that have been left off by a lazy BIOS */
2057		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2058			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2059			cmd |= PCIM_CMD_PORTEN;
2060			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2061		}
2062		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2063			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2064			cmd |= PCIM_CMD_MEMEN;
2065			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2066		}
2067	} else {
2068		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2069			return (barlen);
2070		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2071			return (barlen);
2072	}
2073
2074	count = 1 << ln2size;
2075	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide */
2077		end = ~0ULL;
2078	} else {
2079		start = base;
2080		end = base + (1 << ln2size) - 1;
2081	}
2082	resource_list_add(rl, type, reg, start, end, count);
2083
2084	/*
2085	 * Not quite sure what to do on failure of allocating the resource
2086	 * since I can postulate several right answers.
2087	 */
2088	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2089	    prefetch ? RF_PREFETCHABLE : 0);
2090	if (res == NULL)
2091		return (barlen);
2092	start = rman_get_start(res);
2093	if ((u_long)start != start) {
2094		/* Wait a minute!  this platform can't do this address. */
2095		device_printf(bus,
2096		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2097		    b, s, f, reg, (uintmax_t)start);
2098		resource_list_release(rl, bus, dev, type, reg, res);
2099		return (barlen);
2100	}
2101	pci_write_config(dev, reg, start, 4);
2102	if (ln2range == 64)
2103		pci_write_config(dev, reg + 4, start >> 32, 4);
2104	return (barlen);
2105}
2106
2107/*
2108 * For ATA devices we need to decide early what addressing mode to use.
2109 * Legacy demands that the primary and secondary ATA ports sits on the
2110 * same addresses that old ISA hardware did. This dictates that we use
2111 * those addresses and ignore the BAR's if we cannot set PCI native
2112 * addressing mode.
2113 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: if the programming interface says it is in
	 * native-PCI mode, map BAR(0)/BAR(1) normally; otherwise reserve
	 * the legacy ISA command block (0x1f0-0x1f7) and control port
	 * (0x3f6) under the BAR rids.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/*
	 * Secondary channel: same choice between the native BAR(2)/BAR(3)
	 * and the legacy addresses (0x170-0x177 command, 0x376 control).
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/*
	 * BAR(4) and BAR(5) are always mapped normally regardless of the
	 * channel modes (BAR(4) is conventionally the bus-master DMA
	 * block on ATA controllers — not verifiable from this file).
	 */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2167
/*
 * Work out which IRQ a device should use and record it as the rid 0
 * SYS_RES_IRQ entry on its resource list.  Precedence: a user-supplied
 * hw.pci%d.%d.INT%c.irq tunable first; failing that, either the routed
 * interrupt from the parent bridge or the existing intline register
 * value, depending on force_route.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Discard tunable values outside the usable range (1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2214
/*
 * Populate a child device's resource list from its BARs, any matching
 * quirk-table entries, and its interrupt line.  'force' and
 * 'prefetchmask' are passed through to pci_add_map() per BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/*
		 * pci_add_map() returns the number of 32-bit BAR cells
		 * consumed, so the loop steps past the upper half of
		 * 64-bit BARs.
		 */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2267
2268void
2269pci_add_children(device_t dev, int busno, size_t dinfo_size)
2270{
2271#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2272	device_t pcib = device_get_parent(dev);
2273	struct pci_devinfo *dinfo;
2274	int maxslots;
2275	int s, f, pcifunchigh;
2276	uint8_t hdrtype;
2277
2278	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2279	    ("dinfo_size too small"));
2280	maxslots = PCIB_MAXSLOTS(pcib);
2281	for (s = 0; s <= maxslots; s++) {
2282		pcifunchigh = 0;
2283		f = 0;
2284		DELAY(1);
2285		hdrtype = REG(PCIR_HDRTYPE, 1);
2286		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2287			continue;
2288		if (hdrtype & PCIM_MFDEV)
2289			pcifunchigh = PCI_FUNCMAX;
2290		for (f = 0; f <= pcifunchigh; f++) {
2291			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2292			if (dinfo != NULL) {
2293				pci_add_child(dev, dinfo);
2294			}
2295		}
2296	}
2297#undef REG
2298}
2299
/*
 * Attach a newly discovered PCI function to the bus: create the child
 * device, hang the devinfo off it as ivars, initialise its resource
 * list, save then restore its configuration state, and add its
 * BAR/IRQ resources (without forcing allocation).
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2311
/*
 * Probe method for the generic PCI bus driver.  Returns a strongly
 * negative priority so that more specific subclasses win the bid.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2321
/*
 * Attach method: determine our physical bus number from the parent
 * bridge, enumerate all children, and attach their drivers.
 */
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2341
/*
 * Bus suspend method: snapshot every child's config space, suspend the
 * children, and then — only when pci_do_power_resume is set and an
 * "acpi" device exists — put each attached type 0 function into D3 or
 * whatever state ACPI suggests.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2389
/*
 * Bus resume method: power each attached type 0 child back to D0 (when
 * ACPI-driven power management is enabled), restore its configuration
 * space, and then resume the children generically.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2424
2425static void
2426pci_load_vendor_data(void)
2427{
2428	caddr_t vendordata, info;
2429
2430	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2431		info = preload_search_info(vendordata, MODINFO_ADDR);
2432		pci_vendordata = *(char **)info;
2433		info = preload_search_info(vendordata, MODINFO_SIZE);
2434		pci_vendordata_size = *(size_t *)info;
2435		/* terminate the database */
2436		pci_vendordata[pci_vendordata_size] = '\n';
2437	}
2438}
2439
/*
 * Called when a new driver is registered with the PCI bus: let the
 * driver identify devices, then re-probe every child that has no
 * driver attached.  Each candidate's config state is restored before
 * probing and saved again if no driver claims it.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only devices without a driver are candidates. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2468
/*
 * Bus print_child method: emit the standard header, the child's port,
 * memory and IRQ resource ranges, any device flags, and its
 * slot/function position.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2494
/*
 * Table mapping PCI class/subclass codes to human-readable names,
 * used by pci_probe_nomatch() when no driver attaches and the vendor
 * database has no entry.  A subclass of -1 names the class as a
 * whole; the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2581
2582void
2583pci_probe_nomatch(device_t dev, device_t child)
2584{
2585	int	i;
2586	char	*cp, *scp, *device;
2587
2588	/*
2589	 * Look for a listing for this device in a loaded device database.
2590	 */
2591	if ((device = pci_describe_device(child)) != NULL) {
2592		device_printf(dev, "<%s>", device);
2593		free(device, M_DEVBUF);
2594	} else {
2595		/*
2596		 * Scan the class/subclass descriptions for a general
2597		 * description.
2598		 */
2599		cp = "unknown";
2600		scp = NULL;
2601		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2602			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2603				if (pci_nomatch_tab[i].subclass == -1) {
2604					cp = pci_nomatch_tab[i].desc;
2605				} else if (pci_nomatch_tab[i].subclass ==
2606				    pci_get_subclass(child)) {
2607					scp = pci_nomatch_tab[i].desc;
2608				}
2609			}
2610		}
2611		device_printf(dev, "<%s%s%s>",
2612		    cp ? cp : "",
2613		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2614		    scp ? scp : "");
2615	}
2616	printf(" at device %d.%d (no driver attached)\n",
2617	    pci_get_slot(child), pci_get_function(child));
2618	if (pci_do_power_nodriver)
2619		pci_cfg_save(child,
2620		    (struct pci_devinfo *) device_get_ivars(child), 1);
2621	return;
2622}
2623
2624/*
2625 * Parse the PCI device database, if loaded, and return a pointer to a
2626 * description of the device.
2627 *
2628 * The database is flat text formatted as follows:
2629 *
2630 * Any line not in a valid format is ignored.
2631 * Lines are terminated with newline '\n' characters.
2632 *
2633 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2634 * the vendor name.
2635 *
2636 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2637 * - devices cannot be listed without a corresponding VENDOR line.
2638 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2639 * another TAB, then the device name.
2640 */
2641
2642/*
2643 * Assuming (ptr) points to the beginning of a line in the database,
2644 * return the vendor or device and description of the next entry.
2645 * The value of (vendor) or (device) inappropriate for the entry type
2646 * is set to -1.  Returns nonzero at the end of the database.
2647 *
 * Note that this is not fully robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
2651 */
2652static int
2653pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2654{
2655	char	*cp = *ptr;
2656	int	left;
2657
2658	*device = -1;
2659	*vendor = -1;
2660	**desc = '\0';
2661	for (;;) {
2662		left = pci_vendordata_size - (cp - pci_vendordata);
2663		if (left <= 0) {
2664			*ptr = cp;
2665			return(1);
2666		}
2667
2668		/* vendor entry? */
2669		if (*cp != '\t' &&
2670		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2671			break;
2672		/* device entry? */
2673		if (*cp == '\t' &&
2674		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2675			break;
2676
2677		/* skip to next line */
2678		while (*cp != '\n' && left > 0) {
2679			cp++;
2680			left--;
2681		}
2682		if (*cp == '\n') {
2683			cp++;
2684			left--;
2685		}
2686	}
2687	/* skip to next line */
2688	while (*cp != '\n' && left > 0) {
2689		cp++;
2690		left--;
2691	}
2692	if (*cp == '\n' && left > 0)
2693		cp++;
2694	*ptr = cp;
2695	return(0);
2696}
2697
2698static char *
2699pci_describe_device(device_t dev)
2700{
2701	int	vendor, device;
2702	char	*desc, *vp, *dp, *line;
2703
2704	desc = vp = dp = NULL;
2705
2706	/*
2707	 * If we have no vendor data, we can't do anything.
2708	 */
2709	if (pci_vendordata == NULL)
2710		goto out;
2711
2712	/*
2713	 * Scan the vendor data looking for this device
2714	 */
2715	line = pci_vendordata;
2716	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2717		goto out;
2718	for (;;) {
2719		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2720			goto out;
2721		if (vendor == pci_get_vendor(dev))
2722			break;
2723	}
2724	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2725		goto out;
2726	for (;;) {
2727		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2728			*dp = 0;
2729			break;
2730		}
2731		if (vendor != -1) {
2732			*dp = 0;
2733			break;
2734		}
2735		if (device == pci_get_device(dev))
2736			break;
2737	}
2738	if (dp[0] == '\0')
2739		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2740	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2741	    NULL)
2742		sprintf(desc, "%s, %s", vp, dp);
2743 out:
2744	if (vp != NULL)
2745		free(vp, M_DEVBUF);
2746	if (dp != NULL)
2747		free(dp, M_DEVBUF);
2748	return(desc);
2749}
2750
/*
 * Bus read_ivar method: expose the cached config-space fields of a
 * child (vendor/device IDs, class codes, interrupt info, location,
 * timing registers) as instance variables.  Returns ENOENT for
 * unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device (high 16 bits) and vendor id. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2830
/*
 * Bus write_ivar method: only the interrupt pin is writable; the
 * identifying/location ivars are read-only (EINVAL), anything else is
 * unknown (ENOENT).
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
2862
2863
2864#include "opt_ddb.h"
2865#ifdef DDB
2866#include <ddb/ddb.h>
2867#include <sys/cons.h>
2868
2869/*
2870 * List resources based on pci map registers, used for within ddb
2871 */
2872
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line (driver name/unit, location, class, IDs,
 * revision, header type) per device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices without an attached driver are printed as
		 * "none" with a running counter for the unit number.
		 */
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2912#endif /* DDB */
2913
/*
 * Lazily allocate a resource for a BAR that has no entry on the
 * child's resource list yet: size the BAR by writing all-ones and
 * reading it back, verify the requested type matches the BAR type,
 * allocate from the parent bus, record the result on the resource
 * list, and finally program the BAR with the chosen (or original)
 * address.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* Pull in the upper 32 bits of a 64-bit BAR's current value. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* The requested resource type must agree with the BAR type. */
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs are naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Restore (or program) the BAR; the sizing write clobbered it. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3002
3003
/*
 * Bus alloc_resource method.  For direct children this routes a legacy
 * interrupt on demand, lazily allocates unreserved BARs via
 * pci_alloc_map(), and hands back previously reserved resources
 * (activating them when requested); everything else falls through to
 * the standard resource-list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return NULL;
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3080
3081void
3082pci_delete_resource(device_t dev, device_t child, int type, int rid)
3083{
3084	struct pci_devinfo *dinfo;
3085	struct resource_list *rl;
3086	struct resource_list_entry *rle;
3087
3088	if (device_get_parent(child) != dev)
3089		return;
3090
3091	dinfo = device_get_ivars(child);
3092	rl = &dinfo->resources;
3093	rle = resource_list_find(rl, type, rid);
3094	if (rle) {
3095		if (rle->res) {
3096			if (rman_get_device(rle->res) != dev ||
3097			    rman_get_flags(rle->res) & RF_ACTIVE) {
3098				device_printf(dev, "delete_resource: "
3099				    "Resource still owned by child, oops. "
3100				    "(type=%d, rid=%d, addr=%lx)\n",
3101				    rle->type, rle->rid,
3102				    rman_get_start(rle->res));
3103				return;
3104			}
3105			bus_release_resource(dev, type, rid, rle->res);
3106		}
3107		resource_list_delete(rl, type, rid);
3108	}
3109	/*
3110	 * Why do we turn off the PCI configuration BAR when we delete a
3111	 * resource? -- imp
3112	 */
3113	pci_write_config(child, rid, 0, 4);
3114	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3115}
3116
3117struct resource_list *
3118pci_get_resource_list (device_t dev, device_t child)
3119{
3120	struct pci_devinfo *dinfo = device_get_ivars(child);
3121
3122	return (&dinfo->resources);
3123}
3124
3125uint32_t
3126pci_read_config_method(device_t dev, device_t child, int reg, int width)
3127{
3128	struct pci_devinfo *dinfo = device_get_ivars(child);
3129	pcicfgregs *cfg = &dinfo->cfg;
3130
3131	return (PCIB_READ_CONFIG(device_get_parent(dev),
3132	    cfg->bus, cfg->slot, cfg->func, reg, width));
3133}
3134
3135void
3136pci_write_config_method(device_t dev, device_t child, int reg,
3137    uint32_t val, int width)
3138{
3139	struct pci_devinfo *dinfo = device_get_ivars(child);
3140	pcicfgregs *cfg = &dinfo->cfg;
3141
3142	PCIB_WRITE_CONFIG(device_get_parent(dev),
3143	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3144}
3145
3146int
3147pci_child_location_str_method(device_t dev, device_t child, char *buf,
3148    size_t buflen)
3149{
3150
3151	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3152	    pci_get_function(child));
3153	return (0);
3154}
3155
3156int
3157pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3158    size_t buflen)
3159{
3160	struct pci_devinfo *dinfo;
3161	pcicfgregs *cfg;
3162
3163	dinfo = device_get_ivars(child);
3164	cfg = &dinfo->cfg;
3165	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3166	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3167	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3168	    cfg->progif);
3169	return (0);
3170}
3171
3172int
3173pci_assign_interrupt_method(device_t dev, device_t child)
3174{
3175	struct pci_devinfo *dinfo = device_get_ivars(child);
3176	pcicfgregs *cfg = &dinfo->cfg;
3177
3178	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3179	    cfg->intpin));
3180}
3181
3182static int
3183pci_modevent(module_t mod, int what, void *arg)
3184{
3185	static struct cdev *pci_cdev;
3186
3187	switch (what) {
3188	case MOD_LOAD:
3189		STAILQ_INIT(&pci_devq);
3190		pci_generation = 0;
3191		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3192		    "pci");
3193		pci_load_vendor_data();
3194		break;
3195
3196	case MOD_UNLOAD:
3197		destroy_dev(pci_cdev);
3198		break;
3199	}
3200
3201	return (0);
3202}
3203
/*
 * Restore the device's config-space registers (BARs, expansion ROM,
 * command, interrupt routing, timing and class bytes) from the copy
 * cached by pci_cfg_save(), and re-enable MSI if it was configured.
 * Used e.g. when bringing a device back from a low-power state.
 * The write order matters: power state first, then BARs, then the
 * remaining registers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Reprogram the BARs before anything that might enable decoding. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3250
/*
 * Snapshot the writable portion of the device's type 0 config header
 * into dinfo->cfg so pci_cfg_restore() can put it back later, and
 * optionally (setstate != 0) power the device down to D3 according to
 * the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3334