/* pci.c revision 169219 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 169219 2007-05-02 16:21:18Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static const char	*pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static int		pci_msi_blacklisted(void);
105
/*
 * Method table for the PCI bus driver.  Where PCI needs no special
 * handling, the generic bus_* implementations are used directly.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }
};
157
158DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
159
160static devclass_t pci_devclass;
161DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
162MODULE_VERSION(pci, 1);
163
164static char	*pci_vendordata;
165static size_t	pci_vendordata_size;
166
167
/*
 * A quirk entry matches a specific vendor/device pair and describes a
 * deviation from the PCI spec that the generic code must work around.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* codes below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific argument (e.g. register offset) */
	int	arg2;	/* quirk-specific argument (unused by current quirks) */
};

/* Table of known broken devices; terminated by an all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
211
212/* map register information */
213#define	PCI_MAPMEM	0x01	/* memory map */
214#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
215#define	PCI_MAPPORT	0x04	/* port map */
216
217struct devlist pci_devq;
218uint32_t pci_generation;
219uint32_t pci_numdevs = 0;
220static int pcie_chipset, pcix_chipset;
221
222/* sysctl vars */
223SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
224
225static int pci_enable_io_modes = 1;
226TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
227SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
228    &pci_enable_io_modes, 1,
229    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
230enable these bits correctly.  We'd like to do this all the time, but there\n\
231are some peripherals that this causes problems with.");
232
233static int pci_do_power_nodriver = 0;
234TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
235SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
236    &pci_do_power_nodriver, 0,
237  "Place a function into D3 state when no driver attaches to it.  0 means\n\
238disable.  1 means conservatively place devices into D3 state.  2 means\n\
239agressively place devices into D3 state.  3 means put absolutely everything\n\
240in D3 state.");
241
242static int pci_do_power_resume = 1;
243TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
244SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
245    &pci_do_power_resume, 1,
246  "Transition from D3 -> D0 on resume.");
247
248static int pci_do_vpd = 1;
249TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
250SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
251    "Enable support for VPD.");
252
253static int pci_do_msi = 1;
254TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
255SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
256    "Enable support for MSI interrupts");
257
258static int pci_do_msix = 1;
259TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
260SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
261    "Enable support for MSI-X interrupts");
262
263static int pci_honor_msi_blacklist = 1;
264TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
265SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
266    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267
268/* Find a device_t by bus/slot/function */
269
270device_t
271pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272{
273	struct pci_devinfo *dinfo;
274
275	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276		if ((dinfo->cfg.bus == bus) &&
277		    (dinfo->cfg.slot == slot) &&
278		    (dinfo->cfg.func == func)) {
279			return (dinfo->cfg.dev);
280		}
281	}
282
283	return (NULL);
284}
285
286/* Find a device_t by vendor/device ID */
287
288device_t
289pci_find_device(uint16_t vendor, uint16_t device)
290{
291	struct pci_devinfo *dinfo;
292
293	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294		if ((dinfo->cfg.vendor == vendor) &&
295		    (dinfo->cfg.device == device)) {
296			return (dinfo->cfg.dev);
297		}
298	}
299
300	return (NULL);
301}
302
303/* return base address of memory or port map */
304
305static uint32_t
306pci_mapbase(uint32_t mapreg)
307{
308
309	if (PCI_BAR_MEM(mapreg))
310		return (mapreg & PCIM_BAR_MEM_BASE);
311	else
312		return (mapreg & PCIM_BAR_IO_BASE);
313}
314
315/* return map type of memory or port map */
316
317static const char *
318pci_maptype(unsigned mapreg)
319{
320
321	if (PCI_BAR_IO(mapreg))
322		return ("I/O Port");
323	if (mapreg & PCIM_BAR_MEM_PREFETCH)
324		return ("Prefetchable Memory");
325	return ("Memory");
326}
327
328/* return log2 of map size decoded for memory or port map */
329
330static int
331pci_mapsize(uint32_t testval)
332{
333	int ln2size;
334
335	testval = pci_mapbase(testval);
336	ln2size = 0;
337	if (testval != 0) {
338		while ((testval & 1) == 0)
339		{
340			ln2size++;
341			testval >>= 1;
342		}
343	}
344	return (ln2size);
345}
346
347/* return log2 of address range supported by map register */
348
349static int
350pci_maprange(unsigned mapreg)
351{
352	int ln2range = 0;
353
354	if (PCI_BAR_IO(mapreg))
355		ln2range = 32;
356	else
357		switch (mapreg & PCIM_BAR_MEM_TYPE) {
358		case PCIM_BAR_MEM_32:
359			ln2range = 32;
360			break;
361		case PCIM_BAR_MEM_1MB:
362			ln2range = 20;
363			break;
364		case PCIM_BAR_MEM_64:
365			ln2range = 64;
366			break;
367		}
368	return (ln2range);
369}
370
371/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
372
373static void
374pci_fixancient(pcicfgregs *cfg)
375{
376	if (cfg->hdrtype != 0)
377		return;
378
379	/* PCI to PCI bridges use header type 1 */
380	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
381		cfg->hdrtype = 1;
382}
383
/*
 * Extract the header-type specific registers into *cfg: the subvendor
 * and subdevice IDs (whose offsets differ per header type) and the
 * number of BARs.  Type 0 = normal device, 1 = PCI-PCI bridge,
 * 2 = cardbus bridge.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		/* Bridges have no subvendor/subdevice registers here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
407
/*
 * Read the configuration header of the function at bus/slot/function
 * (b, s, f) into a freshly allocated pci_devinfo of 'size' bytes (size
 * allows callers to embed pci_devinfo in a larger structure).  The new
 * entry is appended to the global device list and the pc_* summary in
 * ->conf is filled in.  Returns NULL if no device responds at this
 * address (vendor/device register reads as all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A read of -1 (all-ones) means no device is present. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Cache capability data if the device advertises any. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
480
481static void
482pci_read_extcap(device_t pcib, pcicfgregs *cfg)
483{
484#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
485#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
486#if defined(__i386__) || defined(__amd64__)
487	uint64_t addr;
488#endif
489	uint32_t val;
490	int	ptr, nextptr, ptrptr;
491
492	switch (cfg->hdrtype & PCIM_HDRTYPE) {
493	case 0:
494	case 1:
495		ptrptr = PCIR_CAP_PTR;
496		break;
497	case 2:
498		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
499		break;
500	default:
501		return;		/* no extended capabilities support */
502	}
503	nextptr = REG(ptrptr, 1);	/* sanity check? */
504
505	/*
506	 * Read capability entries.
507	 */
508	while (nextptr != 0) {
509		/* Sanity check */
510		if (nextptr > 255) {
511			printf("illegal PCI extended capability offset %d\n",
512			    nextptr);
513			return;
514		}
515		/* Find the next entry */
516		ptr = nextptr;
517		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
518
519		/* Process this entry */
520		switch (REG(ptr + PCICAP_ID, 1)) {
521		case PCIY_PMG:		/* PCI power management */
522			if (cfg->pp.pp_cap == 0) {
523				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
524				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
525				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
526				if ((nextptr - ptr) > PCIR_POWER_DATA)
527					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
528			}
529			break;
530#if defined(__i386__) || defined(__amd64__)
531		case PCIY_HT:		/* HyperTransport */
532			/* Determine HT-specific capability type. */
533			val = REG(ptr + PCIR_HT_COMMAND, 2);
534			switch (val & PCIM_HTCMD_CAP_MASK) {
535			case PCIM_HTCAP_MSI_MAPPING:
536				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
537					/* Sanity check the mapping window. */
538					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
539					    4);
540					addr <<= 32;
541					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
542					    4);
543					if (addr != MSI_INTEL_ADDR_BASE)
544						device_printf(pcib,
545		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546						    cfg->bus, cfg->slot,
547						    cfg->func, (long long)addr);
548				}
549
550				/* Enable MSI -> HT mapping. */
551				val |= PCIM_HTCMD_MSI_ENABLE;
552				WREG(ptr + PCIR_HT_COMMAND, val, 2);
553				break;
554			}
555			break;
556#endif
557		case PCIY_MSI:		/* PCI MSI */
558			cfg->msi.msi_location = ptr;
559			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
560			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
561						     PCIM_MSICTRL_MMC_MASK)>>1);
562			break;
563		case PCIY_MSIX:		/* PCI MSI-X */
564			cfg->msix.msix_location = ptr;
565			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
566			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
567			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
568			val = REG(ptr + PCIR_MSIX_TABLE, 4);
569			cfg->msix.msix_table_bar = PCIR_BAR(val &
570			    PCIM_MSIX_BIR_MASK);
571			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
572			val = REG(ptr + PCIR_MSIX_PBA, 4);
573			cfg->msix.msix_pba_bar = PCIR_BAR(val &
574			    PCIM_MSIX_BIR_MASK);
575			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
576			break;
577		case PCIY_VPD:		/* PCI Vital Product Data */
578			cfg->vpd.vpd_reg = ptr;
579			break;
580		case PCIY_SUBVENDOR:
581			/* Should always be true. */
582			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
583				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
584				cfg->subvendor = val & 0xffff;
585				cfg->subdevice = val >> 16;
586			}
587			break;
588		case PCIY_PCIX:		/* PCI-X */
589			/*
590			 * Assume we have a PCI-X chipset if we have
591			 * at least one PCI-PCI bridge with a PCI-X
592			 * capability.  Note that some systems with
593			 * PCI-express or HT chipsets might match on
594			 * this check as well.
595			 */
596			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
597				pcix_chipset = 1;
598			break;
599		case PCIY_EXPRESS:	/* PCI-express */
600			/*
601			 * Assume we have a PCI-express chipset if we have
602			 * at least one PCI-express root port.
603			 */
604			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
605			if ((val & PCIM_EXP_FLAGS_TYPE) ==
606			    PCIM_EXP_TYPE_ROOT_PORT)
607				pcie_chipset = 1;
608			break;
609		default:
610			break;
611		}
612	}
613/* REG and WREG use carry through to next functions */
614}
615
616/*
617 * PCI Vital Product Data
618 */
/*
 * Read one 32-bit word of VPD data at byte offset 'reg'.  Writes the
 * address (with bit 15 clear to request a read) and spins until the
 * device sets bit 15 to flag completion.  Uses the REG/WREG macros
 * still defined from pci_read_extcap above.
 * NOTE(review): the poll loop has no timeout; a hung device would spin
 * forever despite the "limit looping" comment — verify intent.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
}
631
#if 0
/*
 * Write one 32-bit word of VPD data at byte offset 'reg': store the
 * data word, then write the address with bit 15 set to start the write,
 * and spin until the device clears bit 15.  Currently unused.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
646
/* Cursor state for streaming bytes out of a device's VPD region. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* byte offset of next word fetch */
	uint8_t		cksum;		/* running sum; 0 after "RV" = valid */
};
655
656static uint8_t
657vpd_nextbyte(struct vpd_readstate *vrs)
658{
659	uint8_t byte;
660
661	if (vrs->bytesinval == 0) {
662		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
663		    vrs->off));
664		vrs->off += 4;
665		byte = vrs->val & 0xff;
666		vrs->bytesinval = 3;
667	} else {
668		vrs->val = vrs->val >> 8;
669		byte = vrs->val & 0xff;
670		vrs->bytesinval--;
671	}
672
673	vrs->cksum += byte;
674	return (byte);
675}
676
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array and the read/write
 * (VPD-W) keyword array.  Implemented as a byte-at-a-time state
 * machine over vpd_nextbyte():
 *   state 0 = resource item header, 1 = identifier string,
 *   2/3 = VPD-R keyword header/value, 5/6 = VPD-W keyword header/value,
 *   4 = skip bytes.  The "RV" keyword carries the checksum byte; if the
 * running sum is nonzero there, the read-only data is discarded.
 * Sets vpd_cached in all cases so the parse is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	if (!pci_do_vpd) {
		cfg->vpd.vpd_cached = 1;
		return;
	}

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 set = large resource with 16-bit length. */
			if (byte & 0x80) {
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" holds the checksum byte; sum must be zero. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/* read-only data bad, clean up */
		/*
		 * NOTE(review): this frees values at indices off..1 but
		 * skips index 0, and vpd_ident is not released either —
		 * verify whether these are intentional or small leaks.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
915
916int
917pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
918{
919	struct pci_devinfo *dinfo = device_get_ivars(child);
920	pcicfgregs *cfg = &dinfo->cfg;
921
922	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
923		pci_read_vpd(device_get_parent(dev), cfg);
924
925	*identptr = cfg->vpd.vpd_ident;
926
927	if (*identptr == NULL)
928		return (ENXIO);
929
930	return (0);
931}
932
933int
934pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
935	const char **vptr)
936{
937	struct pci_devinfo *dinfo = device_get_ivars(child);
938	pcicfgregs *cfg = &dinfo->cfg;
939	int i;
940
941	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
942		pci_read_vpd(device_get_parent(dev), cfg);
943
944	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
945		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
946		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
947			*vptr = cfg->vpd.vpd_ros[i].value;
948		}
949
950	if (i != cfg->vpd.vpd_rocnt)
951		return (0);
952
953	*vptr = NULL;
954	return (ENXIO);
955}
956
957/*
958 * Return the offset in configuration space of the requested extended
959 * capability entry or 0 if the specified capability was not found.
960 */
961int
962pci_find_extcap_method(device_t dev, device_t child, int capability,
963    int *capreg)
964{
965	struct pci_devinfo *dinfo = device_get_ivars(child);
966	pcicfgregs *cfg = &dinfo->cfg;
967	u_int32_t status;
968	u_int8_t ptr;
969
970	/*
971	 * Check the CAP_LIST bit of the PCI status register first.
972	 */
973	status = pci_read_config(child, PCIR_STATUS, 2);
974	if (!(status & PCIM_STATUS_CAPPRESENT))
975		return (ENXIO);
976
977	/*
978	 * Determine the start pointer of the capabilities list.
979	 */
980	switch (cfg->hdrtype & PCIM_HDRTYPE) {
981	case 0:
982	case 1:
983		ptr = PCIR_CAP_PTR;
984		break;
985	case 2:
986		ptr = PCIR_CAP_PTR_2;
987		break;
988	default:
989		/* XXX: panic? */
990		return (ENXIO);		/* no extended capabilities support */
991	}
992	ptr = pci_read_config(child, ptr, 1);
993
994	/*
995	 * Traverse the capabilities list.
996	 */
997	while (ptr != 0) {
998		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
999			if (capreg != NULL)
1000				*capreg = ptr;
1001			return (0);
1002		}
1003		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1004	}
1005
1006	return (ENOENT);
1007}
1008
1009/*
1010 * Support for MSI-X message interrupts.
1011 */
/*
 * Program MSI-X table entry 'index' with the given message address and
 * data.  The table resource (msix_table_res) must already be mapped.
 * Each table entry is 16 bytes: address low, address high, data,
 * vector control.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_alloc > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);
}
1025
/*
 * Set the per-vector mask bit in the vector-control word (offset 12 of
 * the 16-byte table entry) for MSI-X table entry 'index', if not
 * already set.
 * NOTE(review): this KASSERT bounds 'index' by msix_msgnum while
 * pci_unmask_msix/pci_pending_msix use msix_alloc — confirm which
 * bound is intended and make them consistent.
 */
void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_msgnum > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}
1041
/*
 * Clear the per-vector mask bit in the vector-control word (offset 12
 * of the 16-byte table entry) for MSI-X table entry 'index', if set.
 */
void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset, val;

	KASSERT(msix->msix_alloc > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16 + 12;
	val = bus_read_4(msix->msix_table_res, offset);
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(msix->msix_table_res, offset, val);
	}
}
1057
1058int
1059pci_pending_msix(device_t dev, u_int index)
1060{
1061	struct pci_devinfo *dinfo = device_get_ivars(dev);
1062	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1063	uint32_t offset, bit;
1064
1065	KASSERT(msix->msix_alloc > index, ("bogus index"));
1066	offset = msix->msix_pba_offset + (index / 32) * 4;
1067	bit = 1 << index % 32;
1068	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1069}
1070
1071/*
1072 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1073 * returned in *count.  After this function returns, each message will be
1074 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1075 */
1076int
1077pci_alloc_msix_method(device_t dev, device_t child, int *count)
1078{
1079	struct pci_devinfo *dinfo = device_get_ivars(child);
1080	pcicfgregs *cfg = &dinfo->cfg;
1081	struct resource_list_entry *rle;
1082	int actual, error, i, irq, max;
1083
1084	/* Don't let count == 0 get us into trouble. */
1085	if (*count == 0)
1086		return (EINVAL);
1087
1088	/* If rid 0 is allocated, then fail. */
1089	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1090	if (rle != NULL && rle->res != NULL)
1091		return (ENXIO);
1092
1093	/* Already have allocated messages? */
1094	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1095		return (ENXIO);
1096
1097	/* If MSI is blacklisted for this system, fail. */
1098	if (pci_msi_blacklisted())
1099		return (ENXIO);
1100
1101	/* MSI-X capability present? */
1102	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1103		return (ENODEV);
1104
1105	/* Make sure the appropriate BARs are mapped. */
1106	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1107	    cfg->msix.msix_table_bar);
1108	if (rle == NULL || rle->res == NULL ||
1109	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1110		return (ENXIO);
1111	cfg->msix.msix_table_res = rle->res;
1112	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1113		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1114		    cfg->msix.msix_pba_bar);
1115		if (rle == NULL || rle->res == NULL ||
1116		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1117			return (ENXIO);
1118	}
1119	cfg->msix.msix_pba_res = rle->res;
1120
1121	if (bootverbose)
1122		device_printf(child,
1123		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1124		    *count, cfg->msix.msix_msgnum);
1125	max = min(*count, cfg->msix.msix_msgnum);
1126	for (i = 0; i < max; i++) {
1127		/* Allocate a message. */
1128		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1129		    &irq);
1130		if (error)
1131			break;
1132		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1133		    irq, 1);
1134	}
1135	actual = i;
1136
1137	if (bootverbose) {
1138		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1139		if (actual == 1)
1140			device_printf(child, "using IRQ %lu for MSI-X\n",
1141			    rle->start);
1142		else {
1143			int run;
1144
1145			/*
1146			 * Be fancy and try to print contiguous runs of
1147			 * IRQ values as ranges.  'irq' is the previous IRQ.
1148			 * 'run' is true if we are in a range.
1149			 */
1150			device_printf(child, "using IRQs %lu", rle->start);
1151			irq = rle->start;
1152			run = 0;
1153			for (i = 1; i < actual; i++) {
1154				rle = resource_list_find(&dinfo->resources,
1155				    SYS_RES_IRQ, i + 1);
1156
1157				/* Still in a run? */
1158				if (rle->start == irq + 1) {
1159					run = 1;
1160					irq++;
1161					continue;
1162				}
1163
1164				/* Finish previous range. */
1165				if (run) {
1166					printf("-%d", irq);
1167					run = 0;
1168				}
1169
1170				/* Start new range. */
1171				printf(",%lu", rle->start);
1172				irq = rle->start;
1173			}
1174
1175			/* Unfinished range? */
1176			if (run)
1177				printf("-%d", irq);
1178			printf(" for MSI-X\n");
1179		}
1180	}
1181
1182	/* Mask all vectors. */
1183	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1184		pci_mask_msix(child, i);
1185
1186	/* Update control register to enable MSI-X. */
1187	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1188	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1189	    cfg->msix.msix_ctrl, 2);
1190
1191	/* Update counts of alloc'd messages. */
1192	cfg->msix.msix_alloc = actual;
1193	*count = actual;
1194	return (0);
1195}
1196
1197/*
1198 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1199 * the first N messages in the MSI-X table.  However, device drivers may
1200 * want to use different layouts in the case that they do not allocate a
1201 * full table.  This method allows the driver to specify what layout it
1202 * wants.  It must be called after a successful pci_alloc_msix() but
1203 * before any of the associated SYS_RES_IRQ resources are allocated via
1204 * bus_alloc_resource().  The 'indices' array contains N (where N equals
1205 * the 'count' returned from pci_alloc_msix()) message indices.  The
1206 * indices are 1-based (meaning the first message is at index 1).  On
1207 * successful return, each of the messages in the 'indices' array will
1208 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1209 * if indices contains { 2, 4 }, then upon successful return, the 'child'
1210 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1211 */
1212int
1213pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
1214{
1215	struct pci_devinfo *dinfo = device_get_ivars(child);
1216	pcicfgregs *cfg = &dinfo->cfg;
1217	struct resource_list_entry *rle;
1218	int count, error, i, j, *irqs;
1219
1220	/* Sanity check the indices. */
1221	for (i = 0; i < cfg->msix.msix_alloc; i++)
1222		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
1223			return (EINVAL);
1224
1225	/* Check for duplicates. */
1226	for (i = 0; i < cfg->msix.msix_alloc; i++)
1227		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
1228			if (indices[i] == indices[j])
1229				return (EINVAL);
1230
1231	/* Make sure none of the resources are allocated. */
1232	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1233		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1234		if (rle == NULL)
1235			continue;
1236		if (rle->res != NULL)
1237			return (EBUSY);
1238		count++;
1239	}
1240
1241	/* Save the IRQ values and free the existing resources. */
1242	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
1243	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1244		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1245		if (rle == NULL)
1246			continue;
1247		irqs[count] = rle->start;
1248		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
1249		count++;
1250	}
1251
1252	/* Map the IRQ values to the new message indices and rids. */
1253	for (i = 0; i < cfg->msix.msix_alloc; i++) {
1254		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
1255		    irqs[i], irqs[i], 1);
1256
1257		/*
1258		 * The indices in the backend code (PCIB_* methods and the
1259		 * MI helper routines for MD code such as pci_enable_msix())
1260		 * are all zero-based.  However, the indices passed to this
1261		 * function are 1-based so that the correspond 1:1 with the
1262		 * SYS_RES_IRQ resource IDs.
1263		 */
1264		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
1265		    indices[i] - 1, irqs[i]);
1266		KASSERT(error == 0, ("Failed to remap MSI-X message"));
1267	}
1268	if (bootverbose) {
1269		if (cfg->msix.msix_alloc == 1)
1270			device_printf(child,
1271			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
1272		else {
1273			device_printf(child, "Remapped MSI-X IRQs to indices");
1274			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
1275				printf(" %d,", indices[i]);
1276			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
1277		}
1278	}
1279	free(irqs, M_TEMP);
1280
1281	return (0);
1282}
1283
1284static int
1285pci_release_msix(device_t dev, device_t child)
1286{
1287	struct pci_devinfo *dinfo = device_get_ivars(child);
1288	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1289	struct resource_list_entry *rle;
1290	int count, i;
1291
1292	/* Do we have any messages to release? */
1293	if (msix->msix_alloc == 0)
1294		return (ENODEV);
1295
1296	/* Make sure none of the resources are allocated. */
1297	for (i = 1, count = 0; count < msix->msix_alloc; i++) {
1298		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1299		if (rle == NULL)
1300			continue;
1301		if (rle->res != NULL)
1302			return (EBUSY);
1303		count++;
1304	}
1305
1306	/* Update control register to disable MSI-X. */
1307	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1308	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1309	    msix->msix_ctrl, 2);
1310
1311	/* Release the messages. */
1312	for (i = 1, count = 0; count < msix->msix_alloc; i++) {
1313		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1314		if (rle == NULL)
1315			continue;
1316		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1317		    rle->start);
1318		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
1319		count++;
1320	}
1321
1322	/* Update alloc count. */
1323	msix->msix_alloc = 0;
1324	return (0);
1325}
1326
1327/*
1328 * Return the max supported MSI-X messages this device supports.
1329 * Basically, assuming the MD code can alloc messages, this function
1330 * should return the maximum value that pci_alloc_msix() can return.
1331 * Thus, it is subject to the tunables, etc.
1332 */
1333int
1334pci_msix_count_method(device_t dev, device_t child)
1335{
1336	struct pci_devinfo *dinfo = device_get_ivars(child);
1337	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1338
1339	if (pci_do_msix && msix->msix_location != 0)
1340		return (msix->msix_msgnum);
1341	return (0);
1342}
1343
1344/*
1345 * Support for MSI message signalled interrupts.
1346 */
1347void
1348pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1349{
1350	struct pci_devinfo *dinfo = device_get_ivars(dev);
1351	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1352
1353	/* Write data and address values. */
1354	msi->msi_addr = address;
1355	msi->msi_data = data;
1356	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1357	    address & 0xffffffff, 4);
1358	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1359		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1360		    address >> 32, 4);
1361		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1362		    data, 2);
1363	} else
1364		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1365		    2);
1366
1367	/* Enable MSI in the control register. */
1368	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1369	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1370	    2);
1371}
1372
1373/*
1374 * Restore MSI registers during resume.  If MSI is enabled then
1375 * restore the data and address registers in addition to the control
1376 * register.
1377 */
1378static void
1379pci_resume_msi(device_t dev)
1380{
1381	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1383	uint64_t address;
1384	uint16_t data;
1385
1386	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1387		address = msi->msi_addr;
1388		data = msi->msi_data;
1389		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1390		    address & 0xffffffff, 4);
1391		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1392			pci_write_config(dev, msi->msi_location +
1393			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1394			pci_write_config(dev, msi->msi_location +
1395			    PCIR_MSI_DATA_64BIT, data, 2);
1396		} else
1397			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1398			    data, 2);
1399	}
1400	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1401	    2);
1402}
1403
1404/*
1405 * Returns true if the specified device is blacklisted because MSI
1406 * doesn't work.
1407 */
1408int
1409pci_msi_device_blacklisted(device_t dev)
1410{
1411	struct pci_quirk *q;
1412
1413	if (!pci_honor_msi_blacklist)
1414		return (0);
1415
1416	for (q = &pci_quirks[0]; q->devid; q++) {
1417		if (q->devid == pci_get_devid(dev) &&
1418		    q->type == PCI_QUIRK_DISABLE_MSI)
1419			return (1);
1420	}
1421	return (0);
1422}
1423
1424/*
1425 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1426 * we just check for blacklisted chipsets as represented by the
1427 * host-PCI bridge at device 0:0:0.  In the future, it may become
1428 * necessary to check other system attributes, such as the kenv values
1429 * that give the motherboard manufacturer and model number.
1430 */
1431static int
1432pci_msi_blacklisted(void)
1433{
1434	device_t dev;
1435
1436	if (!pci_honor_msi_blacklist)
1437		return (0);
1438
1439	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1440	if (!(pcie_chipset || pcix_chipset))
1441		return (1);
1442
1443	dev = pci_find_bsf(0, 0, 0);
1444	if (dev != NULL)
1445		return (pci_msi_device_blacklisted(dev));
1446	return (0);
1447}
1448
1449/*
1450 * Attempt to allocate *count MSI messages.  The actual number allocated is
1451 * returned in *count.  After this function returns, each message will be
1452 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1453 */
1454int
1455pci_alloc_msi_method(device_t dev, device_t child, int *count)
1456{
1457	struct pci_devinfo *dinfo = device_get_ivars(child);
1458	pcicfgregs *cfg = &dinfo->cfg;
1459	struct resource_list_entry *rle;
1460	int actual, error, i, irqs[32];
1461	uint16_t ctrl;
1462
1463	/* Don't let count == 0 get us into trouble. */
1464	if (*count == 0)
1465		return (EINVAL);
1466
1467	/* If rid 0 is allocated, then fail. */
1468	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1469	if (rle != NULL && rle->res != NULL)
1470		return (ENXIO);
1471
1472	/* Already have allocated messages? */
1473	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1474		return (ENXIO);
1475
1476	/* If MSI is blacklisted for this system, fail. */
1477	if (pci_msi_blacklisted())
1478		return (ENXIO);
1479
1480	/* MSI capability present? */
1481	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1482		return (ENODEV);
1483
1484	if (bootverbose)
1485		device_printf(child,
1486		    "attempting to allocate %d MSI vectors (%d supported)\n",
1487		    *count, cfg->msi.msi_msgnum);
1488
1489	/* Don't ask for more than the device supports. */
1490	actual = min(*count, cfg->msi.msi_msgnum);
1491
1492	/* Don't ask for more than 32 messages. */
1493	actual = min(actual, 32);
1494
1495	/* MSI requires power of 2 number of messages. */
1496	if (!powerof2(actual))
1497		return (EINVAL);
1498
1499	for (;;) {
1500		/* Try to allocate N messages. */
1501		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1502		    cfg->msi.msi_msgnum, irqs);
1503		if (error == 0)
1504			break;
1505		if (actual == 1)
1506			return (error);
1507
1508		/* Try N / 2. */
1509		actual >>= 1;
1510	}
1511
1512	/*
1513	 * We now have N actual messages mapped onto SYS_RES_IRQ
1514	 * resources in the irqs[] array, so add new resources
1515	 * starting at rid 1.
1516	 */
1517	for (i = 0; i < actual; i++)
1518		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1519		    irqs[i], irqs[i], 1);
1520
1521	if (bootverbose) {
1522		if (actual == 1)
1523			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1524		else {
1525			int run;
1526
1527			/*
1528			 * Be fancy and try to print contiguous runs
1529			 * of IRQ values as ranges.  'run' is true if
1530			 * we are in a range.
1531			 */
1532			device_printf(child, "using IRQs %d", irqs[0]);
1533			run = 0;
1534			for (i = 1; i < actual; i++) {
1535
1536				/* Still in a run? */
1537				if (irqs[i] == irqs[i - 1] + 1) {
1538					run = 1;
1539					continue;
1540				}
1541
1542				/* Finish previous range. */
1543				if (run) {
1544					printf("-%d", irqs[i - 1]);
1545					run = 0;
1546				}
1547
1548				/* Start new range. */
1549				printf(",%d", irqs[i]);
1550			}
1551
1552			/* Unfinished range? */
1553			if (run)
1554				printf("%d", irqs[actual - 1]);
1555			printf(" for MSI\n");
1556		}
1557	}
1558
1559	/* Update control register with actual count. */
1560	ctrl = cfg->msi.msi_ctrl;
1561	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1562	ctrl |= (ffs(actual) - 1) << 4;
1563	cfg->msi.msi_ctrl = ctrl;
1564	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1565
1566	/* Update counts of alloc'd messages. */
1567	cfg->msi.msi_alloc = actual;
1568	*count = actual;
1569	return (0);
1570}
1571
1572/* Release the MSI messages associated with this device. */
1573int
1574pci_release_msi_method(device_t dev, device_t child)
1575{
1576	struct pci_devinfo *dinfo = device_get_ivars(child);
1577	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1578	struct resource_list_entry *rle;
1579	int error, i, irqs[32];
1580
1581	/* Try MSI-X first. */
1582	error = pci_release_msix(dev, child);
1583	if (error != ENODEV)
1584		return (error);
1585
1586	/* Do we have any messages to release? */
1587	if (msi->msi_alloc == 0)
1588		return (ENODEV);
1589	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1590
1591	/* Make sure none of the resources are allocated. */
1592	for (i = 0; i < msi->msi_alloc; i++) {
1593		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1594		KASSERT(rle != NULL, ("missing MSI resource"));
1595		if (rle->res != NULL)
1596			return (EBUSY);
1597		irqs[i] = rle->start;
1598	}
1599
1600	/* Update control register with 0 count and disable MSI. */
1601	msi->msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
1602	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1603	    msi->msi_ctrl, 2);
1604
1605	/* Release the messages. */
1606	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1607	for (i = 0; i < msi->msi_alloc; i++)
1608		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1609
1610	/* Update alloc count. */
1611	msi->msi_alloc = 0;
1612	return (0);
1613}
1614
1615/*
1616 * Return the max supported MSI messages this device supports.
1617 * Basically, assuming the MD code can alloc messages, this function
1618 * should return the maximum value that pci_alloc_msi() can return.
1619 * Thus, it is subject to the tunables, etc.
1620 */
1621int
1622pci_msi_count_method(device_t dev, device_t child)
1623{
1624	struct pci_devinfo *dinfo = device_get_ivars(child);
1625	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1626
1627	if (pci_do_msi && msi->msi_location != 0)
1628		return (msi->msi_msgnum);
1629	return (0);
1630}
1631
1632/* free pcicfgregs structure and all depending data structures */
1633
1634int
1635pci_freecfg(struct pci_devinfo *dinfo)
1636{
1637	struct devlist *devlist_head;
1638	int i;
1639
1640	devlist_head = &pci_devq;
1641
1642	if (dinfo->cfg.vpd.vpd_reg) {
1643		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1644		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1645			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1646		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1647		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1648			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1649		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1650	}
1651	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1652	free(dinfo, M_DEVBUF);
1653
1654	/* increment the generation count */
1655	pci_generation++;
1656
1657	/* we're losing one device */
1658	pci_numdevs--;
1659	return (0);
1660}
1661
1662/*
 * PCI power management
1664 */
1665int
1666pci_set_powerstate_method(device_t dev, device_t child, int state)
1667{
1668	struct pci_devinfo *dinfo = device_get_ivars(child);
1669	pcicfgregs *cfg = &dinfo->cfg;
1670	uint16_t status;
1671	int result, oldstate, highest, delay;
1672
1673	if (cfg->pp.pp_cap == 0)
1674		return (EOPNOTSUPP);
1675
1676	/*
1677	 * Optimize a no state change request away.  While it would be OK to
1678	 * write to the hardware in theory, some devices have shown odd
1679	 * behavior when going from D3 -> D3.
1680	 */
1681	oldstate = pci_get_powerstate(child);
1682	if (oldstate == state)
1683		return (0);
1684
1685	/*
1686	 * The PCI power management specification states that after a state
1687	 * transition between PCI power states, system software must
1688	 * guarantee a minimal delay before the function accesses the device.
1689	 * Compute the worst case delay that we need to guarantee before we
1690	 * access the device.  Many devices will be responsive much more
1691	 * quickly than this delay, but there are some that don't respond
1692	 * instantly to state changes.  Transitions to/from D3 state require
1693	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1694	 * is done below with DELAY rather than a sleeper function because
1695	 * this function can be called from contexts where we cannot sleep.
1696	 */
1697	highest = (oldstate > state) ? oldstate : state;
1698	if (highest == PCI_POWERSTATE_D3)
1699	    delay = 10000;
1700	else if (highest == PCI_POWERSTATE_D2)
1701	    delay = 200;
1702	else
1703	    delay = 0;
1704	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1705	    & ~PCIM_PSTAT_DMASK;
1706	result = 0;
1707	switch (state) {
1708	case PCI_POWERSTATE_D0:
1709		status |= PCIM_PSTAT_D0;
1710		break;
1711	case PCI_POWERSTATE_D1:
1712		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1713			return (EOPNOTSUPP);
1714		status |= PCIM_PSTAT_D1;
1715		break;
1716	case PCI_POWERSTATE_D2:
1717		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1718			return (EOPNOTSUPP);
1719		status |= PCIM_PSTAT_D2;
1720		break;
1721	case PCI_POWERSTATE_D3:
1722		status |= PCIM_PSTAT_D3;
1723		break;
1724	default:
1725		return (EINVAL);
1726	}
1727
1728	if (bootverbose)
1729		printf(
1730		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1731		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1732		    oldstate, state);
1733
1734	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1735	if (delay)
1736		DELAY(delay);
1737	return (0);
1738}
1739
1740int
1741pci_get_powerstate_method(device_t dev, device_t child)
1742{
1743	struct pci_devinfo *dinfo = device_get_ivars(child);
1744	pcicfgregs *cfg = &dinfo->cfg;
1745	uint16_t status;
1746	int result;
1747
1748	if (cfg->pp.pp_cap != 0) {
1749		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1750		switch (status & PCIM_PSTAT_DMASK) {
1751		case PCIM_PSTAT_D0:
1752			result = PCI_POWERSTATE_D0;
1753			break;
1754		case PCIM_PSTAT_D1:
1755			result = PCI_POWERSTATE_D1;
1756			break;
1757		case PCIM_PSTAT_D2:
1758			result = PCI_POWERSTATE_D2;
1759			break;
1760		case PCIM_PSTAT_D3:
1761			result = PCI_POWERSTATE_D3;
1762			break;
1763		default:
1764			result = PCI_POWERSTATE_UNKNOWN;
1765			break;
1766		}
1767	} else {
1768		/* No support, device is always at D0 */
1769		result = PCI_POWERSTATE_D0;
1770	}
1771	return (result);
1772}
1773
1774/*
1775 * Some convenience functions for PCI device drivers.
1776 */
1777
1778static __inline void
1779pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1780{
1781	uint16_t	command;
1782
1783	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1784	command |= bit;
1785	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1786}
1787
1788static __inline void
1789pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1790{
1791	uint16_t	command;
1792
1793	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1794	command &= ~bit;
1795	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1796}
1797
/*
 * Turn on the bus master enable bit (PCIM_CMD_BUSMASTEREN) in the
 * child's command register.  Always returns 0.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1804
/*
 * Turn off the bus master enable bit (PCIM_CMD_BUSMASTEREN) in the
 * child's command register.  Always returns 0.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1811
1812int
1813pci_enable_io_method(device_t dev, device_t child, int space)
1814{
1815	uint16_t command;
1816	uint16_t bit;
1817	char *error;
1818
1819	bit = 0;
1820	error = NULL;
1821
1822	switch(space) {
1823	case SYS_RES_IOPORT:
1824		bit = PCIM_CMD_PORTEN;
1825		error = "port";
1826		break;
1827	case SYS_RES_MEMORY:
1828		bit = PCIM_CMD_MEMEN;
1829		error = "memory";
1830		break;
1831	default:
1832		return (EINVAL);
1833	}
1834	pci_set_command_bit(dev, child, bit);
1835	/* Some devices seem to need a brief stall here, what do to? */
1836	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1837	if (command & bit)
1838		return (0);
1839	device_printf(child, "failed to enable %s mapping!\n", error);
1840	return (ENXIO);
1841}
1842
1843int
1844pci_disable_io_method(device_t dev, device_t child, int space)
1845{
1846	uint16_t command;
1847	uint16_t bit;
1848	char *error;
1849
1850	bit = 0;
1851	error = NULL;
1852
1853	switch(space) {
1854	case SYS_RES_IOPORT:
1855		bit = PCIM_CMD_PORTEN;
1856		error = "port";
1857		break;
1858	case SYS_RES_MEMORY:
1859		bit = PCIM_CMD_MEMEN;
1860		error = "memory";
1861		break;
1862	default:
1863		return (EINVAL);
1864	}
1865	pci_clear_command_bit(dev, child, bit);
1866	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1867	if (command & bit) {
1868		device_printf(child, "failed to disable %s mapping!\n", error);
1869		return (ENXIO);
1870	}
1871	return (0);
1872}
1873
1874/*
1875 * New style pci driver.  Parent device is either a pci-host-bridge or a
1876 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1877 */
1878
/*
 * Dump the interesting fields of a probed device's config header to
 * the console.  Only produces output when booted verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity and bus location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* Raw timer values are scaled to nanoseconds for display. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management: supported states and current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			/* MSI capability: message count and feature flags. */
			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/*
			 * MSI-X capability: message count and which BAR(s)
			 * hold the vector table and PBA.
			 */
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1935
1936static int
1937pci_porten(device_t pcib, int b, int s, int f)
1938{
1939	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1940		& PCIM_CMD_PORTEN) != 0;
1941}
1942
1943static int
1944pci_memen(device_t pcib, int b, int s, int f)
1945{
1946	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1947		& PCIM_CMD_MEMEN) != 0;
1948}
1949
1950/*
1951 * Add a resource based on a pci map register. Return 1 if the map
1952 * register is a 32bit map register or 2 if it is a 64bit register.
1953 */
1954static int
1955pci_add_map(device_t pcib, device_t bus, device_t dev,
1956    int b, int s, int f, int reg, struct resource_list *rl, int force,
1957    int prefetch)
1958{
1959	uint32_t map;
1960	pci_addr_t base;
1961	pci_addr_t start, end, count;
1962	uint8_t ln2size;
1963	uint8_t ln2range;
1964	uint32_t testval;
1965	uint16_t cmd;
1966	int type;
1967	int barlen;
1968	struct resource *res;
1969
1970	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1971	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1972	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1973	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1974
1975	if (PCI_BAR_MEM(map))
1976		type = SYS_RES_MEMORY;
1977	else
1978		type = SYS_RES_IOPORT;
1979	ln2size = pci_mapsize(testval);
1980	ln2range = pci_maprange(testval);
1981	base = pci_mapbase(map);
1982	barlen = ln2range == 64 ? 2 : 1;
1983
1984	/*
1985	 * For I/O registers, if bottom bit is set, and the next bit up
1986	 * isn't clear, we know we have a BAR that doesn't conform to the
1987	 * spec, so ignore it.  Also, sanity check the size of the data
1988	 * areas to the type of memory involved.  Memory must be at least
1989	 * 16 bytes in size, while I/O ranges must be at least 4.
1990	 */
1991	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
1992		return (barlen);
1993	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1994	    (type == SYS_RES_IOPORT && ln2size < 2))
1995		return (barlen);
1996
1997	if (ln2range == 64)
1998		/* Read the other half of a 64bit map register */
1999		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2000	if (bootverbose) {
2001		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2002		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2003		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2004			printf(", port disabled\n");
2005		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2006			printf(", memory disabled\n");
2007		else
2008			printf(", enabled\n");
2009	}
2010
2011	/*
2012	 * If base is 0, then we have problems.  It is best to ignore
2013	 * such entries for the moment.  These will be allocated later if
2014	 * the driver specifically requests them.  However, some
2015	 * removable busses look better when all resources are allocated,
2016	 * so allow '0' to be overriden.
2017	 *
2018	 * Similarly treat maps whose values is the same as the test value
2019	 * read back.  These maps have had all f's written to them by the
2020	 * BIOS in an attempt to disable the resources.
2021	 */
2022	if (!force && (base == 0 || map == testval))
2023		return (barlen);
2024	if ((u_long)base != base) {
2025		device_printf(bus,
2026		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2027		return (barlen);
2028	}
2029
2030	/*
2031	 * This code theoretically does the right thing, but has
2032	 * undesirable side effects in some cases where peripherals
2033	 * respond oddly to having these bits enabled.  Let the user
2034	 * be able to turn them off (since pci_enable_io_modes is 1 by
2035	 * default).
2036	 */
2037	if (pci_enable_io_modes) {
2038		/* Turn on resources that have been left off by a lazy BIOS */
2039		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2040			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2041			cmd |= PCIM_CMD_PORTEN;
2042			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2043		}
2044		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2045			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2046			cmd |= PCIM_CMD_MEMEN;
2047			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2048		}
2049	} else {
2050		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2051			return (barlen);
2052		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2053			return (barlen);
2054	}
2055
2056	count = 1 << ln2size;
2057	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide */
2059		end = ~0ULL;
2060	} else {
2061		start = base;
2062		end = base + (1 << ln2size) - 1;
2063	}
2064	resource_list_add(rl, type, reg, start, end, count);
2065
2066	/*
2067	 * Not quite sure what to do on failure of allocating the resource
2068	 * since I can postulate several right answers.
2069	 */
2070	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2071	    prefetch ? RF_PREFETCHABLE : 0);
2072	if (res == NULL)
2073		return (barlen);
2074	start = rman_get_start(res);
2075	if ((u_long)start != start) {
2076		/* Wait a minute!  this platform can't do this address. */
2077		device_printf(bus,
2078		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2079		    b, s, f, reg, (uintmax_t)start);
2080		resource_list_release(rl, bus, dev, type, reg, res);
2081		return (barlen);
2082	}
2083	pci_write_config(dev, reg, start, 4);
2084	if (ln2range == 64)
2085		pci_write_config(dev, reg + 4, start >> 32, 4);
2086	return (barlen);
2087}
2088
2089/*
2090 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
 * same addresses that old ISA hardware did. This dictates that we use
 * those addresses and ignore the BARs if we cannot set PCI native
2094 * addressing mode.
2095 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: when the prog-if says it runs in PCI native
	 * mode, take its addresses from BARs 0/1; otherwise reserve the
	 * legacy ISA-compatibility ranges (0x1f0-0x1f7 command block,
	 * 0x3f6 control port).
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/*
	 * Secondary channel: same decision, with legacy ranges
	 * 0x170-0x177 and 0x376 and BARs 2/3 for native mode.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/*
	 * BARs 4 and 5 are always taken from the device (BAR 4 is
	 * typically the bus-master DMA registers — not verified here).
	 */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2149
2150static void
2151pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2152{
2153	struct pci_devinfo *dinfo = device_get_ivars(dev);
2154	pcicfgregs *cfg = &dinfo->cfg;
2155	char tunable_name[64];
2156	int irq;
2157
2158	/* Has to have an intpin to have an interrupt. */
2159	if (cfg->intpin == 0)
2160		return;
2161
2162	/* Let the user override the IRQ with a tunable. */
2163	irq = PCI_INVALID_IRQ;
2164	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2165	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2166	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2167		irq = PCI_INVALID_IRQ;
2168
2169	/*
2170	 * If we didn't get an IRQ via the tunable, then we either use the
2171	 * IRQ value in the intline register or we ask the bus to route an
2172	 * interrupt for us.  If force_route is true, then we only use the
2173	 * value in the intline register if the bus was unable to assign an
2174	 * IRQ.
2175	 */
2176	if (!PCI_INTERRUPT_VALID(irq)) {
2177		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2178			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2179		if (!PCI_INTERRUPT_VALID(irq))
2180			irq = cfg->intline;
2181	}
2182
2183	/* If after all that we don't have an IRQ, just bail. */
2184	if (!PCI_INTERRUPT_VALID(irq))
2185		return;
2186
2187	/* Update the config register if it changed. */
2188	if (irq != cfg->intline) {
2189		cfg->intline = irq;
2190		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2191	}
2192
2193	/* Add this IRQ as rid 0 interrupt resource. */
2194	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2195}
2196
2197void
2198pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2199{
2200	device_t pcib;
2201	struct pci_devinfo *dinfo = device_get_ivars(dev);
2202	pcicfgregs *cfg = &dinfo->cfg;
2203	struct resource_list *rl = &dinfo->resources;
2204	struct pci_quirk *q;
2205	int b, i, f, s;
2206
2207	pcib = device_get_parent(bus);
2208
2209	b = cfg->bus;
2210	s = cfg->slot;
2211	f = cfg->func;
2212
2213	/* ATA devices needs special map treatment */
2214	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2215	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2216	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2217	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2218	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2219		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2220	else
2221		for (i = 0; i < cfg->nummaps;)
2222			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2223			    rl, force, prefetchmask & (1 << i));
2224
2225	/*
2226	 * Add additional, quirked resources.
2227	 */
2228	for (q = &pci_quirks[0]; q->devid; q++) {
2229		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2230		    && q->type == PCI_QUIRK_MAP_REG)
2231			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2232			  force, 0);
2233	}
2234
2235	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2236#ifdef __PCI_REROUTE_INTERRUPT
2237		/*
2238		 * Try to re-route interrupts. Sometimes the BIOS or
2239		 * firmware may leave bogus values in these registers.
2240		 * If the re-route fails, then just stick with what we
2241		 * have.
2242		 */
2243		pci_assign_interrupt(bus, dev, 1);
2244#else
2245		pci_assign_interrupt(bus, dev, 0);
2246#endif
2247	}
2248}
2249
2250void
2251pci_add_children(device_t dev, int busno, size_t dinfo_size)
2252{
2253#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2254	device_t pcib = device_get_parent(dev);
2255	struct pci_devinfo *dinfo;
2256	int maxslots;
2257	int s, f, pcifunchigh;
2258	uint8_t hdrtype;
2259
2260	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2261	    ("dinfo_size too small"));
2262	maxslots = PCIB_MAXSLOTS(pcib);
2263	for (s = 0; s <= maxslots; s++) {
2264		pcifunchigh = 0;
2265		f = 0;
2266		DELAY(1);
2267		hdrtype = REG(PCIR_HDRTYPE, 1);
2268		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2269			continue;
2270		if (hdrtype & PCIM_MFDEV)
2271			pcifunchigh = PCI_FUNCMAX;
2272		for (f = 0; f <= pcifunchigh; f++) {
2273			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2274			if (dinfo != NULL) {
2275				pci_add_child(dev, dinfo);
2276			}
2277		}
2278	}
2279#undef REG
2280}
2281
2282void
2283pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2284{
2285	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2286	device_set_ivars(dinfo->cfg.dev, dinfo);
2287	resource_list_init(&dinfo->resources);
2288	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2289	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2290	pci_print_verbose(dinfo);
2291	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2292}
2293
/*
 * Probe method for the generic PCI bus driver; always matches, but at
 * a very low priority.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2303
2304static int
2305pci_attach(device_t dev)
2306{
2307	int busno;
2308
2309	/*
2310	 * Since there can be multiple independantly numbered PCI
2311	 * busses on systems with multiple PCI domains, we can't use
2312	 * the unit number to decide which bus we are probing. We ask
2313	 * the parent pcib what our bus number is.
2314	 */
2315	busno = pcib_get_bus(dev);
2316	if (bootverbose)
2317		device_printf(dev, "physical bus=%d\n", busno);
2318
2319	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2320
2321	return (bus_generic_attach(dev));
2322}
2323
2324int
2325pci_suspend(device_t dev)
2326{
2327	int dstate, error, i, numdevs;
2328	device_t acpi_dev, child, *devlist;
2329	struct pci_devinfo *dinfo;
2330
2331	/*
2332	 * Save the PCI configuration space for each child and set the
2333	 * device in the appropriate power state for this sleep state.
2334	 */
2335	acpi_dev = NULL;
2336	if (pci_do_power_resume)
2337		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2338	device_get_children(dev, &devlist, &numdevs);
2339	for (i = 0; i < numdevs; i++) {
2340		child = devlist[i];
2341		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2342		pci_cfg_save(child, dinfo, 0);
2343	}
2344
2345	/* Suspend devices before potentially powering them down. */
2346	error = bus_generic_suspend(dev);
2347	if (error) {
2348		free(devlist, M_TEMP);
2349		return (error);
2350	}
2351
2352	/*
2353	 * Always set the device to D3.  If ACPI suggests a different
2354	 * power state, use it instead.  If ACPI is not present, the
2355	 * firmware is responsible for managing device power.  Skip
2356	 * children who aren't attached since they are powered down
2357	 * separately.  Only manage type 0 devices for now.
2358	 */
2359	for (i = 0; acpi_dev && i < numdevs; i++) {
2360		child = devlist[i];
2361		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2362		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2363			dstate = PCI_POWERSTATE_D3;
2364			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2365			pci_set_powerstate(child, dstate);
2366		}
2367	}
2368	free(devlist, M_TEMP);
2369	return (0);
2370}
2371
2372int
2373pci_resume(device_t dev)
2374{
2375	int i, numdevs;
2376	device_t acpi_dev, child, *devlist;
2377	struct pci_devinfo *dinfo;
2378
2379	/*
2380	 * Set each child to D0 and restore its PCI configuration space.
2381	 */
2382	acpi_dev = NULL;
2383	if (pci_do_power_resume)
2384		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2385	device_get_children(dev, &devlist, &numdevs);
2386	for (i = 0; i < numdevs; i++) {
2387		/*
2388		 * Notify ACPI we're going to D0 but ignore the result.  If
2389		 * ACPI is not present, the firmware is responsible for
2390		 * managing device power.  Only manage type 0 devices for now.
2391		 */
2392		child = devlist[i];
2393		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2394		if (acpi_dev && device_is_attached(child) &&
2395		    dinfo->cfg.hdrtype == 0) {
2396			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2397			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2398		}
2399
2400		/* Now the device is powered up, restore its config space. */
2401		pci_cfg_restore(child, dinfo);
2402	}
2403	free(devlist, M_TEMP);
2404	return (bus_generic_resume(dev));
2405}
2406
2407static void
2408pci_load_vendor_data(void)
2409{
2410	caddr_t vendordata, info;
2411
2412	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2413		info = preload_search_info(vendordata, MODINFO_ADDR);
2414		pci_vendordata = *(char **)info;
2415		info = preload_search_info(vendordata, MODINFO_SIZE);
2416		pci_vendordata_size = *(size_t *)info;
2417		/* terminate the database */
2418		pci_vendordata[pci_vendordata_size] = '\n';
2419	}
2420}
2421
2422void
2423pci_driver_added(device_t dev, driver_t *driver)
2424{
2425	int numdevs;
2426	device_t *devlist;
2427	device_t child;
2428	struct pci_devinfo *dinfo;
2429	int i;
2430
2431	if (bootverbose)
2432		device_printf(dev, "driver added\n");
2433	DEVICE_IDENTIFY(driver, dev);
2434	device_get_children(dev, &devlist, &numdevs);
2435	for (i = 0; i < numdevs; i++) {
2436		child = devlist[i];
2437		if (device_get_state(child) != DS_NOTPRESENT)
2438			continue;
2439		dinfo = device_get_ivars(child);
2440		pci_print_verbose(dinfo);
2441		if (bootverbose)
2442			printf("pci%d:%d:%d: reprobing on driver added\n",
2443			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2444		pci_cfg_restore(child, dinfo);
2445		if (device_probe_and_attach(child) != 0)
2446			pci_cfg_save(child, dinfo, 1);
2447	}
2448	free(devlist, M_TEMP);
2449}
2450
2451int
2452pci_print_child(device_t dev, device_t child)
2453{
2454	struct pci_devinfo *dinfo;
2455	struct resource_list *rl;
2456	int retval = 0;
2457
2458	dinfo = device_get_ivars(child);
2459	rl = &dinfo->resources;
2460
2461	retval += bus_print_child_header(dev, child);
2462
2463	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2464	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2465	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2466	if (device_get_flags(dev))
2467		retval += printf(" flags %#x", device_get_flags(dev));
2468
2469	retval += printf(" at device %d.%d", pci_get_slot(child),
2470	    pci_get_function(child));
2471
2472	retval += bus_print_child_footer(dev, child);
2473
2474	return (retval);
2475}
2476
/*
 * Class/subclass -> human-readable description table, used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for the device.  A subclass of -1 supplies the generic
 * description for the whole class; the table ends at a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2563
2564void
2565pci_probe_nomatch(device_t dev, device_t child)
2566{
2567	int	i;
2568	char	*cp, *scp, *device;
2569
2570	/*
2571	 * Look for a listing for this device in a loaded device database.
2572	 */
2573	if ((device = pci_describe_device(child)) != NULL) {
2574		device_printf(dev, "<%s>", device);
2575		free(device, M_DEVBUF);
2576	} else {
2577		/*
2578		 * Scan the class/subclass descriptions for a general
2579		 * description.
2580		 */
2581		cp = "unknown";
2582		scp = NULL;
2583		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2584			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2585				if (pci_nomatch_tab[i].subclass == -1) {
2586					cp = pci_nomatch_tab[i].desc;
2587				} else if (pci_nomatch_tab[i].subclass ==
2588				    pci_get_subclass(child)) {
2589					scp = pci_nomatch_tab[i].desc;
2590				}
2591			}
2592		}
2593		device_printf(dev, "<%s%s%s>",
2594		    cp ? cp : "",
2595		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2596		    scp ? scp : "");
2597	}
2598	printf(" at device %d.%d (no driver attached)\n",
2599	    pci_get_slot(child), pci_get_function(child));
2600	if (pci_do_power_nodriver)
2601		pci_cfg_save(child,
2602		    (struct pci_devinfo *) device_get_ivars(child), 1);
2603	return;
2604}
2605
2606/*
2607 * Parse the PCI device database, if loaded, and return a pointer to a
2608 * description of the device.
2609 *
2610 * The database is flat text formatted as follows:
2611 *
2612 * Any line not in a valid format is ignored.
2613 * Lines are terminated with newline '\n' characters.
2614 *
2615 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2616 * the vendor name.
2617 *
2618 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2619 * - devices cannot be listed without a corresponding VENDOR line.
2620 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2621 * another TAB, then the device name.
2622 */
2623
2624/*
2625 * Assuming (ptr) points to the beginning of a line in the database,
2626 * return the vendor or device and description of the next entry.
2627 * The value of (vendor) or (device) inappropriate for the entry type
2628 * is set to -1.  Returns nonzero at the end of the database.
2629 *
2630 * Note that this is slightly unrobust in the face of corrupt data;
2631 * we attempt to safeguard against this by spamming the end of the
2632 * database with a newline when we initialise.
2633 */
2634static int
2635pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2636{
2637	char	*cp = *ptr;
2638	int	left;
2639
2640	*device = -1;
2641	*vendor = -1;
2642	**desc = '\0';
2643	for (;;) {
2644		left = pci_vendordata_size - (cp - pci_vendordata);
2645		if (left <= 0) {
2646			*ptr = cp;
2647			return(1);
2648		}
2649
2650		/* vendor entry? */
2651		if (*cp != '\t' &&
2652		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2653			break;
2654		/* device entry? */
2655		if (*cp == '\t' &&
2656		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2657			break;
2658
2659		/* skip to next line */
2660		while (*cp != '\n' && left > 0) {
2661			cp++;
2662			left--;
2663		}
2664		if (*cp == '\n') {
2665			cp++;
2666			left--;
2667		}
2668	}
2669	/* skip to next line */
2670	while (*cp != '\n' && left > 0) {
2671		cp++;
2672		left--;
2673	}
2674	if (*cp == '\n' && left > 0)
2675		cp++;
2676	*ptr = cp;
2677	return(0);
2678}
2679
2680static char *
2681pci_describe_device(device_t dev)
2682{
2683	int	vendor, device;
2684	char	*desc, *vp, *dp, *line;
2685
2686	desc = vp = dp = NULL;
2687
2688	/*
2689	 * If we have no vendor data, we can't do anything.
2690	 */
2691	if (pci_vendordata == NULL)
2692		goto out;
2693
2694	/*
2695	 * Scan the vendor data looking for this device
2696	 */
2697	line = pci_vendordata;
2698	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2699		goto out;
2700	for (;;) {
2701		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2702			goto out;
2703		if (vendor == pci_get_vendor(dev))
2704			break;
2705	}
2706	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2707		goto out;
2708	for (;;) {
2709		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2710			*dp = 0;
2711			break;
2712		}
2713		if (vendor != -1) {
2714			*dp = 0;
2715			break;
2716		}
2717		if (device == pci_get_device(dev))
2718			break;
2719	}
2720	if (dp[0] == '\0')
2721		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2722	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2723	    NULL)
2724		sprintf(desc, "%s, %s", vp, dp);
2725 out:
2726	if (vp != NULL)
2727		free(vp, M_DEVBUF);
2728	if (dp != NULL)
2729		free(dp, M_DEVBUF);
2730	return(desc);
2731}
2732
/*
 * Bus method: read one of the PCI instance variables of a child
 * device into *result.  Returns 0 on success, ENOENT for unknown
 * ivars, or EINVAL for PCI_IVAR_ETHADDR (not supported here).
 * All values come from the config-space snapshot in the child's
 * pcicfgregs, not from fresh config reads.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2812
2813int
2814pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2815{
2816	struct pci_devinfo *dinfo;
2817
2818	dinfo = device_get_ivars(child);
2819
2820	switch (which) {
2821	case PCI_IVAR_INTPIN:
2822		dinfo->cfg.intpin = value;
2823		return (0);
2824	case PCI_IVAR_ETHADDR:
2825	case PCI_IVAR_SUBVENDOR:
2826	case PCI_IVAR_SUBDEVICE:
2827	case PCI_IVAR_VENDOR:
2828	case PCI_IVAR_DEVICE:
2829	case PCI_IVAR_DEVID:
2830	case PCI_IVAR_CLASS:
2831	case PCI_IVAR_SUBCLASS:
2832	case PCI_IVAR_PROGIF:
2833	case PCI_IVAR_REVID:
2834	case PCI_IVAR_IRQ:
2835	case PCI_IVAR_BUS:
2836	case PCI_IVAR_SLOT:
2837	case PCI_IVAR_FUNCTION:
2838		return (EINVAL);	/* disallow for now */
2839
2840	default:
2841		return (ENOENT);
2842	}
2843}
2844
2845
2846#include "opt_ddb.h"
2847#ifdef DDB
2848#include <ddb/ddb.h>
2849#include <sys/cons.h>
2850
2851/*
2852 * List resources based on pci map registers, used for within ddb
2853 */
2854
/*
 * DDB "show pciregs" command: walk the global pci_devq device list and
 * print one summary line per device (driver name/unit, location,
 * class, subsystem/chip ids, revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used as a pseudo-unit for devices with no driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2894#endif /* DDB */
2895
/*
 * Lazily allocate a resource for a BAR that has no entry in the
 * child's resource list yet.  Probes the BAR for its size and kind,
 * allocates a matching system resource, records it in the resource
 * list, and programs the BAR with the assigned base address.  Returns
 * the resource, or NULL on any failure (the original BAR contents are
 * restored in that case).
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/*
	 * Standard PCI sizing probe: save the current BAR value, write
	 * all ones, and read back the size/type mask.  The saved value
	 * (or the new assignment) is written back at "out" below.
	 */
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* The requested resource type must agree with the BAR's kind. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned; raise the alignment if needed. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the newly assigned base address. */
	map = rman_get_start(res);
out:;
	/* Restore the saved value, or write the new assignment. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2984
2985
/*
 * Bus method: allocate a resource for a child.  For our immediate
 * children this performs lazy BAR allocation and interrupt routing
 * before deferring to the child's resource list; for grandchildren it
 * just forwards to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that correspond to actual BARs qualify. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			/* No entry yet: size and allocate the BAR lazily. */
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3062
3063void
3064pci_delete_resource(device_t dev, device_t child, int type, int rid)
3065{
3066	struct pci_devinfo *dinfo;
3067	struct resource_list *rl;
3068	struct resource_list_entry *rle;
3069
3070	if (device_get_parent(child) != dev)
3071		return;
3072
3073	dinfo = device_get_ivars(child);
3074	rl = &dinfo->resources;
3075	rle = resource_list_find(rl, type, rid);
3076	if (rle) {
3077		if (rle->res) {
3078			if (rman_get_device(rle->res) != dev ||
3079			    rman_get_flags(rle->res) & RF_ACTIVE) {
3080				device_printf(dev, "delete_resource: "
3081				    "Resource still owned by child, oops. "
3082				    "(type=%d, rid=%d, addr=%lx)\n",
3083				    rle->type, rle->rid,
3084				    rman_get_start(rle->res));
3085				return;
3086			}
3087			bus_release_resource(dev, type, rid, rle->res);
3088		}
3089		resource_list_delete(rl, type, rid);
3090	}
3091	/*
3092	 * Why do we turn off the PCI configuration BAR when we delete a
3093	 * resource? -- imp
3094	 */
3095	pci_write_config(child, rid, 0, 4);
3096	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3097}
3098
3099struct resource_list *
3100pci_get_resource_list (device_t dev, device_t child)
3101{
3102	struct pci_devinfo *dinfo = device_get_ivars(child);
3103
3104	return (&dinfo->resources);
3105}
3106
3107uint32_t
3108pci_read_config_method(device_t dev, device_t child, int reg, int width)
3109{
3110	struct pci_devinfo *dinfo = device_get_ivars(child);
3111	pcicfgregs *cfg = &dinfo->cfg;
3112
3113	return (PCIB_READ_CONFIG(device_get_parent(dev),
3114	    cfg->bus, cfg->slot, cfg->func, reg, width));
3115}
3116
3117void
3118pci_write_config_method(device_t dev, device_t child, int reg,
3119    uint32_t val, int width)
3120{
3121	struct pci_devinfo *dinfo = device_get_ivars(child);
3122	pcicfgregs *cfg = &dinfo->cfg;
3123
3124	PCIB_WRITE_CONFIG(device_get_parent(dev),
3125	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3126}
3127
3128int
3129pci_child_location_str_method(device_t dev, device_t child, char *buf,
3130    size_t buflen)
3131{
3132
3133	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3134	    pci_get_function(child));
3135	return (0);
3136}
3137
3138int
3139pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3140    size_t buflen)
3141{
3142	struct pci_devinfo *dinfo;
3143	pcicfgregs *cfg;
3144
3145	dinfo = device_get_ivars(child);
3146	cfg = &dinfo->cfg;
3147	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3148	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3149	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3150	    cfg->progif);
3151	return (0);
3152}
3153
3154int
3155pci_assign_interrupt_method(device_t dev, device_t child)
3156{
3157	struct pci_devinfo *dinfo = device_get_ivars(child);
3158	pcicfgregs *cfg = &dinfo->cfg;
3159
3160	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3161	    cfg->intpin));
3162}
3163
3164static int
3165pci_modevent(module_t mod, int what, void *arg)
3166{
3167	static struct cdev *pci_cdev;
3168
3169	switch (what) {
3170	case MOD_LOAD:
3171		STAILQ_INIT(&pci_devq);
3172		pci_generation = 0;
3173		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3174		    "pci");
3175		pci_load_vendor_data();
3176		break;
3177
3178	case MOD_UNLOAD:
3179		destroy_dev(pci_cdev);
3180		break;
3181	}
3182
3183	return (0);
3184}
3185
/*
 * Restore a type-0 device's saved config-space registers (BARs, command,
 * interrupt and timing registers) from the cache in 'dinfo', e.g. after
 * resume.  The device is powered up to D0 first, and MSI state is
 * reprogrammed afterward if present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back every cached BAR, then the remaining header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3232
3233void
3234pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3235{
3236	int i;
3237	uint32_t cls;
3238	int ps;
3239
3240	/*
3241	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3242	 * we know need special treatment.  Type 2 devices are cardbus bridges
3243	 * which also require special treatment.  Other types are unknown, and
3244	 * we err on the side of safety by ignoring them.  Powering down
3245	 * bridges should not be undertaken lightly.
3246	 */
3247	if (dinfo->cfg.hdrtype != 0)
3248		return;
3249	for (i = 0; i < dinfo->cfg.nummaps; i++)
3250		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3251	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3252
3253	/*
3254	 * Some drivers apparently write to these registers w/o updating our
3255	 * cached copy.  No harm happens if we update the copy, so do so here
3256	 * so we can restore them.  The COMMAND register is modified by the
3257	 * bus w/o updating the cache.  This should represent the normally
3258	 * writable portion of the 'defined' part of type 0 headers.  In
3259	 * theory we also need to save/restore the PCI capability structures
3260	 * we know about, but apart from power we don't know any that are
3261	 * writable.
3262	 */
3263	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3264	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3265	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3266	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3267	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3268	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3269	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3270	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3271	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3272	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3273	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3274	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3275	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3276	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3277	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3278
3279	/*
3280	 * don't set the state for display devices, base peripherals and
3281	 * memory devices since bad things happen when they are powered down.
3282	 * We should (a) have drivers that can easily detach and (b) use
3283	 * generic drivers for these devices so that some device actually
3284	 * attaches.  We need to make sure that when we implement (a) we don't
3285	 * power the device down on a reattach.
3286	 */
3287	cls = pci_get_class(dev);
3288	if (!setstate)
3289		return;
3290	switch (pci_do_power_nodriver)
3291	{
3292		case 0:		/* NO powerdown at all */
3293			return;
3294		case 1:		/* Conservative about what to power down */
3295			if (cls == PCIC_STORAGE)
3296				return;
3297			/*FALLTHROUGH*/
3298		case 2:		/* Agressive about what to power down */
3299			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3300			    cls == PCIC_BASEPERIPH)
3301				return;
3302			/*FALLTHROUGH*/
3303		case 3:		/* Power down everything */
3304			break;
3305	}
3306	/*
3307	 * PCI spec says we can only go into D3 state from D0 state.
3308	 * Transition from D[12] into D0 before going to D3 state.
3309	 */
3310	ps = pci_get_powerstate(dev);
3311	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3312		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3313	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3314		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3315}
3316