/* pci.c revision 166566 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 166566 2007-02-08 14:33:07Z rwatson $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static int		pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static int		pci_msi_blacklisted(void);
105
/* Method table wiring the PCI bus driver into device, bus and PCI kobj interfaces. */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	/* Interrupt setup/teardown is simply passed up to the parent. */
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
157
158DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
159
160static devclass_t pci_devclass;
161DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
162MODULE_VERSION(pci, 1);
163
164static char	*pci_vendordata;
165static size_t	pci_vendordata_size;
166
167
/* One entry of the chipset/device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific (MAP_REG: config register offset) */
	int	arg2;	/* quirk-specific; unused by current quirk types */
};
176
/*
 * Table of known-broken devices.  devid packs the 16-bit device ID in
 * the high half and the 16-bit vendor ID in the low half, e.g.
 * 0x71138086 is vendor 0x8086 (Intel), device 0x7113.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminating entry: a zero devid ends the table */
};
211
212/* map register information */
213#define	PCI_MAPMEM	0x01	/* memory map */
214#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
215#define	PCI_MAPPORT	0x04	/* port map */
216
217struct devlist pci_devq;
218uint32_t pci_generation;
219uint32_t pci_numdevs = 0;
220
221/* sysctl vars */
222SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
223
224static int pci_enable_io_modes = 1;
225TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
226SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
227    &pci_enable_io_modes, 1,
228    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
229enable these bits correctly.  We'd like to do this all the time, but there\n\
230are some peripherals that this causes problems with.");
231
232static int pci_do_power_nodriver = 0;
233TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
234SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
235    &pci_do_power_nodriver, 0,
236  "Place a function into D3 state when no driver attaches to it.  0 means\n\
237disable.  1 means conservatively place devices into D3 state.  2 means\n\
238agressively place devices into D3 state.  3 means put absolutely everything\n\
239in D3 state.");
240
241static int pci_do_power_resume = 1;
242TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
243SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
244    &pci_do_power_resume, 1,
245  "Transition from D3 -> D0 on resume.");
246
247static int pci_do_vpd = 1;
248TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
249SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
250    "Enable support for VPD.");
251
252static int pci_do_msi = 1;
253TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
254SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
255    "Enable support for MSI interrupts");
256
257static int pci_do_msix = 1;
258TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
259SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
260    "Enable support for MSI-X interrupts");
261
262static int pci_honor_msi_blacklist = 1;
263TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
264SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
265    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
266
267/* Find a device_t by bus/slot/function */
268
269device_t
270pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
271{
272	struct pci_devinfo *dinfo;
273
274	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
275		if ((dinfo->cfg.bus == bus) &&
276		    (dinfo->cfg.slot == slot) &&
277		    (dinfo->cfg.func == func)) {
278			return (dinfo->cfg.dev);
279		}
280	}
281
282	return (NULL);
283}
284
285/* Find a device_t by vendor/device ID */
286
287device_t
288pci_find_device(uint16_t vendor, uint16_t device)
289{
290	struct pci_devinfo *dinfo;
291
292	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
293		if ((dinfo->cfg.vendor == vendor) &&
294		    (dinfo->cfg.device == device)) {
295			return (dinfo->cfg.dev);
296		}
297	}
298
299	return (NULL);
300}
301
302/* return base address of memory or port map */
303
304static uint32_t
305pci_mapbase(uint32_t mapreg)
306{
307	int mask = 0x03;
308	if ((mapreg & 0x01) == 0)
309		mask = 0x0f;
310	return (mapreg & ~mask);
311}
312
313/* return map type of memory or port map */
314
315static int
316pci_maptype(unsigned mapreg)
317{
318	static uint8_t maptype[0x10] = {
319		PCI_MAPMEM,		PCI_MAPPORT,
320		PCI_MAPMEM,		0,
321		PCI_MAPMEM,		PCI_MAPPORT,
322		0,			0,
323		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
324		PCI_MAPMEM|PCI_MAPMEMP, 0,
325		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
326		0,			0,
327	};
328
329	return maptype[mapreg & 0x0f];
330}
331
332/* return log2 of map size decoded for memory or port map */
333
334static int
335pci_mapsize(uint32_t testval)
336{
337	int ln2size;
338
339	testval = pci_mapbase(testval);
340	ln2size = 0;
341	if (testval != 0) {
342		while ((testval & 1) == 0)
343		{
344			ln2size++;
345			testval >>= 1;
346		}
347	}
348	return (ln2size);
349}
350
351/* return log2 of address range supported by map register */
352
static int
pci_maprange(unsigned mapreg)
{

	/* Decode the address-range width (in bits) a BAR can decode. */
	switch (mapreg & 0x07) {
	case 0x02:
		return (20);		/* memory below 1MB */
	case 0x04:
		return (64);		/* 64-bit memory */
	case 0x00:
	case 0x01:
	case 0x05:
		return (32);		/* 32-bit memory or I/O */
	default:
		return (0);		/* reserved encodings */
	}
}
372
373/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
374
375static void
376pci_fixancient(pcicfgregs *cfg)
377{
378	if (cfg->hdrtype != 0)
379		return;
380
381	/* PCI to PCI bridges use header type 1 */
382	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
383		cfg->hdrtype = 1;
384}
385
386/* extract header type specific config data */
387
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device: subsystem IDs at the type-0 offsets */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subsystem IDs, fewer BARs */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge: subsystem IDs at the type-2 offsets */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
	/* Unknown header types leave subvendor/subdevice/nummaps untouched. */
#undef REG
}
409
410/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A read of all-ones means no device responds at this selector. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/*
		 * 'size' lets callers allocate a structure larger than
		 * pci_devinfo with extra trailing space.
		 * NOTE(review): with M_WAITOK this malloc should not
		 * return NULL; the check below is defensive only.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header fields. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio conf structure. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		/* Bump the generation so pciio consumers see the change. */
		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
482
483static void
484pci_read_extcap(device_t pcib, pcicfgregs *cfg)
485{
486#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
487#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
488#if defined(__i386__) || defined(__amd64__)
489	uint64_t addr;
490#endif
491	uint32_t val;
492	int	ptr, nextptr, ptrptr;
493
494	switch (cfg->hdrtype & PCIM_HDRTYPE) {
495	case 0:
496	case 1:
497		ptrptr = PCIR_CAP_PTR;
498		break;
499	case 2:
500		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
501		break;
502	default:
503		return;		/* no extended capabilities support */
504	}
505	nextptr = REG(ptrptr, 1);	/* sanity check? */
506
507	/*
508	 * Read capability entries.
509	 */
510	while (nextptr != 0) {
511		/* Sanity check */
512		if (nextptr > 255) {
513			printf("illegal PCI extended capability offset %d\n",
514			    nextptr);
515			return;
516		}
517		/* Find the next entry */
518		ptr = nextptr;
519		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
520
521		/* Process this entry */
522		switch (REG(ptr + PCICAP_ID, 1)) {
523		case PCIY_PMG:		/* PCI power management */
524			if (cfg->pp.pp_cap == 0) {
525				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
526				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
527				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
528				if ((nextptr - ptr) > PCIR_POWER_DATA)
529					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
530			}
531			break;
532#if defined(__i386__) || defined(__amd64__)
533		case PCIY_HT:		/* HyperTransport */
534			/* Determine HT-specific capability type. */
535			val = REG(ptr + PCIR_HT_COMMAND, 2);
536			switch (val & PCIM_HTCMD_CAP_MASK) {
537			case PCIM_HTCAP_MSI_MAPPING:
538				/* Sanity check the mapping window. */
539				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
540				addr <<= 32;
541				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
542				if (addr != MSI_INTEL_ADDR_BASE)
543					device_printf(pcib,
544		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
545					    cfg->bus, cfg->slot, cfg->func,
546					    (long long)addr);
547
548				/* Enable MSI -> HT mapping. */
549				val |= PCIM_HTCMD_MSI_ENABLE;
550				WREG(ptr + PCIR_HT_COMMAND, val, 2);
551				break;
552			}
553			break;
554#endif
555		case PCIY_MSI:		/* PCI MSI */
556			cfg->msi.msi_location = ptr;
557			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
558			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
559						     PCIM_MSICTRL_MMC_MASK)>>1);
560			break;
561		case PCIY_MSIX:		/* PCI MSI-X */
562			cfg->msix.msix_location = ptr;
563			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
564			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
565			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
566			val = REG(ptr + PCIR_MSIX_TABLE, 4);
567			cfg->msix.msix_table_bar = PCIR_BAR(val &
568			    PCIM_MSIX_BIR_MASK);
569			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
570			val = REG(ptr + PCIR_MSIX_PBA, 4);
571			cfg->msix.msix_pba_bar = PCIR_BAR(val &
572			    PCIM_MSIX_BIR_MASK);
573			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
574			break;
575		case PCIY_VPD:		/* PCI Vital Product Data */
576			if (pci_do_vpd) {
577				cfg->vpd.vpd_reg = ptr;
578				pci_read_vpd(pcib, cfg);
579			}
580			break;
581		case PCIY_SUBVENDOR:
582			/* Should always be true. */
583			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
584				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
585				cfg->subvendor = val & 0xffff;
586				cfg->subdevice = val >> 16;
587			}
588		default:
589			break;
590		}
591	}
592/* REG and WREG use carry through to next functions */
593}
594
595/*
596 * PCI Vital Product Data
597 */
/*
 * Read one aligned 32-bit word of VPD data via the VPD capability's
 * address/data register pair.  Uses the REG/WREG macros defined in
 * pci_read_extcap() above.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Post the address; the flag bit (0x8000) clear requests a read. */
	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
	/*
	 * Poll until the device sets the flag bit to signal completion.
	 * NOTE(review): despite the comment there is no iteration cap;
	 * broken hardware would spin here forever -- confirm acceptable.
	 */
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return REG(cfg->vpd.vpd_reg + 4, 4);
}
610
#if 0
/*
 * Write one aligned 32-bit word of VPD data: post the data, then the
 * address with the flag bit (0x8000) set, and wait for the device to
 * clear the flag.  Currently compiled out; no callers.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + 4, data, 4);
	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
	/* NOTE(review): unbounded poll, same concern as pci_read_vpd_reg(). */
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
625
/* Cursor for reading VPD data as a byte stream on top of 32-bit reads. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum for RV validation */
};
634
635static uint8_t
636vpd_nextbyte(struct vpd_readstate *vrs)
637{
638	uint8_t byte;
639
640	if (vrs->bytesinval == 0) {
641		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
642		    vrs->off));
643		vrs->off += 4;
644		byte = vrs->val & 0xff;
645		vrs->bytesinval = 3;
646	} else {
647		vrs->val = vrs->val >> 8;
648		byte = vrs->val & 0xff;
649		vrs->bytesinval--;
650	}
651
652	vrs->cksum += byte;
653	return byte;
654}
655
656static void
657pci_read_vpd(device_t pcib, pcicfgregs *cfg)
658{
659	struct vpd_readstate vrs;
660	int state;
661	int name;
662	int remain;
663	int end;
664	int i;
665	uint8_t byte;
666	int alloc, off;		/* alloc/off for RO/W arrays */
667	int cksumvalid;
668	int dflen;
669
670	/* init vpd reader */
671	vrs.bytesinval = 0;
672	vrs.off = 0;
673	vrs.pcib = pcib;
674	vrs.cfg = cfg;
675	vrs.cksum = 0;
676
677	state = 0;
678	name = remain = i = 0;	/* shut up stupid gcc */
679	alloc = off = 0;	/* shut up stupid gcc */
680	dflen = 0;		/* shut up stupid gcc */
681	end = 0;
682	cksumvalid = -1;
683	for (; !end;) {
684		byte = vpd_nextbyte(&vrs);
685#if 0
686		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
687		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
688		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
689#endif
690		switch (state) {
691		case 0:		/* item name */
692			if (byte & 0x80) {
693				remain = vpd_nextbyte(&vrs);
694				remain |= vpd_nextbyte(&vrs) << 8;
695				if (remain > (0x7f*4 - vrs.off)) {
696					end = 1;
697					printf(
698			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
699					    cfg->bus, cfg->slot, cfg->func,
700					    remain);
701				}
702				name = byte & 0x7f;
703			} else {
704				remain = byte & 0x7;
705				name = (byte >> 3) & 0xf;
706			}
707			switch (name) {
708			case 0x2:	/* String */
709				cfg->vpd.vpd_ident = malloc(remain + 1,
710				    M_DEVBUF, M_WAITOK);
711				i = 0;
712				state = 1;
713				break;
714			case 0xf:	/* End */
715				end = 1;
716				state = -1;
717				break;
718			case 0x10:	/* VPD-R */
719				alloc = 8;
720				off = 0;
721				cfg->vpd.vpd_ros = malloc(alloc *
722				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
723				    M_WAITOK);
724				state = 2;
725				break;
726			case 0x11:	/* VPD-W */
727				alloc = 8;
728				off = 0;
729				cfg->vpd.vpd_w = malloc(alloc *
730				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
731				    M_WAITOK);
732				state = 5;
733				break;
734			default:	/* Invalid data, abort */
735				end = 1;
736				continue;
737			}
738			break;
739
740		case 1:	/* Identifier String */
741			cfg->vpd.vpd_ident[i++] = byte;
742			remain--;
743			if (remain == 0)  {
744				cfg->vpd.vpd_ident[i] = '\0';
745				state = 0;
746			}
747			break;
748
749		case 2:	/* VPD-R Keyword Header */
750			if (off == alloc) {
751				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
752				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
753				    M_DEVBUF, M_WAITOK);
754			}
755			cfg->vpd.vpd_ros[off].keyword[0] = byte;
756			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
757			dflen = vpd_nextbyte(&vrs);
758			if (dflen == 0 &&
759			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
760			    2) == 0) {
761				/*
762				 * if this happens, we can't trust the rest
763				 * of the VPD.
764				 */
765				printf("pci%d:%d:%d: bad keyword length: %d\n",
766				    cfg->bus, cfg->slot, cfg->func, dflen);
767				cksumvalid = 0;
768				end = 1;
769				break;
770			} else if (dflen == 0) {
771				cfg->vpd.vpd_ros[off].value = malloc(1 *
772				    sizeof *cfg->vpd.vpd_ros[off].value,
773				    M_DEVBUF, M_WAITOK);
774				cfg->vpd.vpd_ros[off].value[0] = '\x00';
775			} else
776				cfg->vpd.vpd_ros[off].value = malloc(
777				    (dflen + 1) *
778				    sizeof *cfg->vpd.vpd_ros[off].value,
779				    M_DEVBUF, M_WAITOK);
780			remain -= 3;
781			i = 0;
782			/* keep in sync w/ state 3's transistions */
783			if (dflen == 0 && remain == 0)
784				state = 0;
785			else if (dflen == 0)
786				state = 2;
787			else
788				state = 3;
789			break;
790
791		case 3:	/* VPD-R Keyword Value */
792			cfg->vpd.vpd_ros[off].value[i++] = byte;
793			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
794			    "RV", 2) == 0 && cksumvalid == -1) {
795				if (vrs.cksum == 0)
796					cksumvalid = 1;
797				else {
798					printf(
799				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
800					    cfg->bus, cfg->slot, cfg->func,
801					    vrs.cksum);
802					cksumvalid = 0;
803					end = 1;
804					break;
805				}
806			}
807			dflen--;
808			remain--;
809			/* keep in sync w/ state 2's transistions */
810			if (dflen == 0)
811				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
812			if (dflen == 0 && remain == 0) {
813				cfg->vpd.vpd_rocnt = off;
814				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
815				    off * sizeof *cfg->vpd.vpd_ros,
816				    M_DEVBUF, M_WAITOK);
817				state = 0;
818			} else if (dflen == 0)
819				state = 2;
820			break;
821
822		case 4:
823			remain--;
824			if (remain == 0)
825				state = 0;
826			break;
827
828		case 5:	/* VPD-W Keyword Header */
829			if (off == alloc) {
830				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
831				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
832				    M_DEVBUF, M_WAITOK);
833			}
834			cfg->vpd.vpd_w[off].keyword[0] = byte;
835			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
836			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
837			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
838			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
839			    sizeof *cfg->vpd.vpd_w[off].value,
840			    M_DEVBUF, M_WAITOK);
841			remain -= 3;
842			i = 0;
843			/* keep in sync w/ state 6's transistions */
844			if (dflen == 0 && remain == 0)
845				state = 0;
846			else if (dflen == 0)
847				state = 5;
848			else
849				state = 6;
850			break;
851
852		case 6:	/* VPD-W Keyword Value */
853			cfg->vpd.vpd_w[off].value[i++] = byte;
854			dflen--;
855			remain--;
856			/* keep in sync w/ state 5's transistions */
857			if (dflen == 0)
858				cfg->vpd.vpd_w[off++].value[i++] = '\0';
859			if (dflen == 0 && remain == 0) {
860				cfg->vpd.vpd_wcnt = off;
861				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
862				    off * sizeof *cfg->vpd.vpd_w,
863				    M_DEVBUF, M_WAITOK);
864				state = 0;
865			} else if (dflen == 0)
866				state = 5;
867			break;
868
869		default:
870			printf("pci%d:%d:%d: invalid state: %d\n",
871			    cfg->bus, cfg->slot, cfg->func, state);
872			end = 1;
873			break;
874		}
875	}
876
877	if (cksumvalid == 0) {
878		/* read-only data bad, clean up */
879		for (; off; off--)
880			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
881
882		free(cfg->vpd.vpd_ros, M_DEVBUF);
883		cfg->vpd.vpd_ros = NULL;
884	}
885#undef REG
886#undef WREG
887}
888
889int
890pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
891{
892	struct pci_devinfo *dinfo = device_get_ivars(child);
893	pcicfgregs *cfg = &dinfo->cfg;
894
895	*identptr = cfg->vpd.vpd_ident;
896
897	if (*identptr == NULL)
898		return ENXIO;
899
900	return 0;
901}
902
903int
904pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
905	const char **vptr)
906{
907	struct pci_devinfo *dinfo = device_get_ivars(child);
908	pcicfgregs *cfg = &dinfo->cfg;
909	int i;
910
911	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
912		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
913		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
914			*vptr = cfg->vpd.vpd_ros[i].value;
915		}
916
917	if (i != cfg->vpd.vpd_rocnt)
918		return 0;
919
920	*vptr = NULL;
921	return ENXIO;
922}
923
924/*
925 * Return the offset in configuration space of the requested extended
926 * capability entry or 0 if the specified capability was not found.
927 */
928int
929pci_find_extcap_method(device_t dev, device_t child, int capability,
930    int *capreg)
931{
932	struct pci_devinfo *dinfo = device_get_ivars(child);
933	pcicfgregs *cfg = &dinfo->cfg;
934	u_int32_t status;
935	u_int8_t ptr;
936
937	/*
938	 * Check the CAP_LIST bit of the PCI status register first.
939	 */
940	status = pci_read_config(child, PCIR_STATUS, 2);
941	if (!(status & PCIM_STATUS_CAPPRESENT))
942		return (ENXIO);
943
944	/*
945	 * Determine the start pointer of the capabilities list.
946	 */
947	switch (cfg->hdrtype & PCIM_HDRTYPE) {
948	case 0:
949	case 1:
950		ptr = PCIR_CAP_PTR;
951		break;
952	case 2:
953		ptr = PCIR_CAP_PTR_2;
954		break;
955	default:
956		/* XXX: panic? */
957		return (ENXIO);		/* no extended capabilities support */
958	}
959	ptr = pci_read_config(child, ptr, 1);
960
961	/*
962	 * Traverse the capabilities list.
963	 */
964	while (ptr != 0) {
965		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
966			if (capreg != NULL)
967				*capreg = ptr;
968			return (0);
969		}
970		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
971	}
972
973	return (ENOENT);
974}
975
976/*
977 * Support for MSI-X message interrupts.
978 */
/*
 * Program MSI-X table entry 'index' with the given message address
 * and data.  The table must already be mapped (msix_table_res set).
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset;

	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr low, addr high, data, control. */
	offset = cfg->msix.msix_table_offset + index * 16;
	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
}
992
/*
 * Set the Mask bit in the vector control word of MSI-X table entry
 * 'index', suppressing delivery of that message.
 */
void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset, val;

	/*
	 * NOTE(review): this bounds-check uses msix_msgnum while
	 * pci_unmask_msix() checks msix_alloc -- confirm which bound
	 * is intended and make them consistent.
	 */
	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
	/* Vector control is the last dword of the 16-byte table entry. */
	offset = cfg->msix.msix_table_offset + index * 16 + 12;
	val = bus_read_4(cfg->msix.msix_table_res, offset);
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(cfg->msix.msix_table_res, offset, val);
	}
}
1008
/*
 * Clear the Mask bit in the vector control word of MSI-X table entry
 * 'index', re-enabling delivery of that message.
 */
void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset, val;

	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
	/* Vector control is the last dword of the 16-byte table entry. */
	offset = cfg->msix.msix_table_offset + index * 16 + 12;
	val = bus_read_4(cfg->msix.msix_table_res, offset);
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(cfg->msix.msix_table_res, offset, val);
	}
}
1024
1025int
1026pci_pending_msix(device_t dev, u_int index)
1027{
1028	struct pci_devinfo *dinfo = device_get_ivars(dev);
1029	pcicfgregs *cfg = &dinfo->cfg;
1030	uint32_t offset, bit;
1031
1032	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1033	offset = cfg->msix.msix_pba_offset + (index / 4) * 4;
1034	bit = 1 << index % 32;
1035	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1036}
1037
1038/*
1039 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1040 * returned in *count.  After this function returns, each message will be
1041 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1042 */
1043int
1044pci_alloc_msix_method(device_t dev, device_t child, int *count)
1045{
1046	struct pci_devinfo *dinfo = device_get_ivars(child);
1047	pcicfgregs *cfg = &dinfo->cfg;
1048	struct resource_list_entry *rle;
1049	int actual, error, i, irq, max;
1050
1051	/* Don't let count == 0 get us into trouble. */
1052	if (*count == 0)
1053		return (EINVAL);
1054
1055	/* If rid 0 is allocated, then fail. */
1056	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1057	if (rle != NULL && rle->res != NULL)
1058		return (ENXIO);
1059
1060	/* Already have allocated messages? */
1061	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1062		return (ENXIO);
1063
1064	/* If MSI is blacklisted for this system, fail. */
1065	if (pci_msi_blacklisted())
1066		return (ENXIO);
1067
1068	/* MSI-X capability present? */
1069	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1070		return (ENODEV);
1071
1072	/* Make sure the appropriate BARs are mapped. */
1073	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1074	    cfg->msix.msix_table_bar);
1075	if (rle == NULL || rle->res == NULL ||
1076	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1077		return (ENXIO);
1078	cfg->msix.msix_table_res = rle->res;
1079	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1080		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1081		    cfg->msix.msix_pba_bar);
1082		if (rle == NULL || rle->res == NULL ||
1083		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1084			return (ENXIO);
1085	}
1086	cfg->msix.msix_pba_res = rle->res;
1087
1088	if (bootverbose)
1089		device_printf(child,
1090		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1091		    *count, cfg->msix.msix_msgnum);
1092	max = min(*count, cfg->msix.msix_msgnum);
1093	for (i = 0; i < max; i++) {
1094		/* Allocate a message. */
1095		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1096		    &irq);
1097		if (error)
1098			break;
1099		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1100		    irq, 1);
1101	}
1102	actual = i;
1103
1104	if (bootverbose) {
1105		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1106		if (actual == 1)
1107			device_printf(child, "using IRQ %lu for MSI-X\n",
1108			    rle->start);
1109		else {
1110			int run;
1111
1112			/*
1113			 * Be fancy and try to print contiguous runs of
1114			 * IRQ values as ranges.  'irq' is the previous IRQ.
1115			 * 'run' is true if we are in a range.
1116			 */
1117			device_printf(child, "using IRQs %lu", rle->start);
1118			irq = rle->start;
1119			run = 0;
1120			for (i = 1; i < actual; i++) {
1121				rle = resource_list_find(&dinfo->resources,
1122				    SYS_RES_IRQ, i + 1);
1123
1124				/* Still in a run? */
1125				if (rle->start == irq + 1) {
1126					run = 1;
1127					irq++;
1128					continue;
1129				}
1130
1131				/* Finish previous range. */
1132				if (run) {
1133					printf("-%d", irq);
1134					run = 0;
1135				}
1136
1137				/* Start new range. */
1138				printf(",%lu", rle->start);
1139				irq = rle->start;
1140			}
1141
1142			/* Unfinished range? */
1143			if (run)
1144				printf("%d", irq);
1145			printf(" for MSI-X\n");
1146		}
1147	}
1148
1149	/* Mask all vectors. */
1150	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1151		pci_mask_msix(child, i);
1152
1153	/* Update control register to enable MSI-X. */
1154	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1155	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1156	    cfg->msix.msix_ctrl, 2);
1157
1158	/* Update counts of alloc'd messages. */
1159	cfg->msix.msix_alloc = actual;
1160	*count = actual;
1161	return (0);
1162}
1163
1164/*
1165 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1166 * the first N messages in the MSI-X table.  However, device drivers may
1167 * want to use different layouts in the case that they do not allocate a
1168 * full table.  This method allows the driver to specify what layout it
1169 * wants.  It must be called after a successful pci_alloc_msix() but
1170 * before any of the associated SYS_RES_IRQ resources are allocated via
1171 * bus_alloc_resource().  The 'indices' array contains N (where N equals
1172 * the 'count' returned from pci_alloc_msix()) message indices.  The
1173 * indices are 1-based (meaning the first message is at index 1).  On
1174 * successful return, each of the messages in the 'indices' array will
1175 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1176 * if indices contains { 2, 4 }, then upon successful return, the 'child'
1177 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1178 */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/*
	 * Sanity check the indices: each must be a valid 1-based message
	 * index within the device's MSI-X table.
	 */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates (O(n^2); msix_alloc is small). */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/*
	 * Make sure none of the resources are allocated.  The rids are not
	 * necessarily contiguous, so walk rids upward until msix_alloc
	 * entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/*
	 * Map the IRQ values to the new message indices and rids: the rid
	 * of each new SYS_RES_IRQ entry equals the caller-chosen index.
	 */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i], irqs[i]);
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1242
/*
 * Release all MSI-X messages allocated to 'child'.  Returns ENODEV when
 * none are allocated (so the caller can fall back to plain MSI) and
 * EBUSY if any of the IRQ resources are still held by the driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  Rids may be
	 * sparse after pci_remap_msix_method(), so walk rids upward until
	 * msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1285
1286/*
 * Return the maximum number of MSI-X messages this device supports.
1288 * Basically, assuming the MD code can alloc messages, this function
1289 * should return the maximum value that pci_alloc_msix() can return.
1290 * Thus, it is subject to the tunables, etc.
1291 */
1292int
1293pci_msix_count_method(device_t dev, device_t child)
1294{
1295	struct pci_devinfo *dinfo = device_get_ivars(child);
1296	pcicfgregs *cfg = &dinfo->cfg;
1297
1298	if (pci_do_msix && cfg->msix.msix_location != 0)
1299		return (cfg->msix.msix_msgnum);
1300	return (0);
1301}
1302
1303/*
1304 * Support for MSI message signalled interrupts.
1305 */
1306void
1307pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1308{
1309	struct pci_devinfo *dinfo = device_get_ivars(dev);
1310	pcicfgregs *cfg = &dinfo->cfg;
1311
1312	/* Write data and address values. */
1313	cfg->msi.msi_addr = address;
1314	cfg->msi.msi_data = data;
1315	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1316	    address & 0xffffffff, 4);
1317	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1318		pci_write_config(dev, cfg->msi.msi_location +
1319		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1320		pci_write_config(dev, cfg->msi.msi_location +
1321		    PCIR_MSI_DATA_64BIT, data, 2);
1322	} else
1323		pci_write_config(dev, cfg->msi.msi_location +
1324		    PCIR_MSI_DATA, data, 2);
1325
1326	/* Enable MSI in the control register. */
1327	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1328	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1329	    cfg->msi.msi_ctrl, 2);
1330}
1331
1332/*
1333 * Restore MSI registers during resume.  If MSI is enabled then
1334 * restore the data and address registers in addition to the control
1335 * register.
1336 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint64_t address;
	uint16_t data;

	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Reprogram the saved address/data before re-enabling. */
		address = cfg->msi.msi_addr;
		data = cfg->msi.msi_data;
		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capability shifts the data register up. */
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA, data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1362
1363/*
1364 * Returns true if the specified device is blacklisted because MSI
1365 * doesn't work.
1366 */
1367int
1368pci_msi_device_blacklisted(device_t dev)
1369{
1370	struct pci_quirk *q;
1371
1372	if (!pci_honor_msi_blacklist)
1373		return (0);
1374
1375	for (q = &pci_quirks[0]; q->devid; q++) {
1376		if (q->devid == pci_get_devid(dev) &&
1377		    q->type == PCI_QUIRK_DISABLE_MSI)
1378			return (1);
1379	}
1380	return (0);
1381}
1382
1383/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1385 * we just check for blacklisted chipsets as represented by the
1386 * host-PCI bridge at device 0:0:0.  In the future, it may become
1387 * necessary to check other system attributes, such as the kenv values
1388 * that give the motherboard manufacturer and model number.
1389 */
1390static int
1391pci_msi_blacklisted(void)
1392{
1393	device_t dev;
1394
1395	if (!pci_honor_msi_blacklist)
1396		return (0);
1397
1398	dev = pci_find_bsf(0, 0, 0);
1399	if (dev != NULL)
1400		return (pci_msi_device_blacklisted(dev));
1401	return (0);
1402}
1403
1404/*
1405 * Attempt to allocate *count MSI messages.  The actual number allocated is
1406 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1408 */
1409int
1410pci_alloc_msi_method(device_t dev, device_t child, int *count)
1411{
1412	struct pci_devinfo *dinfo = device_get_ivars(child);
1413	pcicfgregs *cfg = &dinfo->cfg;
1414	struct resource_list_entry *rle;
1415	int actual, error, i, irqs[32];
1416	uint16_t ctrl;
1417
1418	/* Don't let count == 0 get us into trouble. */
1419	if (*count == 0)
1420		return (EINVAL);
1421
1422	/* If rid 0 is allocated, then fail. */
1423	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1424	if (rle != NULL && rle->res != NULL)
1425		return (ENXIO);
1426
1427	/* Already have allocated messages? */
1428	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1429		return (ENXIO);
1430
1431	/* If MSI is blacklisted for this system, fail. */
1432	if (pci_msi_blacklisted())
1433		return (ENXIO);
1434
1435	/* MSI capability present? */
1436	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1437		return (ENODEV);
1438
1439	if (bootverbose)
1440		device_printf(child,
1441		    "attempting to allocate %d MSI vectors (%d supported)\n",
1442		    *count, cfg->msi.msi_msgnum);
1443
1444	/* Don't ask for more than the device supports. */
1445	actual = min(*count, cfg->msi.msi_msgnum);
1446
1447	/* Don't ask for more than 32 messages. */
1448	actual = min(actual, 32);
1449
1450	/* MSI requires power of 2 number of messages. */
1451	if (!powerof2(actual))
1452		return (EINVAL);
1453
1454	for (;;) {
1455		/* Try to allocate N messages. */
1456		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1457		    cfg->msi.msi_msgnum, irqs);
1458		if (error == 0)
1459			break;
1460		if (actual == 1)
1461			return (error);
1462
1463		/* Try N / 2. */
1464		actual >>= 1;
1465	}
1466
1467	/*
1468	 * We now have N actual messages mapped onto SYS_RES_IRQ
1469	 * resources in the irqs[] array, so add new resources
1470	 * starting at rid 1.
1471	 */
1472	for (i = 0; i < actual; i++)
1473		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1474		    irqs[i], irqs[i], 1);
1475
1476	if (bootverbose) {
1477		if (actual == 1)
1478			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1479		else {
1480			int run;
1481
1482			/*
1483			 * Be fancy and try to print contiguous runs
1484			 * of IRQ values as ranges.  'run' is true if
1485			 * we are in a range.
1486			 */
1487			device_printf(child, "using IRQs %d", irqs[0]);
1488			run = 0;
1489			for (i = 1; i < actual; i++) {
1490
1491				/* Still in a run? */
1492				if (irqs[i] == irqs[i - 1] + 1) {
1493					run = 1;
1494					continue;
1495				}
1496
1497				/* Finish previous range. */
1498				if (run) {
1499					printf("-%d", irqs[i - 1]);
1500					run = 0;
1501				}
1502
1503				/* Start new range. */
1504				printf(",%d", irqs[i]);
1505			}
1506
1507			/* Unfinished range? */
1508			if (run)
1509				printf("%d", irqs[actual - 1]);
1510			printf(" for MSI\n");
1511		}
1512	}
1513
1514	/* Update control register with actual count and enable MSI. */
1515	ctrl = cfg->msi.msi_ctrl;
1516	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1517	ctrl |= (ffs(actual) - 1) << 4;
1518	cfg->msi.msi_ctrl = ctrl;
1519	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1520
1521	/* Update counts of alloc'd messages. */
1522	cfg->msi.msi_alloc = actual;
1523	*count = actual;
1524	return (0);
1525}
1526
1527/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first; pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated, in which case we fall through
	 * to plain MSI.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated, collecting the
	 * IRQ values as we go.  MSI rids are contiguous starting at 1.
	 */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1570
1571/*
 * Return the maximum number of MSI messages this device supports.
1573 * Basically, assuming the MD code can alloc messages, this function
1574 * should return the maximum value that pci_alloc_msi() can return.
1575 * Thus, it is subject to the tunables, etc.
1576 */
1577int
1578pci_msi_count_method(device_t dev, device_t child)
1579{
1580	struct pci_devinfo *dinfo = device_get_ivars(child);
1581	pcicfgregs *cfg = &dinfo->cfg;
1582
1583	if (pci_do_msi && cfg->msi.msi_location != 0)
1584		return (cfg->msi.msi_msgnum);
1585	return (0);
1586}
1587
1588/* free pcicfgregs structure and all depending data structures */
1589
1590int
1591pci_freecfg(struct pci_devinfo *dinfo)
1592{
1593	struct devlist *devlist_head;
1594	int i;
1595
1596	devlist_head = &pci_devq;
1597
1598	if (dinfo->cfg.vpd.vpd_reg) {
1599		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1600		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1601			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1602		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1603		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1604			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1605		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1606	}
1607	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1608	free(dinfo, M_DEVBUF);
1609
1610	/* increment the generation count */
1611	pci_generation++;
1612
1613	/* we're losing one device */
1614	pci_numdevs--;
1615	return (0);
1616}
1617
1618/*
 * PCI power management
1620 */
1621int
1622pci_set_powerstate_method(device_t dev, device_t child, int state)
1623{
1624	struct pci_devinfo *dinfo = device_get_ivars(child);
1625	pcicfgregs *cfg = &dinfo->cfg;
1626	uint16_t status;
1627	int result, oldstate, highest, delay;
1628
1629	if (cfg->pp.pp_cap == 0)
1630		return (EOPNOTSUPP);
1631
1632	/*
1633	 * Optimize a no state change request away.  While it would be OK to
1634	 * write to the hardware in theory, some devices have shown odd
1635	 * behavior when going from D3 -> D3.
1636	 */
1637	oldstate = pci_get_powerstate(child);
1638	if (oldstate == state)
1639		return (0);
1640
1641	/*
1642	 * The PCI power management specification states that after a state
1643	 * transition between PCI power states, system software must
1644	 * guarantee a minimal delay before the function accesses the device.
1645	 * Compute the worst case delay that we need to guarantee before we
1646	 * access the device.  Many devices will be responsive much more
1647	 * quickly than this delay, but there are some that don't respond
1648	 * instantly to state changes.  Transitions to/from D3 state require
1649	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1650	 * is done below with DELAY rather than a sleeper function because
1651	 * this function can be called from contexts where we cannot sleep.
1652	 */
1653	highest = (oldstate > state) ? oldstate : state;
1654	if (highest == PCI_POWERSTATE_D3)
1655	    delay = 10000;
1656	else if (highest == PCI_POWERSTATE_D2)
1657	    delay = 200;
1658	else
1659	    delay = 0;
1660	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1661	    & ~PCIM_PSTAT_DMASK;
1662	result = 0;
1663	switch (state) {
1664	case PCI_POWERSTATE_D0:
1665		status |= PCIM_PSTAT_D0;
1666		break;
1667	case PCI_POWERSTATE_D1:
1668		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1669			return (EOPNOTSUPP);
1670		status |= PCIM_PSTAT_D1;
1671		break;
1672	case PCI_POWERSTATE_D2:
1673		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1674			return (EOPNOTSUPP);
1675		status |= PCIM_PSTAT_D2;
1676		break;
1677	case PCI_POWERSTATE_D3:
1678		status |= PCIM_PSTAT_D3;
1679		break;
1680	default:
1681		return (EINVAL);
1682	}
1683
1684	if (bootverbose)
1685		printf(
1686		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1687		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1688		    oldstate, state);
1689
1690	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1691	if (delay)
1692		DELAY(delay);
1693	return (0);
1694}
1695
1696int
1697pci_get_powerstate_method(device_t dev, device_t child)
1698{
1699	struct pci_devinfo *dinfo = device_get_ivars(child);
1700	pcicfgregs *cfg = &dinfo->cfg;
1701	uint16_t status;
1702	int result;
1703
1704	if (cfg->pp.pp_cap != 0) {
1705		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1706		switch (status & PCIM_PSTAT_DMASK) {
1707		case PCIM_PSTAT_D0:
1708			result = PCI_POWERSTATE_D0;
1709			break;
1710		case PCIM_PSTAT_D1:
1711			result = PCI_POWERSTATE_D1;
1712			break;
1713		case PCIM_PSTAT_D2:
1714			result = PCI_POWERSTATE_D2;
1715			break;
1716		case PCIM_PSTAT_D3:
1717			result = PCI_POWERSTATE_D3;
1718			break;
1719		default:
1720			result = PCI_POWERSTATE_UNKNOWN;
1721			break;
1722		}
1723	} else {
1724		/* No support, device is always at D0 */
1725		result = PCI_POWERSTATE_D0;
1726	}
1727	return (result);
1728}
1729
1730/*
1731 * Some convenience functions for PCI device drivers.
1732 */
1733
1734static __inline void
1735pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1736{
1737	uint16_t	command;
1738
1739	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1740	command |= bit;
1741	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1742}
1743
1744static __inline void
1745pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1746{
1747	uint16_t	command;
1748
1749	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1750	command &= ~bit;
1751	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1752}
1753
/* Turn on bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1760
/* Turn off bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1767
1768int
1769pci_enable_io_method(device_t dev, device_t child, int space)
1770{
1771	uint16_t command;
1772	uint16_t bit;
1773	char *error;
1774
1775	bit = 0;
1776	error = NULL;
1777
1778	switch(space) {
1779	case SYS_RES_IOPORT:
1780		bit = PCIM_CMD_PORTEN;
1781		error = "port";
1782		break;
1783	case SYS_RES_MEMORY:
1784		bit = PCIM_CMD_MEMEN;
1785		error = "memory";
1786		break;
1787	default:
1788		return (EINVAL);
1789	}
1790	pci_set_command_bit(dev, child, bit);
1791	/* Some devices seem to need a brief stall here, what do to? */
1792	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1793	if (command & bit)
1794		return (0);
1795	device_printf(child, "failed to enable %s mapping!\n", error);
1796	return (ENXIO);
1797}
1798
1799int
1800pci_disable_io_method(device_t dev, device_t child, int space)
1801{
1802	uint16_t command;
1803	uint16_t bit;
1804	char *error;
1805
1806	bit = 0;
1807	error = NULL;
1808
1809	switch(space) {
1810	case SYS_RES_IOPORT:
1811		bit = PCIM_CMD_PORTEN;
1812		error = "port";
1813		break;
1814	case SYS_RES_MEMORY:
1815		bit = PCIM_CMD_MEMEN;
1816		error = "memory";
1817		break;
1818	default:
1819		return (EINVAL);
1820	}
1821	pci_clear_command_bit(dev, child, bit);
1822	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1823	if (command & bit) {
1824		device_printf(child, "failed to disable %s mapping!\n", error);
1825		return (ENXIO);
1826	}
1827	return (0);
1828}
1829
1830/*
1831 * New style pci driver.  Parent device is either a pci-host-bridge or a
1832 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1833 */
1834
/*
 * Dump the interesting contents of a device's config header and
 * capabilities (power, VPD, MSI, MSI-X) to the console.  Only active
 * when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{
	int i;

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Basic identification and location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: supported and current states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* Vital Product Data: read-only then read/write keywords. */
		if (cfg->vpd.vpd_reg) {
			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
				struct vpd_readonly *vrop;
				vrop = &cfg->vpd.vpd_ros[i];
				if (strncmp("CP", vrop->keyword, 2) == 0)
					printf("\tCP: id %d, BAR%d, off %#x\n",
					    vrop->value[0], vrop->value[1],
					    le16toh(
					      *(uint16_t *)&vrop->value[2]));
				else if (strncmp("RV", vrop->keyword, 2) == 0)
					printf("\tRV: %#hhx\n", vrop->value[0]);
				else
					printf("\t%.2s: %s\n", vrop->keyword,
					    vrop->value);
			}
			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
				struct vpd_write *vwp;
				vwp = &cfg->vpd.vpd_w[i];
				if (strncmp("RW", vwp->keyword, 2) != 0)
					printf("\t%.2s(%#x-%#x): %s\n",
					    vwp->keyword, vwp->start,
					    vwp->start + vwp->len, vwp->value);
			}
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary, including table/PBA BAR(s). */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1917
1918static int
1919pci_porten(device_t pcib, int b, int s, int f)
1920{
1921	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1922		& PCIM_CMD_PORTEN) != 0;
1923}
1924
1925static int
1926pci_memen(device_t pcib, int b, int s, int f)
1927{
1928	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1929		& PCIM_CMD_MEMEN) != 0;
1930}
1931
1932/*
1933 * Add a resource based on a pci map register. Return 1 if the map
1934 * register is a 32bit map register or 2 if it is a 64bit register.
1935 */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	/*
	 * Size the BAR the standard way: save the current value, write
	 * all ones, read back the implemented bits, then restore the
	 * original value.
	 */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	if (pci_maptype(map) & PCI_MAPMEM)
		type = SYS_RES_MEMORY;
	else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	/* A 64-bit BAR consumes two consecutive 32-bit registers. */
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if ((testval & 0x1) == 0x1 &&
	    (testval & 0x2) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The address doesn't fit in this platform's u_long. */
		device_printf(bus,
		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Not quite sure what to do on failure of allocating the resource
	 * since I can postulate several right answers.
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL)
		return (barlen);
	start = rman_get_start(res);
	if ((u_long)start != start) {
		/* Wait a minute!  this platform can't do this address. */
		device_printf(bus,
		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
		    b, s, f, reg, (uintmax_t)start);
		resource_list_release(rl, bus, dev, type, reg, res);
		return (barlen);
	}
	/* Write the assigned address back to the BAR (both halves if 64-bit). */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2071
2072/*
2073 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
2075 * same addresses that old ISA hardware did. This dictates that we use
2076 * those addresses and ignore the BAR's if we cannot set PCI native
2077 * addressing mode.
2078 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	/* The prog-if byte says which channels run in native PCI mode. */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: honor BARs 0 and 1. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/*
		 * Primary channel in compatibility mode: ignore BARs 0/1
		 * and reserve the legacy ISA command/control addresses.
		 */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: honor BARs 2 and 3. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Secondary channel in compatibility mode: legacy addresses. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/*
	 * BARs 4 and 5 are always taken from the device regardless of
	 * the addressing mode (BAR 4 typically holds the bus-master DMA
	 * registers -- not verified here).
	 */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2132
2133static void
2134pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2135{
2136	struct pci_devinfo *dinfo = device_get_ivars(dev);
2137	pcicfgregs *cfg = &dinfo->cfg;
2138	char tunable_name[64];
2139	int irq;
2140
2141	/* Has to have an intpin to have an interrupt. */
2142	if (cfg->intpin == 0)
2143		return;
2144
2145	/* Let the user override the IRQ with a tunable. */
2146	irq = PCI_INVALID_IRQ;
2147	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2148	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2149	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2150		irq = PCI_INVALID_IRQ;
2151
2152	/*
2153	 * If we didn't get an IRQ via the tunable, then we either use the
2154	 * IRQ value in the intline register or we ask the bus to route an
2155	 * interrupt for us.  If force_route is true, then we only use the
2156	 * value in the intline register if the bus was unable to assign an
2157	 * IRQ.
2158	 */
2159	if (!PCI_INTERRUPT_VALID(irq)) {
2160		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2161			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2162		if (!PCI_INTERRUPT_VALID(irq))
2163			irq = cfg->intline;
2164	}
2165
2166	/* If after all that we don't have an IRQ, just bail. */
2167	if (!PCI_INTERRUPT_VALID(irq))
2168		return;
2169
2170	/* Update the config register if it changed. */
2171	if (irq != cfg->intline) {
2172		cfg->intline = irq;
2173		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2174	}
2175
2176	/* Add this IRQ as rid 0 interrupt resource. */
2177	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2178}
2179
/*
 * Discover the BARs, quirked map registers and interrupt of (dev) and
 * record them on its resource list.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices need special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV))
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the number of map registers consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.  Quirk device IDs are
	 * encoded as (device << 16) | vendor.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2230
/*
 * Scan PCI bus (busno) below (dev) and add a child device for every
 * slot/function that responds.  (dinfo_size) lets subclassed busses
 * embed struct pci_devinfo inside a larger per-device structure.
 */
void
pci_add_children(device_t dev, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Probe only function 0 unless the header says multi-function. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* An out-of-range header type means no device is present. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2262
2263void
2264pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2265{
2266	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2267	device_set_ivars(dinfo->cfg.dev, dinfo);
2268	resource_list_init(&dinfo->resources);
2269	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2270	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2271	pci_print_verbose(dinfo);
2272	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2273}
2274
2275static int
2276pci_probe(device_t dev)
2277{
2278
2279	device_set_desc(dev, "PCI bus");
2280
2281	/* Allow other subclasses to override this driver. */
2282	return (-1000);
2283}
2284
2285static int
2286pci_attach(device_t dev)
2287{
2288	int busno;
2289
2290	/*
2291	 * Since there can be multiple independantly numbered PCI
2292	 * busses on systems with multiple PCI domains, we can't use
2293	 * the unit number to decide which bus we are probing. We ask
2294	 * the parent pcib what our bus number is.
2295	 */
2296	busno = pcib_get_bus(dev);
2297	if (bootverbose)
2298		device_printf(dev, "physical bus=%d\n", busno);
2299
2300	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2301
2302	return (bus_generic_attach(dev));
2303}
2304
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, and then (when resume-time power management is enabled)
 * place attached type 0 devices into a low-power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 * (acpi_dev == NULL skips this loop entirely.)
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2352
/*
 * Bus resume method: power each attached type 0 child back up to D0
 * (when resume-time power management is enabled), restore its saved
 * config space, and resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2387
2388static void
2389pci_load_vendor_data(void)
2390{
2391	caddr_t vendordata, info;
2392
2393	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2394		info = preload_search_info(vendordata, MODINFO_ADDR);
2395		pci_vendordata = *(char **)info;
2396		info = preload_search_info(vendordata, MODINFO_SIZE);
2397		pci_vendordata_size = *(size_t *)info;
2398		/* terminate the database */
2399		pci_vendordata[pci_vendordata_size] = '\n';
2400	}
2401}
2402
2403void
2404pci_driver_added(device_t dev, driver_t *driver)
2405{
2406	int numdevs;
2407	device_t *devlist;
2408	device_t child;
2409	struct pci_devinfo *dinfo;
2410	int i;
2411
2412	if (bootverbose)
2413		device_printf(dev, "driver added\n");
2414	DEVICE_IDENTIFY(driver, dev);
2415	device_get_children(dev, &devlist, &numdevs);
2416	for (i = 0; i < numdevs; i++) {
2417		child = devlist[i];
2418		if (device_get_state(child) != DS_NOTPRESENT)
2419			continue;
2420		dinfo = device_get_ivars(child);
2421		pci_print_verbose(dinfo);
2422		if (bootverbose)
2423			printf("pci%d:%d:%d: reprobing on driver added\n",
2424			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2425		pci_cfg_restore(child, dinfo);
2426		if (device_probe_and_attach(child) != 0)
2427			pci_cfg_save(child, dinfo, 1);
2428	}
2429	free(devlist, M_TEMP);
2430}
2431
2432int
2433pci_print_child(device_t dev, device_t child)
2434{
2435	struct pci_devinfo *dinfo;
2436	struct resource_list *rl;
2437	int retval = 0;
2438
2439	dinfo = device_get_ivars(child);
2440	rl = &dinfo->resources;
2441
2442	retval += bus_print_child_header(dev, child);
2443
2444	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2445	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2446	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2447	if (device_get_flags(dev))
2448		retval += printf(" flags %#x", device_get_flags(dev));
2449
2450	retval += printf(" at device %d.%d", pci_get_slot(child),
2451	    pci_get_function(child));
2452
2453	retval += bus_print_child_footer(dev, child);
2454
2455	return (retval);
2456}
2457
/*
 * Class/subclass description table used by pci_probe_nomatch() when a
 * device has no driver.  A subclass of -1 supplies the generic
 * description for the whole class; the table is terminated by a NULL
 * desc entry.
 */
static struct
{
	int	class;		/* PCI base class (PCIC_*) */
	int	subclass;	/* PCI subclass (PCIS_*), or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2544
2545void
2546pci_probe_nomatch(device_t dev, device_t child)
2547{
2548	int	i;
2549	char	*cp, *scp, *device;
2550
2551	/*
2552	 * Look for a listing for this device in a loaded device database.
2553	 */
2554	if ((device = pci_describe_device(child)) != NULL) {
2555		device_printf(dev, "<%s>", device);
2556		free(device, M_DEVBUF);
2557	} else {
2558		/*
2559		 * Scan the class/subclass descriptions for a general
2560		 * description.
2561		 */
2562		cp = "unknown";
2563		scp = NULL;
2564		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2565			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2566				if (pci_nomatch_tab[i].subclass == -1) {
2567					cp = pci_nomatch_tab[i].desc;
2568				} else if (pci_nomatch_tab[i].subclass ==
2569				    pci_get_subclass(child)) {
2570					scp = pci_nomatch_tab[i].desc;
2571				}
2572			}
2573		}
2574		device_printf(dev, "<%s%s%s>",
2575		    cp ? cp : "",
2576		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2577		    scp ? scp : "");
2578	}
2579	printf(" at device %d.%d (no driver attached)\n",
2580	    pci_get_slot(child), pci_get_function(child));
2581	if (pci_do_power_nodriver)
2582		pci_cfg_save(child,
2583		    (struct pci_devinfo *) device_get_ivars(child), 1);
2584	return;
2585}
2586
2587/*
2588 * Parse the PCI device database, if loaded, and return a pointer to a
2589 * description of the device.
2590 *
2591 * The database is flat text formatted as follows:
2592 *
2593 * Any line not in a valid format is ignored.
2594 * Lines are terminated with newline '\n' characters.
2595 *
2596 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2597 * the vendor name.
2598 *
2599 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2600 * - devices cannot be listed without a corresponding VENDOR line.
2601 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2602 * another TAB, then the device name.
2603 */
2604
2605/*
2606 * Assuming (ptr) points to the beginning of a line in the database,
2607 * return the vendor or device and description of the next entry.
2608 * The value of (vendor) or (device) inappropriate for the entry type
2609 * is set to -1.  Returns nonzero at the end of the database.
2610 *
 * Note that this parser is not entirely robust against corrupt data;
 * we guard against running off the end of the buffer by appending a
 * terminating newline to the database when it is initialised.
2614 */
2615static int
2616pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2617{
2618	char	*cp = *ptr;
2619	int	left;
2620
2621	*device = -1;
2622	*vendor = -1;
2623	**desc = '\0';
2624	for (;;) {
2625		left = pci_vendordata_size - (cp - pci_vendordata);
2626		if (left <= 0) {
2627			*ptr = cp;
2628			return(1);
2629		}
2630
2631		/* vendor entry? */
2632		if (*cp != '\t' &&
2633		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2634			break;
2635		/* device entry? */
2636		if (*cp == '\t' &&
2637		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2638			break;
2639
2640		/* skip to next line */
2641		while (*cp != '\n' && left > 0) {
2642			cp++;
2643			left--;
2644		}
2645		if (*cp == '\n') {
2646			cp++;
2647			left--;
2648		}
2649	}
2650	/* skip to next line */
2651	while (*cp != '\n' && left > 0) {
2652		cp++;
2653		left--;
2654	}
2655	if (*cp == '\n' && left > 0)
2656		cp++;
2657	*ptr = cp;
2658	return(0);
2659}
2660
2661static char *
2662pci_describe_device(device_t dev)
2663{
2664	int	vendor, device;
2665	char	*desc, *vp, *dp, *line;
2666
2667	desc = vp = dp = NULL;
2668
2669	/*
2670	 * If we have no vendor data, we can't do anything.
2671	 */
2672	if (pci_vendordata == NULL)
2673		goto out;
2674
2675	/*
2676	 * Scan the vendor data looking for this device
2677	 */
2678	line = pci_vendordata;
2679	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2680		goto out;
2681	for (;;) {
2682		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2683			goto out;
2684		if (vendor == pci_get_vendor(dev))
2685			break;
2686	}
2687	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2688		goto out;
2689	for (;;) {
2690		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2691			*dp = 0;
2692			break;
2693		}
2694		if (vendor != -1) {
2695			*dp = 0;
2696			break;
2697		}
2698		if (device == pci_get_device(dev))
2699			break;
2700	}
2701	if (dp[0] == '\0')
2702		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2703	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2704	    NULL)
2705		sprintf(desc, "%s, %s", vp, dp);
2706 out:
2707	if (vp != NULL)
2708		free(vp, M_DEVBUF);
2709	if (dp != NULL)
2710		free(dp, M_DEVBUF);
2711	return(desc);
2712}
2713
/*
 * Bus interface: read a PCI instance variable for (child).  Values
 * come straight from the cached config registers in the child's
 * pci_devinfo.  Returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined ID, same encoding as the pci_quirks table. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2793
/*
 * Bus interface: write a PCI instance variable.  Only the interrupt
 * pin is currently writable; the identity registers are read-only to
 * drivers (EINVAL).  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
2825
2826
2827#include "opt_ddb.h"
2828#ifdef DDB
2829#include <ddb/ddb.h>
2830#include <sys/cons.h>
2831
2832/*
2833 * List resources based on pci map registers, used for within ddb
2834 */
2835
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line per device (driver name/unit, bus address, class,
 * subsystem and device IDs, revision, header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 * (stopping early if the pager is quit).
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2875#endif /* DDB */
2876
/*
 * Lazily size and allocate the BAR identified by *rid for (child).
 * Probes the BAR by writing all 1s and reading back the size mask,
 * allocates a suitably sized and aligned resource from the parent,
 * records it on the child's resource list, and programs the BAR with
 * the assigned address (restoring the original value on any failure).
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/* Save the current value, then write 1s to learn the size mask. */
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit BAR keeps its upper half in the following register. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* The requested resource type must match what the BAR decodes. */
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the newly assigned address. */
	map = rman_get_start(res);
out:;
	/* On failure this restores the original BAR contents. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2965
2966
/*
 * Bus alloc_resource method.  For direct children this performs lazy
 * allocation: IRQs may be routed on demand, BAR-backed memory/ioport
 * ranges are sized and allocated via pci_alloc_map(), and previously
 * reserved entries are handed back (activating them if requested).
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that correspond to BARs get BAR handling. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return NULL;
			return (rle->res);
		}
	}
	/* Fall back on the generic resource-list allocation path. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3043
/*
 * Delete the resource identified by (type, rid) from a child device's
 * resource list, releasing the underlying resource if this bus still
 * holds it.  Silently refuses to act on devices that are not direct
 * children, or on resources still owned/activated by the child.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our own direct children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/*
			 * Releasing a resource that the child still owns
			 * or that is active would pull it out from under
			 * a driver, so complain and bail instead.
			 */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3079
3080struct resource_list *
3081pci_get_resource_list (device_t dev, device_t child)
3082{
3083	struct pci_devinfo *dinfo = device_get_ivars(child);
3084
3085	return (&dinfo->resources);
3086}
3087
3088uint32_t
3089pci_read_config_method(device_t dev, device_t child, int reg, int width)
3090{
3091	struct pci_devinfo *dinfo = device_get_ivars(child);
3092	pcicfgregs *cfg = &dinfo->cfg;
3093
3094	return (PCIB_READ_CONFIG(device_get_parent(dev),
3095	    cfg->bus, cfg->slot, cfg->func, reg, width));
3096}
3097
3098void
3099pci_write_config_method(device_t dev, device_t child, int reg,
3100    uint32_t val, int width)
3101{
3102	struct pci_devinfo *dinfo = device_get_ivars(child);
3103	pcicfgregs *cfg = &dinfo->cfg;
3104
3105	PCIB_WRITE_CONFIG(device_get_parent(dev),
3106	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3107}
3108
3109int
3110pci_child_location_str_method(device_t dev, device_t child, char *buf,
3111    size_t buflen)
3112{
3113
3114	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3115	    pci_get_function(child));
3116	return (0);
3117}
3118
3119int
3120pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3121    size_t buflen)
3122{
3123	struct pci_devinfo *dinfo;
3124	pcicfgregs *cfg;
3125
3126	dinfo = device_get_ivars(child);
3127	cfg = &dinfo->cfg;
3128	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3129	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3130	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3131	    cfg->progif);
3132	return (0);
3133}
3134
3135int
3136pci_assign_interrupt_method(device_t dev, device_t child)
3137{
3138	struct pci_devinfo *dinfo = device_get_ivars(child);
3139	pcicfgregs *cfg = &dinfo->cfg;
3140
3141	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3142	    cfg->intpin));
3143}
3144
3145static int
3146pci_modevent(module_t mod, int what, void *arg)
3147{
3148	static struct cdev *pci_cdev;
3149
3150	switch (what) {
3151	case MOD_LOAD:
3152		STAILQ_INIT(&pci_devq);
3153		pci_generation = 0;
3154		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3155		    "pci");
3156		pci_load_vendor_data();
3157		break;
3158
3159	case MOD_UNLOAD:
3160		destroy_dev(pci_cdev);
3161		break;
3162	}
3163
3164	return (0);
3165}
3166
/*
 * Restore a device's saved configuration registers (BARs, command,
 * interrupt routing, timing and class fields) from the cached copy in
 * 'dinfo', typically after a suspend/resume or power transition.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back the cached BARs, then the rest of the header. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3213
/*
 * Save a device's configuration registers into the cached copy in
 * 'dinfo' so they can be restored later by pci_cfg_restore().  When
 * 'setstate' is non-zero, optionally power the device down to D3
 * according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Policy knob: how aggressively to power down driverless devices. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3297