pci.c revision 192450
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 192450 2009-05-20 17:29:21Z imp $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static pci_addr_t	pci_mapbase(uint64_t mapreg);
75static const char	*pci_maptype(uint64_t mapreg);
76static int		pci_mapsize(uint64_t testval);
77static int		pci_maprange(uint64_t mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t dev);
81static int		pci_memen(device_t dev);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t bus, device_t dev, int reg,
85			    struct resource_list *rl, int force, int prefetch);
86static int		pci_probe(device_t dev);
87static int		pci_attach(device_t dev);
88static void		pci_load_vendor_data(void);
89static int		pci_describe_parse_line(char **ptr, int *vendor,
90			    int *device, char **desc);
91static char		*pci_describe_device(device_t dev);
92static int		pci_modevent(module_t mod, int what, void *arg);
93static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
94			    pcicfgregs *cfg);
95static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
96static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
97			    int reg, uint32_t *data);
98#if 0
99static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
100			    int reg, uint32_t data);
101#endif
102static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
103static void		pci_disable_msi(device_t dev);
104static void		pci_enable_msi(device_t dev, uint64_t address,
105			    uint16_t data);
106static void		pci_enable_msix(device_t dev, u_int index,
107			    uint64_t address, uint32_t data);
108static void		pci_mask_msix(device_t dev, u_int index);
109static void		pci_unmask_msix(device_t dev, u_int index);
110static int		pci_msi_blacklisted(void);
111static void		pci_resume_msi(device_t dev);
112static void		pci_resume_msix(device_t dev);
113
/*
 * Method dispatch table for the "pci" driver: maps the generic device,
 * bus, and PCI kobj interfaces onto the implementations in this file
 * (or onto bus_generic_* defaults where no PCI-specific work is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
165
/* Declare the "pci" driver class and attach it under PCI bridges (pcib). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Raw contents (and size) of the vendor/device description database,
 * presumably loaded by pci_load_vendor_data() and consumed by
 * pci_describe_device() — see the prototypes above.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
174
175
/*
 * One device-specific workaround, keyed by the combined device-ID/
 * vendor-ID word as read from config space.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific; config offset for MAP_REG quirks */
	int	arg2;	/* quirk-specific; unused by the quirks below */
};
184
/* Known-broken devices; scanned linearly, terminated by a zero devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* list terminator */
};
219
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* list of every enumerated PCI function */
uint32_t pci_generation;	/* bumped each time pci_devq grows */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set in pci_read_extcap() when a PCIe/PCI-X capability is first seen. */
static int pcie_chipset, pcix_chipset;
229
/* sysctl vars */
/* Each knob below is both a loader tunable and a sysctl under hw.pci. */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* NOTE(review): "agressively" typo in the description string below. */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Read-only after boot: whether pci_msi_blacklisted() is consulted. */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
270
271/* Find a device_t by bus/slot/function in domain 0 */
272
273device_t
274pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
275{
276
277	return (pci_find_dbsf(0, bus, slot, func));
278}
279
280/* Find a device_t by domain/bus/slot/function */
281
282device_t
283pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
284{
285	struct pci_devinfo *dinfo;
286
287	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
288		if ((dinfo->cfg.domain == domain) &&
289		    (dinfo->cfg.bus == bus) &&
290		    (dinfo->cfg.slot == slot) &&
291		    (dinfo->cfg.func == func)) {
292			return (dinfo->cfg.dev);
293		}
294	}
295
296	return (NULL);
297}
298
299/* Find a device_t by vendor/device ID */
300
301device_t
302pci_find_device(uint16_t vendor, uint16_t device)
303{
304	struct pci_devinfo *dinfo;
305
306	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
307		if ((dinfo->cfg.vendor == vendor) &&
308		    (dinfo->cfg.device == device)) {
309			return (dinfo->cfg.dev);
310		}
311	}
312
313	return (NULL);
314}
315
316/* return base address of memory or port map */
317
318static pci_addr_t
319pci_mapbase(uint64_t mapreg)
320{
321
322	if (PCI_BAR_MEM(mapreg))
323		return (mapreg & PCIM_BAR_MEM_BASE);
324	else
325		return (mapreg & PCIM_BAR_IO_BASE);
326}
327
328/* return map type of memory or port map */
329
330static const char *
331pci_maptype(uint64_t mapreg)
332{
333
334	if (PCI_BAR_IO(mapreg))
335		return ("I/O Port");
336	if (mapreg & PCIM_BAR_MEM_PREFETCH)
337		return ("Prefetchable Memory");
338	return ("Memory");
339}
340
341/* return log2 of map size decoded for memory or port map */
342
343static int
344pci_mapsize(uint64_t testval)
345{
346	int ln2size;
347
348	testval = pci_mapbase(testval);
349	ln2size = 0;
350	if (testval != 0) {
351		while ((testval & 1) == 0)
352		{
353			ln2size++;
354			testval >>= 1;
355		}
356	}
357	return (ln2size);
358}
359
360/* return log2 of address range supported by map register */
361
362static int
363pci_maprange(uint64_t mapreg)
364{
365	int ln2range = 0;
366
367	if (PCI_BAR_IO(mapreg))
368		ln2range = 32;
369	else
370		switch (mapreg & PCIM_BAR_MEM_TYPE) {
371		case PCIM_BAR_MEM_32:
372			ln2range = 32;
373			break;
374		case PCIM_BAR_MEM_1MB:
375			ln2range = 20;
376			break;
377		case PCIM_BAR_MEM_64:
378			ln2range = 64;
379			break;
380		}
381	return (ln2range);
382}
383
384/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
385
386static void
387pci_fixancient(pcicfgregs *cfg)
388{
389	if (cfg->hdrtype != 0)
390		return;
391
392	/* PCI to PCI bridges use header type 1 */
393	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
394		cfg->hdrtype = 1;
395}
396
397/* extract header type specific config data */
398
399static void
400pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
401{
402#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
403	switch (cfg->hdrtype) {
404	case 0:
405		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
406		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
407		cfg->nummaps	    = PCI_MAXMAPS_0;
408		break;
409	case 1:
410		cfg->nummaps	    = PCI_MAXMAPS_1;
411		break;
412	case 2:
413		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
414		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
415		cfg->nummaps	    = PCI_MAXMAPS_2;
416		break;
417	}
418#undef REG
419}
420
/*
 * This is a lame example: we should have some way of managing this table
 * from userland.  The user should be able to tell us from the boot loader
 * or at runtime what mapping to do.
 */
static struct pci_remap_entry
{
	uint16_t vendor;		/* IDs to match ... */
	uint16_t device;
	uint16_t mapped_vendor;		/* ... and the IDs substituted */
	uint16_t mapped_device;
} pci_remap[] =
{
	{ 0x1039, 0x0901, 0x1039, 0x0900 }	/* Map sis 901 to sis 900 */
};
/*
 * Derive the entry count from the table itself rather than maintaining
 * a hand-counted constant that can silently fall out of sync when
 * entries are added or removed.
 */
static int pci_remap_entries = sizeof(pci_remap) / sizeof(pci_remap[0]);
437
438static void
439pci_apply_remap_table(pcicfgregs *cfg)
440{
441	int i;
442
443	for (i = 0; i < pci_remap_entries; i++) {
444		if (cfg->vendor == pci_remap[i].vendor &&
445		    cfg->device == pci_remap[i].device) {
446			cfg->vendor = pci_remap[i].mapped_vendor;
447			cfg->device = pci_remap[i].mapped_device;
448			return;
449		}
450	}
451}
452
/* read configuration header into pcicfgregs structure */
/*
 * Probe domain/bus/slot/function (d/b/s/f) through bridge 'pcib'.  If a
 * device responds, allocate a pci_devinfo of 'size' bytes ('size' lets
 * callers embed pci_devinfo at the start of a larger structure), fill in
 * its config registers, link it onto the global device list, and return
 * it.  Returns NULL if nothing responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID word means nothing answered the config read. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): defensive — M_WAITOK should not fail. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/*
		 * The multi-function flag shares the header-type register;
		 * split it out so hdrtype holds just the layout number.
		 */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);
		pci_apply_remap_table(cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the data into the pciio(4)-style conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
528
/*
 * Walk the device's capability list and record the location/contents of
 * the capabilities this driver cares about (power management,
 * HyperTransport MSI mapping, MSI, MSI-X, VPD, subvendor IDs) into cfg.
 * Also notes globally whether any PCI-X or PCI-express capability was
 * seen (pcix_chipset / pcie_chipset).
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The register holding the list head depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table/PBA locations: BAR index + offset within. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Just note the location; parsing is deferred. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
662
663/*
664 * PCI Vital Product Data
665 */
666
667#define	PCI_VPD_TIMEOUT		1000000
668
669static int
670pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
671{
672	int count = PCI_VPD_TIMEOUT;
673
674	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
675
676	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
677
678	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
679		if (--count < 0)
680			return (ENXIO);
681		DELAY(1);	/* limit looping */
682	}
683	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
684
685	return (0);
686}
687
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD data (the mirror image of
 * pci_read_vpd_reg above: here bit 15 is set to start the write cycle
 * and is cleared by the device on completion).  Currently compiled out.
 *
 * NOTE(review): the KASSERT message has a typo ("must by"); fix it if
 * this code is ever enabled.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
707
708#undef PCI_VPD_TIMEOUT
709
/*
 * Cursor for sequential byte-wise reads of a device's VPD: one 32-bit
 * word is fetched at a time and doled out byte by byte while a running
 * checksum is maintained.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched word */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
718
719static int
720vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
721{
722	uint32_t reg;
723	uint8_t byte;
724
725	if (vrs->bytesinval == 0) {
726		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
727			return (ENXIO);
728		vrs->val = le32toh(reg);
729		vrs->off += 4;
730		byte = vrs->val & 0xff;
731		vrs->bytesinval = 3;
732	} else {
733		vrs->val = vrs->val >> 8;
734		byte = vrs->val & 0xff;
735		vrs->bytesinval--;
736	}
737
738	vrs->cksum += byte;
739	*data = byte;
740	return (0);
741}
742
/*
 * Parse the device's entire VPD image and cache the identifier string
 * plus the read-only (VPD-R) and read/write (VPD-W) keyword arrays in
 * cfg->vpd.  On checksum failure the read-only data is discarded; on a
 * device read error everything is discarded.  cfg->vpd.vpd_cached is
 * set unconditionally so the parse is attempted only once.
 *
 * Implemented as a byte-driven state machine:
 *   0   expecting an item (resource) header
 *   1   copying the Identifier String
 *   2   expecting a VPD-R keyword header
 *   3   copying a VPD-R keyword value
 *   4   skipping bytes (no visible transition enters this state)
 *   5   expecting a VPD-W keyword header
 *   6   copying a VPD-W keyword value
 *  -1   normal/abnormal termination
 *  -2   read error from the device
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length cannot exceed the VPD address space. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* 3 header bytes (keyword + length) consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" holds the checksum byte; after adding it,
			 * the running sum of all bytes must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip state; see NOTE in the header comment. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where the value starts for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we only ever try this once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1022
1023int
1024pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1025{
1026	struct pci_devinfo *dinfo = device_get_ivars(child);
1027	pcicfgregs *cfg = &dinfo->cfg;
1028
1029	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1030		pci_read_vpd(device_get_parent(dev), cfg);
1031
1032	*identptr = cfg->vpd.vpd_ident;
1033
1034	if (*identptr == NULL)
1035		return (ENXIO);
1036
1037	return (0);
1038}
1039
1040int
1041pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1042	const char **vptr)
1043{
1044	struct pci_devinfo *dinfo = device_get_ivars(child);
1045	pcicfgregs *cfg = &dinfo->cfg;
1046	int i;
1047
1048	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1049		pci_read_vpd(device_get_parent(dev), cfg);
1050
1051	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1052		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1053		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1054			*vptr = cfg->vpd.vpd_ros[i].value;
1055		}
1056
1057	if (i != cfg->vpd.vpd_rocnt)
1058		return (0);
1059
1060	*vptr = NULL;
1061	return (ENXIO);
1062}
1063
1064/*
1065 * Find the requested extended capability and return the offset in
1066 * configuration space via the pointer provided. The function returns
1067 * 0 on success and error code otherwise.
1068 */
1069int
1070pci_find_extcap_method(device_t dev, device_t child, int capability,
1071    int *capreg)
1072{
1073	struct pci_devinfo *dinfo = device_get_ivars(child);
1074	pcicfgregs *cfg = &dinfo->cfg;
1075	u_int32_t status;
1076	u_int8_t ptr;
1077
1078	/*
1079	 * Check the CAP_LIST bit of the PCI status register first.
1080	 */
1081	status = pci_read_config(child, PCIR_STATUS, 2);
1082	if (!(status & PCIM_STATUS_CAPPRESENT))
1083		return (ENXIO);
1084
1085	/*
1086	 * Determine the start pointer of the capabilities list.
1087	 */
1088	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1089	case 0:
1090	case 1:
1091		ptr = PCIR_CAP_PTR;
1092		break;
1093	case 2:
1094		ptr = PCIR_CAP_PTR_2;
1095		break;
1096	default:
1097		/* XXX: panic? */
1098		return (ENXIO);		/* no extended capabilities support */
1099	}
1100	ptr = pci_read_config(child, ptr, 1);
1101
1102	/*
1103	 * Traverse the capabilities list.
1104	 */
1105	while (ptr != 0) {
1106		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1107			if (capreg != NULL)
1108				*capreg = ptr;
1109			return (0);
1110		}
1111		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1112	}
1113
1114	return (ENOENT);
1115}
1116
1117/*
1118 * Support for MSI-X message interrupts.
1119 */
1120void
1121pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1122{
1123	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125	uint32_t offset;
1126
1127	KASSERT(msix->msix_table_len > index, ("bogus index"));
1128	offset = msix->msix_table_offset + index * 16;
1129	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1130	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1131	bus_write_4(msix->msix_table_res, offset + 8, data);
1132
1133	/* Enable MSI -> HT mapping. */
1134	pci_ht_map_msi(dev, address);
1135}
1136
1137void
1138pci_mask_msix(device_t dev, u_int index)
1139{
1140	struct pci_devinfo *dinfo = device_get_ivars(dev);
1141	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1142	uint32_t offset, val;
1143
1144	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1145	offset = msix->msix_table_offset + index * 16 + 12;
1146	val = bus_read_4(msix->msix_table_res, offset);
1147	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1148		val |= PCIM_MSIX_VCTRL_MASK;
1149		bus_write_4(msix->msix_table_res, offset, val);
1150	}
1151}
1152
1153void
1154pci_unmask_msix(device_t dev, u_int index)
1155{
1156	struct pci_devinfo *dinfo = device_get_ivars(dev);
1157	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1158	uint32_t offset, val;
1159
1160	KASSERT(msix->msix_table_len > index, ("bogus index"));
1161	offset = msix->msix_table_offset + index * 16 + 12;
1162	val = bus_read_4(msix->msix_table_res, offset);
1163	if (val & PCIM_MSIX_VCTRL_MASK) {
1164		val &= ~PCIM_MSIX_VCTRL_MASK;
1165		bus_write_4(msix->msix_table_res, offset, val);
1166	}
1167}
1168
1169int
1170pci_pending_msix(device_t dev, u_int index)
1171{
1172	struct pci_devinfo *dinfo = device_get_ivars(dev);
1173	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1174	uint32_t offset, bit;
1175
1176	KASSERT(msix->msix_table_len > index, ("bogus index"));
1177	offset = msix->msix_pba_offset + (index / 32) * 4;
1178	bit = 1 << index % 32;
1179	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1180}
1181
1182/*
1183 * Restore MSI-X registers and table during resume.  If MSI-X is
1184 * enabled then walk the virtual table to restore the actual MSI-X
1185 * table.
1186 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Always restore the saved control register (enable bit included). */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1214
1215/*
1216 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1217 * returned in *count.  After this function returns, each message will be
1218 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1219 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have allocated and activated the memory BAR(s) holding the
	 * MSI-X table and PBA before asking for messages.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* 'rle' still points at the table BAR when the PBA shares it. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Accept however many messages were actually granted. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially the virtual table maps message i to vector i + 1
	 * (vector values are 1-based); pci_remap_msix_method() may
	 * change that layout later.
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1351
1352/*
1353 * By default, pci_alloc_msix() will assign the allocated IRQ
1354 * resources consecutively to the first N messages in the MSI-X table.
1355 * However, device drivers may want to use different layouts if they
1356 * either receive fewer messages than they asked for, or they wish to
1357 * populate the MSI-X table sparsely.  This method allows the driver
1358 * to specify what layout it wants.  It must be called after a
1359 * successful pci_alloc_msix() but before any of the associated
1360 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1361 *
1362 * The 'vectors' array contains 'count' message vectors.  The array
1363 * maps directly to the MSI-X table in that index 0 in the array
1364 * specifies the vector for the first message in the MSI-X table, etc.
1365 * The vector value in each array index can either be 0 to indicate
1366 * that no vector should be assigned to a message slot, or it can be a
1367 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1369 * vector (IRQ) to be used for the corresponding message.
1370 *
1371 * On successful return, each message with a non-zero vector will have
1372 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1373 * 1.  Additionally, if any of the IRQs allocated via the previous
1374 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1375 * will be freed back to the system automatically.
1376 *
1377 * For example, suppose a driver has a MSI-X table with 6 messages and
1378 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1379 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1380 * C.  After the call to pci_alloc_msix(), the device will be setup to
1381 * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1383 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1384 * be freed back to the system.  This device will also have valid
1385 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1386 *
1387 * In any case, the SYS_RES_IRQ rid X will always map to the message
1388 * at MSI-X table index X - 1 and will only be valid if a vector is
1389 * assigned to that table entry.
1390 */
1391int
1392pci_remap_msix_method(device_t dev, device_t child, int count,
1393    const u_int *vectors)
1394{
1395	struct pci_devinfo *dinfo = device_get_ivars(child);
1396	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1397	struct resource_list_entry *rle;
1398	int i, irq, j, *used;
1399
1400	/*
1401	 * Have to have at least one message in the table but the
1402	 * table can't be bigger than the actual MSI-X table in the
1403	 * device.
1404	 */
1405	if (count == 0 || count > msix->msix_msgnum)
1406		return (EINVAL);
1407
1408	/* Sanity check the vectors. */
1409	for (i = 0; i < count; i++)
1410		if (vectors[i] > msix->msix_alloc)
1411			return (EINVAL);
1412
1413	/*
1414	 * Make sure there aren't any holes in the vectors to be used.
1415	 * It's a big pain to support it, and it doesn't really make
1416	 * sense anyway.  Also, at least one vector must be used.
1417	 */
1418	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1419	    M_ZERO);
1420	for (i = 0; i < count; i++)
1421		if (vectors[i] != 0)
1422			used[vectors[i] - 1] = 1;
1423	for (i = 0; i < msix->msix_alloc - 1; i++)
1424		if (used[i] == 0 && used[i + 1] == 1) {
1425			free(used, M_DEVBUF);
1426			return (EINVAL);
1427		}
1428	if (used[0] != 1) {
1429		free(used, M_DEVBUF);
1430		return (EINVAL);
1431	}
1432
1433	/* Make sure none of the resources are allocated. */
1434	for (i = 0; i < msix->msix_table_len; i++) {
1435		if (msix->msix_table[i].mte_vector == 0)
1436			continue;
1437		if (msix->msix_table[i].mte_handlers > 0)
1438			return (EBUSY);
1439		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1440		KASSERT(rle != NULL, ("missing resource"));
1441		if (rle->res != NULL)
1442			return (EBUSY);
1443	}
1444
1445	/* Free the existing resource list entries. */
1446	for (i = 0; i < msix->msix_table_len; i++) {
1447		if (msix->msix_table[i].mte_vector == 0)
1448			continue;
1449		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1450	}
1451
1452	/*
1453	 * Build the new virtual table keeping track of which vectors are
1454	 * used.
1455	 */
1456	free(msix->msix_table, M_DEVBUF);
1457	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1458	    M_DEVBUF, M_WAITOK | M_ZERO);
1459	for (i = 0; i < count; i++)
1460		msix->msix_table[i].mte_vector = vectors[i];
1461	msix->msix_table_len = count;
1462
1463	/* Free any unused IRQs and resize the vectors array if necessary. */
1464	j = msix->msix_alloc - 1;
1465	if (used[j] == 0) {
1466		struct msix_vector *vec;
1467
1468		while (used[j] == 0) {
1469			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1470			    msix->msix_vectors[j].mv_irq);
1471			j--;
1472		}
1473		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1474		    M_WAITOK);
1475		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1476		    (j + 1));
1477		free(msix->msix_vectors, M_DEVBUF);
1478		msix->msix_vectors = vec;
1479		msix->msix_alloc = j + 1;
1480	}
1481	free(used, M_DEVBUF);
1482
1483	/* Map the IRQs onto the rids. */
1484	for (i = 0; i < count; i++) {
1485		if (vectors[i] == 0)
1486			continue;
1487		irq = msix->msix_vectors[vectors[i]].mv_irq;
1488		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1489		    irq, 1);
1490	}
1491
1492	if (bootverbose) {
1493		device_printf(child, "Remapped MSI-X IRQs as: ");
1494		for (i = 0; i < count; i++) {
1495			if (i != 0)
1496				printf(", ");
1497			if (vectors[i] == 0)
1498				printf("---");
1499			else
1500				printf("%d",
1501				    msix->msix_vectors[vectors[i]].mv_irq);
1502		}
1503		printf("\n");
1504	}
1505
1506	return (0);
1507}
1508
1509static int
1510pci_release_msix(device_t dev, device_t child)
1511{
1512	struct pci_devinfo *dinfo = device_get_ivars(child);
1513	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1514	struct resource_list_entry *rle;
1515	int i;
1516
1517	/* Do we have any messages to release? */
1518	if (msix->msix_alloc == 0)
1519		return (ENODEV);
1520
1521	/* Make sure none of the resources are allocated. */
1522	for (i = 0; i < msix->msix_table_len; i++) {
1523		if (msix->msix_table[i].mte_vector == 0)
1524			continue;
1525		if (msix->msix_table[i].mte_handlers > 0)
1526			return (EBUSY);
1527		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1528		KASSERT(rle != NULL, ("missing resource"));
1529		if (rle->res != NULL)
1530			return (EBUSY);
1531	}
1532
1533	/* Update control register to disable MSI-X. */
1534	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1535	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1536	    msix->msix_ctrl, 2);
1537
1538	/* Free the resource list entries. */
1539	for (i = 0; i < msix->msix_table_len; i++) {
1540		if (msix->msix_table[i].mte_vector == 0)
1541			continue;
1542		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1543	}
1544	free(msix->msix_table, M_DEVBUF);
1545	msix->msix_table_len = 0;
1546
1547	/* Release the IRQs. */
1548	for (i = 0; i < msix->msix_alloc; i++)
1549		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1550		    msix->msix_vectors[i].mv_irq);
1551	free(msix->msix_vectors, M_DEVBUF);
1552	msix->msix_alloc = 0;
1553	return (0);
1554}
1555
1556/*
1557 * Return the max supported MSI-X messages this device supports.
1558 * Basically, assuming the MD code can alloc messages, this function
1559 * should return the maximum value that pci_alloc_msix() can return.
1560 * Thus, it is subject to the tunables, etc.
1561 */
1562int
1563pci_msix_count_method(device_t dev, device_t child)
1564{
1565	struct pci_devinfo *dinfo = device_get_ivars(child);
1566	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1567
1568	if (pci_do_msix && msix->msix_location != 0)
1569		return (msix->msix_msgnum);
1570	return (0);
1571}
1572
1573/*
1574 * HyperTransport MSI mapping control
1575 */
1576void
1577pci_ht_map_msi(device_t dev, uint64_t addr)
1578{
1579	struct pci_devinfo *dinfo = device_get_ivars(dev);
1580	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1581
1582	if (!ht->ht_msimap)
1583		return;
1584
1585	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1586	    ht->ht_msiaddr >> 20 == addr >> 20) {
1587		/* Enable MSI -> HT mapping. */
1588		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1589		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1590		    ht->ht_msictrl, 2);
1591	}
1592
1593	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1594		/* Disable MSI -> HT mapping. */
1595		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1596		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1597		    ht->ht_msictrl, 2);
1598	}
1599}
1600
1601/*
1602 * Support for MSI message signalled interrupts.
1603 */
1604void
1605pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1606{
1607	struct pci_devinfo *dinfo = device_get_ivars(dev);
1608	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1609
1610	/* Write data and address values. */
1611	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1612	    address & 0xffffffff, 4);
1613	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1614		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1615		    address >> 32, 4);
1616		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1617		    data, 2);
1618	} else
1619		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1620		    2);
1621
1622	/* Enable MSI in the control register. */
1623	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1624	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1625	    2);
1626
1627	/* Enable MSI -> HT mapping. */
1628	pci_ht_map_msi(dev, address);
1629}
1630
1631void
1632pci_disable_msi(device_t dev)
1633{
1634	struct pci_devinfo *dinfo = device_get_ivars(dev);
1635	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1636
1637	/* Disable MSI -> HT mapping. */
1638	pci_ht_map_msi(dev, 0);
1639
1640	/* Disable MSI in the control register. */
1641	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1642	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1643	    2);
1644}
1645
1646/*
1647 * Restore MSI registers during resume.  If MSI is enabled then
1648 * restore the data and address registers in addition to the control
1649 * register.
1650 */
1651static void
1652pci_resume_msi(device_t dev)
1653{
1654	struct pci_devinfo *dinfo = device_get_ivars(dev);
1655	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1656	uint64_t address;
1657	uint16_t data;
1658
1659	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1660		address = msi->msi_addr;
1661		data = msi->msi_data;
1662		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1663		    address & 0xffffffff, 4);
1664		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1665			pci_write_config(dev, msi->msi_location +
1666			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1667			pci_write_config(dev, msi->msi_location +
1668			    PCIR_MSI_DATA_64BIT, data, 2);
1669		} else
1670			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1671			    data, 2);
1672	}
1673	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1674	    2);
1675}
1676
/*
 * Re-request the address/data mapping for 'irq' from the parent
 * bridge and reprogram the device's MSI or MSI-X registers with the
 * result.  Returns 0 on success, ENOENT if 'irq' is not one of this
 * device's message IRQs, or the bridge's mapping error.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram: disable, update, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot on vector i + 1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned even when a matching
		 * vector was found and updated above -- confirm callers
		 * treat the MSI-X case as best-effort.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1752
1753/*
1754 * Returns true if the specified device is blacklisted because MSI
1755 * doesn't work.
1756 */
1757int
1758pci_msi_device_blacklisted(device_t dev)
1759{
1760	struct pci_quirk *q;
1761
1762	if (!pci_honor_msi_blacklist)
1763		return (0);
1764
1765	for (q = &pci_quirks[0]; q->devid; q++) {
1766		if (q->devid == pci_get_devid(dev) &&
1767		    q->type == PCI_QUIRK_DISABLE_MSI)
1768			return (1);
1769	}
1770	return (0);
1771}
1772
1773/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1775 * we just check for blacklisted chipsets as represented by the
1776 * host-PCI bridge at device 0:0:0.  In the future, it may become
1777 * necessary to check other system attributes, such as the kenv values
1778 * that give the motherboard manufacturer and model number.
1779 */
1780static int
1781pci_msi_blacklisted(void)
1782{
1783	device_t dev;
1784
1785	if (!pci_honor_msi_blacklist)
1786		return (0);
1787
1788	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1789	if (!(pcie_chipset || pcix_chipset))
1790		return (1);
1791
1792	dev = pci_find_bsf(0, 0, 0);
1793	if (dev != NULL)
1794		return (pci_msi_device_blacklisted(dev));
1795	return (0);
1796}
1797
1798/*
1799 * Attempt to allocate *count MSI messages.  The actual number allocated is
1800 * returned in *count.  After this function returns, each message will be
1801 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1802 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Negotiate with the bridge, halving the request on failure. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field encodes the count as log2 (ffs(actual)
	 * - 1 since actual is a power of two).
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1921
1922/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ numbers so they can be handed back. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1970
1971/*
1972 * Return the max supported MSI messages this device supports.
1973 * Basically, assuming the MD code can alloc messages, this function
1974 * should return the maximum value that pci_alloc_msi() can return.
1975 * Thus, it is subject to the tunables, etc.
1976 */
1977int
1978pci_msi_count_method(device_t dev, device_t child)
1979{
1980	struct pci_devinfo *dinfo = device_get_ivars(child);
1981	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1982
1983	if (pci_do_msi && msi->msi_location != 0)
1984		return (msi->msi_msgnum);
1985	return (0);
1986}
1987
1988/* free pcicfgregs structure and all depending data structures */
1989
1990int
1991pci_freecfg(struct pci_devinfo *dinfo)
1992{
1993	struct devlist *devlist_head;
1994	int i;
1995
1996	devlist_head = &pci_devq;
1997
1998	if (dinfo->cfg.vpd.vpd_reg) {
1999		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2000		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2001			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2002		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2003		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2004			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2005		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2006	}
2007	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2008	free(dinfo, M_DEVBUF);
2009
2010	/* increment the generation count */
2011	pci_generation++;
2012
2013	/* we're losing one device */
2014	pci_numdevs--;
2015	return (0);
2016}
2017
2018/*
 * PCI power management
2020 */
2021int
2022pci_set_powerstate_method(device_t dev, device_t child, int state)
2023{
2024	struct pci_devinfo *dinfo = device_get_ivars(child);
2025	pcicfgregs *cfg = &dinfo->cfg;
2026	uint16_t status;
2027	int result, oldstate, highest, delay;
2028
2029	if (cfg->pp.pp_cap == 0)
2030		return (EOPNOTSUPP);
2031
2032	/*
2033	 * Optimize a no state change request away.  While it would be OK to
2034	 * write to the hardware in theory, some devices have shown odd
2035	 * behavior when going from D3 -> D3.
2036	 */
2037	oldstate = pci_get_powerstate(child);
2038	if (oldstate == state)
2039		return (0);
2040
2041	/*
2042	 * The PCI power management specification states that after a state
2043	 * transition between PCI power states, system software must
2044	 * guarantee a minimal delay before the function accesses the device.
2045	 * Compute the worst case delay that we need to guarantee before we
2046	 * access the device.  Many devices will be responsive much more
2047	 * quickly than this delay, but there are some that don't respond
2048	 * instantly to state changes.  Transitions to/from D3 state require
2049	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2050	 * is done below with DELAY rather than a sleeper function because
2051	 * this function can be called from contexts where we cannot sleep.
2052	 */
2053	highest = (oldstate > state) ? oldstate : state;
2054	if (highest == PCI_POWERSTATE_D3)
2055	    delay = 10000;
2056	else if (highest == PCI_POWERSTATE_D2)
2057	    delay = 200;
2058	else
2059	    delay = 0;
2060	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2061	    & ~PCIM_PSTAT_DMASK;
2062	result = 0;
2063	switch (state) {
2064	case PCI_POWERSTATE_D0:
2065		status |= PCIM_PSTAT_D0;
2066		break;
2067	case PCI_POWERSTATE_D1:
2068		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2069			return (EOPNOTSUPP);
2070		status |= PCIM_PSTAT_D1;
2071		break;
2072	case PCI_POWERSTATE_D2:
2073		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2074			return (EOPNOTSUPP);
2075		status |= PCIM_PSTAT_D2;
2076		break;
2077	case PCI_POWERSTATE_D3:
2078		status |= PCIM_PSTAT_D3;
2079		break;
2080	default:
2081		return (EINVAL);
2082	}
2083
2084	if (bootverbose)
2085		printf(
2086		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2087		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2088		    dinfo->cfg.func, oldstate, state);
2089
2090	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2091	if (delay)
2092		DELAY(delay);
2093	return (0);
2094}
2095
2096int
2097pci_get_powerstate_method(device_t dev, device_t child)
2098{
2099	struct pci_devinfo *dinfo = device_get_ivars(child);
2100	pcicfgregs *cfg = &dinfo->cfg;
2101	uint16_t status;
2102	int result;
2103
2104	if (cfg->pp.pp_cap != 0) {
2105		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2106		switch (status & PCIM_PSTAT_DMASK) {
2107		case PCIM_PSTAT_D0:
2108			result = PCI_POWERSTATE_D0;
2109			break;
2110		case PCIM_PSTAT_D1:
2111			result = PCI_POWERSTATE_D1;
2112			break;
2113		case PCIM_PSTAT_D2:
2114			result = PCI_POWERSTATE_D2;
2115			break;
2116		case PCIM_PSTAT_D3:
2117			result = PCI_POWERSTATE_D3;
2118			break;
2119		default:
2120			result = PCI_POWERSTATE_UNKNOWN;
2121			break;
2122		}
2123	} else {
2124		/* No support, device is always at D0 */
2125		result = PCI_POWERSTATE_D0;
2126	}
2127	return (result);
2128}
2129
2130/*
2131 * Some convenience functions for PCI device drivers.
2132 */
2133
2134static __inline void
2135pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2136{
2137	uint16_t	command;
2138
2139	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2140	command |= bit;
2141	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2142}
2143
2144static __inline void
2145pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2146{
2147	uint16_t	command;
2148
2149	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2150	command &= ~bit;
2151	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2152}
2153
/*
 * Enable bus mastering for a device by setting the busmaster enable
 * bit in its command register.  Always returns 0.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2160
/*
 * Disable bus mastering for a device by clearing the busmaster enable
 * bit in its command register.  Always returns 0.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2167
2168int
2169pci_enable_io_method(device_t dev, device_t child, int space)
2170{
2171	uint16_t command;
2172	uint16_t bit;
2173	char *error;
2174
2175	bit = 0;
2176	error = NULL;
2177
2178	switch(space) {
2179	case SYS_RES_IOPORT:
2180		bit = PCIM_CMD_PORTEN;
2181		error = "port";
2182		break;
2183	case SYS_RES_MEMORY:
2184		bit = PCIM_CMD_MEMEN;
2185		error = "memory";
2186		break;
2187	default:
2188		return (EINVAL);
2189	}
2190	pci_set_command_bit(dev, child, bit);
2191	/* Some devices seem to need a brief stall here, what do to? */
2192	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2193	if (command & bit)
2194		return (0);
2195	device_printf(child, "failed to enable %s mapping!\n", error);
2196	return (ENXIO);
2197}
2198
2199int
2200pci_disable_io_method(device_t dev, device_t child, int space)
2201{
2202	uint16_t command;
2203	uint16_t bit;
2204	char *error;
2205
2206	bit = 0;
2207	error = NULL;
2208
2209	switch(space) {
2210	case SYS_RES_IOPORT:
2211		bit = PCIM_CMD_PORTEN;
2212		error = "port";
2213		break;
2214	case SYS_RES_MEMORY:
2215		bit = PCIM_CMD_MEMEN;
2216		error = "memory";
2217		break;
2218	default:
2219		return (EINVAL);
2220	}
2221	pci_clear_command_bit(dev, child, bit);
2222	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2223	if (command & bit) {
2224		device_printf(child, "failed to disable %s mapping!\n", error);
2225		return (ENXIO);
2226	}
2227	return (0);
2228}
2229
2230/*
2231 * New style pci driver.  Parent device is either a pci-host-bridge or a
2232 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2233 */
2234
/*
 * Print verbose details of a device's config header and its power
 * management, MSI, and MSI-X capabilities.  Output is produced only
 * when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Interrupt pins are 1-based: pin 1 prints as 'a'. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2291
2292static int
2293pci_porten(device_t dev)
2294{
2295	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2296}
2297
2298static int
2299pci_memen(device_t dev)
2300{
2301	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2302}
2303
/*
 * Read a BAR's current value and probe its size.  On return, *mapp
 * holds the BAR's original contents and *testvalp holds the value read
 * back after writing all 1's (the low log2(size) bits read back as 0).
 * 64-bit BARs span two registers; both halves are handled here.  The
 * sequence of config accesses below is order-sensitive; do not reorder.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2350
2351static void
2352pci_write_bar(device_t dev, int reg, pci_addr_t base)
2353{
2354	pci_addr_t map;
2355	int ln2range;
2356
2357	map = pci_read_config(dev, reg, 4);
2358	ln2range = pci_maprange(map);
2359	pci_write_config(dev, reg, base, 4);
2360	if (ln2range == 64)
2361		pci_write_config(dev, reg + 4, base >> 32, 4);
2362}
2363
2364/*
2365 * Add a resource based on a pci map register. Return 1 if the map
2366 * register is a 32bit map register or 2 if it is a 64bit register.
2367 */
2368static int
2369pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2370    int force, int prefetch)
2371{
2372	pci_addr_t base, map, testval;
2373	pci_addr_t start, end, count;
2374	int barlen, maprange, mapsize, type;
2375	uint16_t cmd;
2376	struct resource *res;
2377
2378	pci_read_bar(dev, reg, &map, &testval);
2379	if (PCI_BAR_MEM(map)) {
2380		type = SYS_RES_MEMORY;
2381		if (map & PCIM_BAR_MEM_PREFETCH)
2382			prefetch = 1;
2383	} else
2384		type = SYS_RES_IOPORT;
2385	mapsize = pci_mapsize(testval);
2386	base = pci_mapbase(map);
2387	maprange = pci_maprange(map);
2388	barlen = maprange == 64 ? 2 : 1;
2389
2390	/*
2391	 * For I/O registers, if bottom bit is set, and the next bit up
2392	 * isn't clear, we know we have a BAR that doesn't conform to the
2393	 * spec, so ignore it.  Also, sanity check the size of the data
2394	 * areas to the type of memory involved.  Memory must be at least
2395	 * 16 bytes in size, while I/O ranges must be at least 4.
2396	 */
2397	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2398		return (barlen);
2399	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2400	    (type == SYS_RES_IOPORT && mapsize < 2))
2401		return (barlen);
2402
2403	if (bootverbose) {
2404		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2405		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2406		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2407			printf(", port disabled\n");
2408		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2409			printf(", memory disabled\n");
2410		else
2411			printf(", enabled\n");
2412	}
2413
2414	/*
2415	 * If base is 0, then we have problems.  It is best to ignore
2416	 * such entries for the moment.  These will be allocated later if
2417	 * the driver specifically requests them.  However, some
2418	 * removable busses look better when all resources are allocated,
2419	 * so allow '0' to be overriden.
2420	 *
2421	 * Similarly treat maps whose values is the same as the test value
2422	 * read back.  These maps have had all f's written to them by the
2423	 * BIOS in an attempt to disable the resources.
2424	 */
2425	if (!force && (base == 0 || map == testval))
2426		return (barlen);
2427	if ((u_long)base != base) {
2428		device_printf(bus,
2429		    "pci%d:%d:%d:%d bar %#x too many address bits",
2430		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2431		    pci_get_function(dev), reg);
2432		return (barlen);
2433	}
2434
2435	/*
2436	 * This code theoretically does the right thing, but has
2437	 * undesirable side effects in some cases where peripherals
2438	 * respond oddly to having these bits enabled.  Let the user
2439	 * be able to turn them off (since pci_enable_io_modes is 1 by
2440	 * default).
2441	 */
2442	if (pci_enable_io_modes) {
2443		/* Turn on resources that have been left off by a lazy BIOS */
2444		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2445			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2446			cmd |= PCIM_CMD_PORTEN;
2447			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2448		}
2449		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2450			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2451			cmd |= PCIM_CMD_MEMEN;
2452			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2453		}
2454	} else {
2455		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2456			return (barlen);
2457		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2458			return (barlen);
2459	}
2460
2461	count = 1 << mapsize;
2462	if (base == 0 || base == pci_mapbase(testval)) {
2463		start = 0;	/* Let the parent decide. */
2464		end = ~0ULL;
2465	} else {
2466		start = base;
2467		end = base + (1 << mapsize) - 1;
2468	}
2469	resource_list_add(rl, type, reg, start, end, count);
2470
2471	/*
2472	 * Try to allocate the resource for this BAR from our parent
2473	 * so that this resource range is already reserved.  The
2474	 * driver for this device will later inherit this resource in
2475	 * pci_alloc_resource().
2476	 */
2477	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2478	    prefetch ? RF_PREFETCHABLE : 0);
2479	if (res == NULL) {
2480		/*
2481		 * If the allocation fails, clear the BAR and delete
2482		 * the resource list entry to force
2483		 * pci_alloc_resource() to allocate resources from the
2484		 * parent.
2485		 */
2486		resource_list_delete(rl, type, reg);
2487		start = 0;
2488	} else {
2489		start = rman_get_start(res);
2490		rman_set_device(res, bus);
2491	}
2492	pci_write_bar(dev, reg, start);
2493	return (barlen);
2494}
2495
2496/*
2497 * For ATA devices we need to decide early what addressing mode to use.
2498 * Legacy demands that the primary and secondary ATA ports sits on the
2499 * same addresses that old ISA hardware did. This dictates that we use
2500 * those addresses and ignore the BAR's if we cannot set PCI native
2501 * addressing mode.
2502 */
/*
 * Set up BAR/legacy resources for an ATA controller.  For each channel
 * in native mode the BARs are probed normally; for a channel in legacy
 * (compatibility) mode the fixed ISA addresses are registered instead.
 * BARs 4 and 5 (bus-master DMA, etc.) are always probed.
 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: native mode uses BARs 0/1, legacy uses 0x1f0/0x3f6. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/*
		 * NOTE(review): resource_list_alloc() results below are not
		 * checked for NULL before rman_set_device() -- presumably
		 * allocation of the fixed legacy ranges cannot fail here;
		 * confirm.
		 */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
		    8, 0);
		rman_set_device(r, bus);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
		    1, 0);
		rman_set_device(r, bus);
	}
	/* Secondary channel: native mode uses BARs 2/3, legacy uses 0x170/0x376. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
		    8, 0);
		rman_set_device(r, bus);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
		    1, 0);
		rman_set_device(r, bus);
	}
	/* BARs 4 and 5 (e.g. bus-master DMA registers) are always probed. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2561
/*
 * Determine the IRQ for a device's INTx pin and register it as the
 * rid 0 SYS_RES_IRQ entry in the device's resource list.  The IRQ can
 * come from (in priority order): a user tunable, the bus's interrupt
 * routing (always attempted when force_route is set), or the intline
 * config register.  The intline register is updated if it changed.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2609
/*
 * Populate a device's resource list from its BARs (with special
 * handling for legacy-mode ATA controllers), add any quirk-defined
 * map registers, and route its INTx interrupt if it has one.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2, skipping 64-bit BAR halves. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2654
2655/*
2656 * After we've added the children to the pci bus device, we need to fixup
2657 * the children in various ways.  This function fixes things that require
2658 * multiple passes to get right, such as bus number and some resource
2659 * things (although the latter hasn't been implemented yet).  This must be
 * done before the children are probe/attached, since by that point these
2661 * things must be fixed.
2662 */
/*
 * Walk the bus's children and report the secondary/subordinate bus
 * numbers of any PCI-PCI or CardBus bridges found.  The pass that
 * would actually renumber buses is compiled out ("#if 0") below, so
 * this currently only prints diagnostic output.
 */
static void
pci_fix_bridges(device_t dev)
{
	int i, numdevs, error, secbus, subbus;
	device_t child, *devlist;

	if ((error = device_get_children(dev, &devlist, &numdevs)))
		return;
	/*
	 * First pass, get the bus numbers that are in use
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_read_config(child, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) {
		default:
			continue;
		case 1:	/* PCI-PCI bridge */
		case 2: /* CardBus bridge -- offsets are the same */
			secbus = pci_read_config(child, PCIR_SECBUS_1, 1);
			subbus = pci_read_config(child, PCIR_SUBBUS_1, 1);
			break;
		}
		printf("%d:%d:%d:%d sec %d sub %d\n", pcib_get_domain(dev),
		    pci_get_bus(child), pci_get_slot(child),
		    pci_get_function(child), secbus, subbus);
	}
#if 0
	/*
	 * Second pass, Fix the bus numbers, as needed
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) {
		case 1:	/* PCI-PCI bridge */
			break;
		case 2: /* CardBus bridge */
			break;
		default:
			continue;
		}
	}
#endif
	free(devlist, M_TEMP);
}
2707
/*
 * Enumerate all slots/functions on a PCI bus and add a child device
 * for each function found.  dinfo_size lets subclasses (e.g. CardBus)
 * allocate a larger per-device info structure; it must be at least
 * sizeof(struct pci_devinfo).
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for a config read of slot 's', function 'f' on this bus. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with an invalid/absent header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
	pci_fix_bridges(dev);
}
2741
/*
 * Create a new-bus child for a discovered PCI function and initialize
 * its state: attach the devinfo as ivars, init the resource list, save
 * then restore its config space, and populate its resources.  The
 * save/restore ordering here is load-bearing; do not reorder.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2753
/*
 * Probe method for the generic PCI bus driver.  Matches any PCI bus
 * but at generic priority so subclassed bus drivers can win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2763
/*
 * Attach method for the PCI bus driver: determine our domain and bus
 * number from the parent bridge, enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
2783
/*
 * Suspend method for the PCI bus: save each child's config space,
 * suspend the children, then (when power management on resume is
 * enabled and ACPI is present) place attached type 0 children into
 * the ACPI-suggested sleep power state (default D3).  The ordering --
 * save config, suspend drivers, then power down -- is load-bearing.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2832
/*
 * Resume method for the PCI bus: bring attached type 0 children back
 * to D0 (via ACPI when present and power management is enabled),
 * restore each child's saved config space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2868
/*
 * Locate the preloaded PCI vendor data module (if the loader provided
 * one) and record its address and size for later ID-to-name lookups.
 *
 * NOTE(review): the preload_search_info() results are dereferenced
 * without NULL checks -- presumably a module of this type always
 * carries MODINFO_ADDR/MODINFO_SIZE records; confirm.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/* terminate the database */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
2883
/*
 * Called when a new driver is added to the bus's devclass: re-probe
 * all children that have no driver yet, restoring their config space
 * first and saving it again (powering them down) if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2914
/*
 * Set up an interrupt handler for a child.  For a direct child, rid 0
 * is a legacy INTx interrupt (INTx decoding is unmasked); any other
 * rid is an MSI or MSI-X message, which is mapped through the parent
 * bridge, programmed into the device, and INTx is masked.  On any
 * failure after the generic setup succeeded, the handler is torn
 * back down before returning the error.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* First handler on this MSI: map and program it. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler on this entry: program and unmask it. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3004
3005int
3006pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3007    void *cookie)
3008{
3009	struct msix_table_entry *mte;
3010	struct resource_list_entry *rle;
3011	struct pci_devinfo *dinfo;
3012	int error, rid;
3013
3014	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3015		return (EINVAL);
3016
3017	/* If this isn't a direct child, just bail out */
3018	if (device_get_parent(child) != dev)
3019		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3020
3021	rid = rman_get_rid(irq);
3022	if (rid == 0) {
3023		/* Mask INTx */
3024		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3025	} else {
3026		/*
3027		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3028		 * decrement the appropriate handlers count and mask the
3029		 * MSI-X message, or disable MSI messages if the count
3030		 * drops to 0.
3031		 */
3032		dinfo = device_get_ivars(child);
3033		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3034		if (rle->res != irq)
3035			return (EINVAL);
3036		if (dinfo->cfg.msi.msi_alloc > 0) {
3037			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3038			    ("MSI-X index too high"));
3039			if (dinfo->cfg.msi.msi_handlers == 0)
3040				return (EINVAL);
3041			dinfo->cfg.msi.msi_handlers--;
3042			if (dinfo->cfg.msi.msi_handlers == 0)
3043				pci_disable_msi(child);
3044		} else {
3045			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3046			    ("No MSI or MSI-X interrupts allocated"));
3047			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3048			    ("MSI-X index too high"));
3049			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3050			if (mte->mte_handlers == 0)
3051				return (EINVAL);
3052			mte->mte_handlers--;
3053			if (mte->mte_handlers == 0)
3054				pci_mask_msix(child, rid - 1);
3055		}
3056	}
3057	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3058	if (rid > 0)
3059		KASSERT(error == 0,
3060		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3061	return (error);
3062}
3063
/*
 * Print a one-line description of a child device, including the port,
 * memory and IRQ resources recorded in its resource list.  Returns the
 * number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the flags of the bus ('dev'), not of
	 * the child being described -- confirm whether 'child' was
	 * intended here.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	/* The slot/function pair uniquely locates the child on this bus. */
	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3089
/*
 * Table mapping PCI class/subclass codes to human-readable descriptions,
 * used by pci_probe_nomatch() for devices with no attached driver.  A
 * subclass of -1 is the generic description for the whole class; the
 * more specific subclass entries that follow it take precedence when
 * they match.  The table is terminated by a NULL desc sentinel.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}	/* sentinel */
};
3181
3182void
3183pci_probe_nomatch(device_t dev, device_t child)
3184{
3185	int	i;
3186	char	*cp, *scp, *device;
3187
3188	/*
3189	 * Look for a listing for this device in a loaded device database.
3190	 */
3191	if ((device = pci_describe_device(child)) != NULL) {
3192		device_printf(dev, "<%s>", device);
3193		free(device, M_DEVBUF);
3194	} else {
3195		/*
3196		 * Scan the class/subclass descriptions for a general
3197		 * description.
3198		 */
3199		cp = "unknown";
3200		scp = NULL;
3201		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3202			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3203				if (pci_nomatch_tab[i].subclass == -1) {
3204					cp = pci_nomatch_tab[i].desc;
3205				} else if (pci_nomatch_tab[i].subclass ==
3206				    pci_get_subclass(child)) {
3207					scp = pci_nomatch_tab[i].desc;
3208				}
3209			}
3210		}
3211		device_printf(dev, "<%s%s%s>",
3212		    cp ? cp : "",
3213		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3214		    scp ? scp : "");
3215	}
3216	printf(" at device %d.%d (no driver attached)\n",
3217	    pci_get_slot(child), pci_get_function(child));
3218	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3219	return;
3220}
3221
3222/*
3223 * Parse the PCI device database, if loaded, and return a pointer to a
3224 * description of the device.
3225 *
3226 * The database is flat text formatted as follows:
3227 *
3228 * Any line not in a valid format is ignored.
3229 * Lines are terminated with newline '\n' characters.
3230 *
3231 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3232 * the vendor name.
3233 *
3234 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3235 * - devices cannot be listed without a corresponding VENDOR line.
3236 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3237 * another TAB, then the device name.
3238 */
3239
3240/*
3241 * Assuming (ptr) points to the beginning of a line in the database,
3242 * return the vendor or device and description of the next entry.
3243 * The value of (vendor) or (device) inappropriate for the entry type
3244 * is set to -1.  Returns nonzero at the end of the database.
3245 *
3246 * Note that this is slightly unrobust in the face of corrupt data;
3247 * we attempt to safeguard against this by spamming the end of the
3248 * database with a newline when we initialise.
3249 */
3250static int
3251pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3252{
3253	char	*cp = *ptr;
3254	int	left;
3255
3256	*device = -1;
3257	*vendor = -1;
3258	**desc = '\0';
3259	for (;;) {
3260		left = pci_vendordata_size - (cp - pci_vendordata);
3261		if (left <= 0) {
3262			*ptr = cp;
3263			return(1);
3264		}
3265
3266		/* vendor entry? */
3267		if (*cp != '\t' &&
3268		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3269			break;
3270		/* device entry? */
3271		if (*cp == '\t' &&
3272		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3273			break;
3274
3275		/* skip to next line */
3276		while (*cp != '\n' && left > 0) {
3277			cp++;
3278			left--;
3279		}
3280		if (*cp == '\n') {
3281			cp++;
3282			left--;
3283		}
3284	}
3285	/* skip to next line */
3286	while (*cp != '\n' && left > 0) {
3287		cp++;
3288		left--;
3289	}
3290	if (*cp == '\n' && left > 0)
3291		cp++;
3292	*ptr = cp;
3293	return(0);
3294}
3295
3296static char *
3297pci_describe_device(device_t dev)
3298{
3299	int	vendor, device;
3300	char	*desc, *vp, *dp, *line;
3301
3302	desc = vp = dp = NULL;
3303
3304	/*
3305	 * If we have no vendor data, we can't do anything.
3306	 */
3307	if (pci_vendordata == NULL)
3308		goto out;
3309
3310	/*
3311	 * Scan the vendor data looking for this device
3312	 */
3313	line = pci_vendordata;
3314	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3315		goto out;
3316	for (;;) {
3317		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3318			goto out;
3319		if (vendor == pci_get_vendor(dev))
3320			break;
3321	}
3322	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3323		goto out;
3324	for (;;) {
3325		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3326			*dp = 0;
3327			break;
3328		}
3329		if (vendor != -1) {
3330			*dp = 0;
3331			break;
3332		}
3333		if (device == pci_get_device(dev))
3334			break;
3335	}
3336	if (dp[0] == '\0')
3337		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3338	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3339	    NULL)
3340		sprintf(desc, "%s, %s", vp, dp);
3341 out:
3342	if (vp != NULL)
3343		free(vp, M_DEVBUF);
3344	if (dp != NULL)
3345		free(dp, M_DEVBUF);
3346	return(desc);
3347}
3348
/*
 * Read an instance variable (a cached PCI config-header field) of a
 * child device.  Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR
 * (not supported by this bus) and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3431
/*
 * Write an instance variable of a child device.  Only the interrupt
 * pin is writable; the identification/location ivars are read-only and
 * return EINVAL, and unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3464
3465
3466#include "opt_ddb.h"
3467#ifdef DDB
3468#include <ddb/ddb.h>
3469#include <sys/cons.h>
3470
3471/*
3472 * List resources based on pci map registers, used for within ddb
3473 */
3474
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line (driver name/unit, domain:bus:dev:func,
 * class, subsystem and device ids, revision, header type) per device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to give driverless devices distinct "none" units. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3514#endif /* DDB */
3515
/*
 * Lazily allocate the backing resource for a BAR on first use.  The
 * BAR is probed (via pci_read_bar) to learn its actual size and type,
 * the requested count/alignment are overridden to match the hardware,
 * a resource is allocated from the parent, recorded in the child's
 * resource list, and the BAR is programmed with the assigned base.
 * Returns the new (bus-owned, inactive) resource or NULL on failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The requested type must agree with what the BAR itself reports. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus owns the resource until the child allocates it. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3605
3606
/*
 * Allocate a resource for a child device.  IRQ 0 allocations may
 * trigger lazy interrupt routing (and are refused once MSI/MSI-X
 * messages are allocated); I/O port and memory allocations may trigger
 * lazy BAR sizing via pci_alloc_map().  Bus-owned BAR resources are
 * transferred to the child here and activated on request.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests from grandchildren are simply passed up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			/* Re-find the entry pci_alloc_map just added. */
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3678
3679int
3680pci_release_resource(device_t dev, device_t child, int type, int rid,
3681    struct resource *r)
3682{
3683	int error;
3684
3685	if (device_get_parent(child) != dev)
3686		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3687		    type, rid, r));
3688
3689	/*
3690	 * For BARs we don't actually want to release the resource.
3691	 * Instead, we deactivate the resource if needed and then give
3692	 * ownership of the BAR back to the bus.
3693	 */
3694	switch (type) {
3695	case SYS_RES_IOPORT:
3696	case SYS_RES_MEMORY:
3697		if (rman_get_device(r) != child)
3698			return (EINVAL);
3699		if (rman_get_flags(r) & RF_ACTIVE) {
3700			error = bus_deactivate_resource(child, type, rid, r);
3701			if (error)
3702				return (error);
3703		}
3704		rman_set_device(r, dev);
3705		return (0);
3706	}
3707	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3708}
3709
3710int
3711pci_activate_resource(device_t dev, device_t child, int type, int rid,
3712    struct resource *r)
3713{
3714	int error;
3715
3716	error = bus_generic_activate_resource(dev, child, type, rid, r);
3717	if (error)
3718		return (error);
3719
3720	/* Enable decoding in the command register when activating BARs. */
3721	if (device_get_parent(child) == dev) {
3722		switch (type) {
3723		case SYS_RES_IOPORT:
3724		case SYS_RES_MEMORY:
3725			error = PCI_ENABLE_IO(dev, child, type);
3726			break;
3727		}
3728	}
3729	return (error);
3730}
3731
/*
 * Delete a resource entry from a direct child's resource list,
 * releasing the backing resource if one exists.  Entries still owned
 * or actively used by the child are left alone (with a diagnostic);
 * BARs are cleared first so the device stops decoding the range.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on direct children of this bus. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3773
3774struct resource_list *
3775pci_get_resource_list (device_t dev, device_t child)
3776{
3777	struct pci_devinfo *dinfo = device_get_ivars(child);
3778
3779	return (&dinfo->resources);
3780}
3781
3782uint32_t
3783pci_read_config_method(device_t dev, device_t child, int reg, int width)
3784{
3785	struct pci_devinfo *dinfo = device_get_ivars(child);
3786	pcicfgregs *cfg = &dinfo->cfg;
3787
3788	return (PCIB_READ_CONFIG(device_get_parent(dev),
3789	    cfg->bus, cfg->slot, cfg->func, reg, width));
3790}
3791
3792void
3793pci_write_config_method(device_t dev, device_t child, int reg,
3794    uint32_t val, int width)
3795{
3796	struct pci_devinfo *dinfo = device_get_ivars(child);
3797	pcicfgregs *cfg = &dinfo->cfg;
3798
3799	PCIB_WRITE_CONFIG(device_get_parent(dev),
3800	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3801}
3802
3803int
3804pci_child_location_str_method(device_t dev, device_t child, char *buf,
3805    size_t buflen)
3806{
3807
3808	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3809	    pci_get_function(child));
3810	return (0);
3811}
3812
3813int
3814pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3815    size_t buflen)
3816{
3817	struct pci_devinfo *dinfo;
3818	pcicfgregs *cfg;
3819
3820	dinfo = device_get_ivars(child);
3821	cfg = &dinfo->cfg;
3822	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3823	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3824	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3825	    cfg->progif);
3826	return (0);
3827}
3828
3829int
3830pci_assign_interrupt_method(device_t dev, device_t child)
3831{
3832	struct pci_devinfo *dinfo = device_get_ivars(child);
3833	pcicfgregs *cfg = &dinfo->cfg;
3834
3835	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3836	    cfg->intpin));
3837}
3838
3839static int
3840pci_modevent(module_t mod, int what, void *arg)
3841{
3842	static struct cdev *pci_cdev;
3843
3844	switch (what) {
3845	case MOD_LOAD:
3846		STAILQ_INIT(&pci_devq);
3847		pci_generation = 0;
3848		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3849		    "pci");
3850		pci_load_vendor_data();
3851		break;
3852
3853	case MOD_UNLOAD:
3854		destroy_dev(pci_cdev);
3855		break;
3856	}
3857
3858	return (0);
3859}
3860
/*
 * Restore a device's saved configuration registers (BARs, command,
 * interrupt and timing fields) after a power transition or resume,
 * along with any MSI/MSI-X state.  The device is returned to D0 first
 * so the writes take effect.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3906
/*
 * Snapshot a device's writable configuration registers into its
 * pci_devinfo so pci_cfg_restore() can reinstate them later.  When
 * 'setstate' is nonzero, the device may additionally be placed in D3,
 * subject to the pci_do_power_nodriver policy and its device class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3990