/* pci.c, FreeBSD revision 168159 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 168157 2007-03-31 20:41:00Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static int		pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104static int		pci_msi_blacklisted(void);
105
/*
 * kobj dispatch table: binds the generic device/bus/PCI interface
 * methods to this driver's implementations.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Table terminator -- must stay last. */
	{ 0, 0 }
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
/* Attach the pci driver to every pcib (PCI bridge) instance. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
163
/*
 * In-memory copy of the pci_vendors device-name database, loaded on
 * demand by pci_load_vendor_data() and searched by
 * pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/* Per-device quirk entry, matched on the 32-bit device:vendor ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* Table terminator (devid 0). */
	{ 0 }
};
211
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices, plus its bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set during capability scan if a PCIe root port / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267
268/* Find a device_t by bus/slot/function */
269
270device_t
271pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272{
273	struct pci_devinfo *dinfo;
274
275	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276		if ((dinfo->cfg.bus == bus) &&
277		    (dinfo->cfg.slot == slot) &&
278		    (dinfo->cfg.func == func)) {
279			return (dinfo->cfg.dev);
280		}
281	}
282
283	return (NULL);
284}
285
286/* Find a device_t by vendor/device ID */
287
288device_t
289pci_find_device(uint16_t vendor, uint16_t device)
290{
291	struct pci_devinfo *dinfo;
292
293	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294		if ((dinfo->cfg.vendor == vendor) &&
295		    (dinfo->cfg.device == device)) {
296			return (dinfo->cfg.dev);
297		}
298	}
299
300	return (NULL);
301}
302
303/* return base address of memory or port map */
304
static uint32_t
pci_mapbase(uint32_t mapreg)
{
	/*
	 * Bit 0 of a BAR distinguishes I/O (1) from memory (0) maps.
	 * I/O BARs carry 2 low flag bits, memory BARs carry 4; mask
	 * them off to recover the base address.
	 */
	return (mapreg & ~((mapreg & 0x01) ? 0x03 : 0x0f));
}
313
314/* return map type of memory or port map */
315
316static int
317pci_maptype(unsigned mapreg)
318{
319	static uint8_t maptype[0x10] = {
320		PCI_MAPMEM,		PCI_MAPPORT,
321		PCI_MAPMEM,		0,
322		PCI_MAPMEM,		PCI_MAPPORT,
323		0,			0,
324		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
325		PCI_MAPMEM|PCI_MAPMEMP, 0,
326		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
327		0,			0,
328	};
329
330	return (maptype[mapreg & 0x0f]);
331}
332
333/* return log2 of map size decoded for memory or port map */
334
static int
pci_mapsize(uint32_t testval)
{
	int ln2size = 0;

	/*
	 * The BAR sizing probe writes all-ones and reads the register
	 * back; the position of the lowest set bit of the decoded base
	 * is log2 of the region size.  A value of zero yields zero.
	 */
	for (testval = pci_mapbase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
351
352/* return log2 of address range supported by map register */
353
static int
pci_maprange(unsigned mapreg)
{
	/*
	 * Bits 1-2 of a memory BAR encode its address-decode width;
	 * unknown/reserved encodings map to 0.
	 */
	switch (mapreg & 0x07) {
	case 0x00:
	case 0x01:
	case 0x05:
		return (32);
	case 0x02:
		return (20);	/* legacy "below 1MB" memory type */
	case 0x04:
		return (64);
	default:
		return (0);
	}
}
373
374/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
375
376static void
377pci_fixancient(pcicfgregs *cfg)
378{
379	if (cfg->hdrtype != 0)
380		return;
381
382	/* PCI to PCI bridges use header type 1 */
383	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
384		cfg->hdrtype = 1;
385}
386
387/* extract header type specific config data */
388
389static void
390pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
391{
392#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
393	switch (cfg->hdrtype) {
394	case 0:
395		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
396		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
397		cfg->nummaps	    = PCI_MAXMAPS_0;
398		break;
399	case 1:
400		cfg->nummaps	    = PCI_MAXMAPS_1;
401		break;
402	case 2:
403		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
404		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
405		cfg->nummaps	    = PCI_MAXMAPS_2;
406		break;
407	}
408#undef REG
409}
410
411/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/*
	 * A vendor/device register of all-ones means no device is
	 * present at this bus/slot/function.
	 */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/* 'size' lets subclassed buses allocate a larger devinfo. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Mirror the standard configuration header into cfg. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/*
		 * Split the multi-function flag out of the header type
		 * byte so hdrtype holds only the bare type value.
		 */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		/* Header-type-dependent fields (subvendor IDs, BAR count). */
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Scan the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Fill in the pciio(4) conf record used by userland. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
483
484static void
485pci_read_extcap(device_t pcib, pcicfgregs *cfg)
486{
487#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
488#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
489#if defined(__i386__) || defined(__amd64__)
490	uint64_t addr;
491#endif
492	uint32_t val;
493	int	ptr, nextptr, ptrptr;
494
495	switch (cfg->hdrtype & PCIM_HDRTYPE) {
496	case 0:
497	case 1:
498		ptrptr = PCIR_CAP_PTR;
499		break;
500	case 2:
501		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
502		break;
503	default:
504		return;		/* no extended capabilities support */
505	}
506	nextptr = REG(ptrptr, 1);	/* sanity check? */
507
508	/*
509	 * Read capability entries.
510	 */
511	while (nextptr != 0) {
512		/* Sanity check */
513		if (nextptr > 255) {
514			printf("illegal PCI extended capability offset %d\n",
515			    nextptr);
516			return;
517		}
518		/* Find the next entry */
519		ptr = nextptr;
520		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
521
522		/* Process this entry */
523		switch (REG(ptr + PCICAP_ID, 1)) {
524		case PCIY_PMG:		/* PCI power management */
525			if (cfg->pp.pp_cap == 0) {
526				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
527				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
528				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
529				if ((nextptr - ptr) > PCIR_POWER_DATA)
530					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
531			}
532			break;
533#if defined(__i386__) || defined(__amd64__)
534		case PCIY_HT:		/* HyperTransport */
535			/* Determine HT-specific capability type. */
536			val = REG(ptr + PCIR_HT_COMMAND, 2);
537			switch (val & PCIM_HTCMD_CAP_MASK) {
538			case PCIM_HTCAP_MSI_MAPPING:
539				/* Sanity check the mapping window. */
540				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
541				addr <<= 32;
542				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
543				if (addr != MSI_INTEL_ADDR_BASE)
544					device_printf(pcib,
545		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546					    cfg->bus, cfg->slot, cfg->func,
547					    (long long)addr);
548
549				/* Enable MSI -> HT mapping. */
550				val |= PCIM_HTCMD_MSI_ENABLE;
551				WREG(ptr + PCIR_HT_COMMAND, val, 2);
552				break;
553			}
554			break;
555#endif
556		case PCIY_MSI:		/* PCI MSI */
557			cfg->msi.msi_location = ptr;
558			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
559			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
560						     PCIM_MSICTRL_MMC_MASK)>>1);
561			break;
562		case PCIY_MSIX:		/* PCI MSI-X */
563			cfg->msix.msix_location = ptr;
564			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
565			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
566			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
567			val = REG(ptr + PCIR_MSIX_TABLE, 4);
568			cfg->msix.msix_table_bar = PCIR_BAR(val &
569			    PCIM_MSIX_BIR_MASK);
570			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
571			val = REG(ptr + PCIR_MSIX_PBA, 4);
572			cfg->msix.msix_pba_bar = PCIR_BAR(val &
573			    PCIM_MSIX_BIR_MASK);
574			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
575			break;
576		case PCIY_VPD:		/* PCI Vital Product Data */
577			cfg->vpd.vpd_reg = ptr;
578			break;
579		case PCIY_SUBVENDOR:
580			/* Should always be true. */
581			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
582				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
583				cfg->subvendor = val & 0xffff;
584				cfg->subdevice = val >> 16;
585			}
586			break;
587		case PCIY_PCIX:		/* PCI-X */
588			/*
589			 * Assume we have a PCI-X chipset if we have
590			 * at least one PCI-PCI bridge with a PCI-X
591			 * capability.  Note that some systems with
592			 * PCI-express or HT chipsets might match on
593			 * this check as well.
594			 */
595			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
596				pcix_chipset = 1;
597			break;
598		case PCIY_EXPRESS:	/* PCI-express */
599			/*
600			 * Assume we have a PCI-express chipset if we have
601			 * at least one PCI-express root port.
602			 */
603			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
604			if ((val & PCIM_EXP_FLAGS_TYPE) ==
605			    PCIM_EXP_TYPE_ROOT_PORT)
606				pcie_chipset = 1;
607			break;
608		default:
609			break;
610		}
611	}
612/* REG and WREG use carry through to next functions */
613}
614
615/*
616 * PCI Vital Product Data
617 */
/*
 * Read one aligned 32-bit word of VPD at offset 'reg'.  Writing the
 * offset with the flag bit (0x8000) clear starts a read; the device
 * sets the flag when the data register holds valid data.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
	/*
	 * Poll for completion.  NOTE(review): there is no timeout --
	 * this spins forever if the device never sets the flag bit.
	 */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
}
630
#if 0
/*
 * Write one aligned 32-bit word of VPD at offset 'reg'.  Writing the
 * offset with the flag bit (0x8000) set starts a write; the device
 * clears the flag when the write has completed.  Currently unused
 * (compiled out); kept as the counterpart of pci_read_vpd_reg().
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
645
/* Cursor state for consuming VPD one byte at a time from 32-bit reads. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of bytes consumed */
};
654
655static uint8_t
656vpd_nextbyte(struct vpd_readstate *vrs)
657{
658	uint8_t byte;
659
660	if (vrs->bytesinval == 0) {
661		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
662		    vrs->off));
663		vrs->off += 4;
664		byte = vrs->val & 0xff;
665		vrs->bytesinval = 3;
666	} else {
667		vrs->val = vrs->val >> 8;
668		byte = vrs->val & 0xff;
669		vrs->bytesinval--;
670	}
671
672	vrs->cksum += byte;
673	return (byte);
674}
675
676static void
677pci_read_vpd(device_t pcib, pcicfgregs *cfg)
678{
679	struct vpd_readstate vrs;
680	int state;
681	int name;
682	int remain;
683	int end;
684	int i;
685	uint8_t byte;
686	int alloc, off;		/* alloc/off for RO/W arrays */
687	int cksumvalid;
688	int dflen;
689
690	if (!pci_do_vpd) {
691		cfg->vpd.vpd_cached = 1;
692		return;
693	}
694
695	/* init vpd reader */
696	vrs.bytesinval = 0;
697	vrs.off = 0;
698	vrs.pcib = pcib;
699	vrs.cfg = cfg;
700	vrs.cksum = 0;
701
702	state = 0;
703	name = remain = i = 0;	/* shut up stupid gcc */
704	alloc = off = 0;	/* shut up stupid gcc */
705	dflen = 0;		/* shut up stupid gcc */
706	end = 0;
707	cksumvalid = -1;
708	for (; !end;) {
709		byte = vpd_nextbyte(&vrs);
710#if 0
711		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
712		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
713		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
714#endif
715		switch (state) {
716		case 0:		/* item name */
717			if (byte & 0x80) {
718				remain = vpd_nextbyte(&vrs);
719				remain |= vpd_nextbyte(&vrs) << 8;
720				if (remain > (0x7f*4 - vrs.off)) {
721					end = 1;
722					printf(
723			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
724					    cfg->bus, cfg->slot, cfg->func,
725					    remain);
726				}
727				name = byte & 0x7f;
728			} else {
729				remain = byte & 0x7;
730				name = (byte >> 3) & 0xf;
731			}
732			switch (name) {
733			case 0x2:	/* String */
734				cfg->vpd.vpd_ident = malloc(remain + 1,
735				    M_DEVBUF, M_WAITOK);
736				i = 0;
737				state = 1;
738				break;
739			case 0xf:	/* End */
740				end = 1;
741				state = -1;
742				break;
743			case 0x10:	/* VPD-R */
744				alloc = 8;
745				off = 0;
746				cfg->vpd.vpd_ros = malloc(alloc *
747				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
748				    M_WAITOK);
749				state = 2;
750				break;
751			case 0x11:	/* VPD-W */
752				alloc = 8;
753				off = 0;
754				cfg->vpd.vpd_w = malloc(alloc *
755				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
756				    M_WAITOK);
757				state = 5;
758				break;
759			default:	/* Invalid data, abort */
760				end = 1;
761				continue;
762			}
763			break;
764
765		case 1:	/* Identifier String */
766			cfg->vpd.vpd_ident[i++] = byte;
767			remain--;
768			if (remain == 0)  {
769				cfg->vpd.vpd_ident[i] = '\0';
770				state = 0;
771			}
772			break;
773
774		case 2:	/* VPD-R Keyword Header */
775			if (off == alloc) {
776				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
777				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
778				    M_DEVBUF, M_WAITOK);
779			}
780			cfg->vpd.vpd_ros[off].keyword[0] = byte;
781			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
782			dflen = vpd_nextbyte(&vrs);
783			if (dflen == 0 &&
784			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
785			    2) == 0) {
786				/*
787				 * if this happens, we can't trust the rest
788				 * of the VPD.
789				 */
790				printf("pci%d:%d:%d: bad keyword length: %d\n",
791				    cfg->bus, cfg->slot, cfg->func, dflen);
792				cksumvalid = 0;
793				end = 1;
794				break;
795			} else if (dflen == 0) {
796				cfg->vpd.vpd_ros[off].value = malloc(1 *
797				    sizeof *cfg->vpd.vpd_ros[off].value,
798				    M_DEVBUF, M_WAITOK);
799				cfg->vpd.vpd_ros[off].value[0] = '\x00';
800			} else
801				cfg->vpd.vpd_ros[off].value = malloc(
802				    (dflen + 1) *
803				    sizeof *cfg->vpd.vpd_ros[off].value,
804				    M_DEVBUF, M_WAITOK);
805			remain -= 3;
806			i = 0;
807			/* keep in sync w/ state 3's transistions */
808			if (dflen == 0 && remain == 0)
809				state = 0;
810			else if (dflen == 0)
811				state = 2;
812			else
813				state = 3;
814			break;
815
816		case 3:	/* VPD-R Keyword Value */
817			cfg->vpd.vpd_ros[off].value[i++] = byte;
818			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
819			    "RV", 2) == 0 && cksumvalid == -1) {
820				if (vrs.cksum == 0)
821					cksumvalid = 1;
822				else {
823					printf(
824				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
825					    cfg->bus, cfg->slot, cfg->func,
826					    vrs.cksum);
827					cksumvalid = 0;
828					end = 1;
829					break;
830				}
831			}
832			dflen--;
833			remain--;
834			/* keep in sync w/ state 2's transistions */
835			if (dflen == 0)
836				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
837			if (dflen == 0 && remain == 0) {
838				cfg->vpd.vpd_rocnt = off;
839				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
840				    off * sizeof *cfg->vpd.vpd_ros,
841				    M_DEVBUF, M_WAITOK);
842				state = 0;
843			} else if (dflen == 0)
844				state = 2;
845			break;
846
847		case 4:
848			remain--;
849			if (remain == 0)
850				state = 0;
851			break;
852
853		case 5:	/* VPD-W Keyword Header */
854			if (off == alloc) {
855				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
856				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
857				    M_DEVBUF, M_WAITOK);
858			}
859			cfg->vpd.vpd_w[off].keyword[0] = byte;
860			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
861			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
862			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
863			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
864			    sizeof *cfg->vpd.vpd_w[off].value,
865			    M_DEVBUF, M_WAITOK);
866			remain -= 3;
867			i = 0;
868			/* keep in sync w/ state 6's transistions */
869			if (dflen == 0 && remain == 0)
870				state = 0;
871			else if (dflen == 0)
872				state = 5;
873			else
874				state = 6;
875			break;
876
877		case 6:	/* VPD-W Keyword Value */
878			cfg->vpd.vpd_w[off].value[i++] = byte;
879			dflen--;
880			remain--;
881			/* keep in sync w/ state 5's transistions */
882			if (dflen == 0)
883				cfg->vpd.vpd_w[off++].value[i++] = '\0';
884			if (dflen == 0 && remain == 0) {
885				cfg->vpd.vpd_wcnt = off;
886				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
887				    off * sizeof *cfg->vpd.vpd_w,
888				    M_DEVBUF, M_WAITOK);
889				state = 0;
890			} else if (dflen == 0)
891				state = 5;
892			break;
893
894		default:
895			printf("pci%d:%d:%d: invalid state: %d\n",
896			    cfg->bus, cfg->slot, cfg->func, state);
897			end = 1;
898			break;
899		}
900	}
901
902	if (cksumvalid == 0) {
903		/* read-only data bad, clean up */
904		for (; off; off--)
905			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
906
907		free(cfg->vpd.vpd_ros, M_DEVBUF);
908		cfg->vpd.vpd_ros = NULL;
909	}
910	cfg->vpd.vpd_cached = 1;
911#undef REG
912#undef WREG
913}
914
915int
916pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
917{
918	struct pci_devinfo *dinfo = device_get_ivars(child);
919	pcicfgregs *cfg = &dinfo->cfg;
920
921	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
922		pci_read_vpd(device_get_parent(dev), cfg);
923
924	*identptr = cfg->vpd.vpd_ident;
925
926	if (*identptr == NULL)
927		return (ENXIO);
928
929	return (0);
930}
931
932int
933pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
934	const char **vptr)
935{
936	struct pci_devinfo *dinfo = device_get_ivars(child);
937	pcicfgregs *cfg = &dinfo->cfg;
938	int i;
939
940	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
941		pci_read_vpd(device_get_parent(dev), cfg);
942
943	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
944		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
945		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
946			*vptr = cfg->vpd.vpd_ros[i].value;
947		}
948
949	if (i != cfg->vpd.vpd_rocnt)
950		return (0);
951
952	*vptr = NULL;
953	return (ENXIO);
954}
955
956/*
957 * Return the offset in configuration space of the requested extended
958 * capability entry or 0 if the specified capability was not found.
959 */
960int
961pci_find_extcap_method(device_t dev, device_t child, int capability,
962    int *capreg)
963{
964	struct pci_devinfo *dinfo = device_get_ivars(child);
965	pcicfgregs *cfg = &dinfo->cfg;
966	u_int32_t status;
967	u_int8_t ptr;
968
969	/*
970	 * Check the CAP_LIST bit of the PCI status register first.
971	 */
972	status = pci_read_config(child, PCIR_STATUS, 2);
973	if (!(status & PCIM_STATUS_CAPPRESENT))
974		return (ENXIO);
975
976	/*
977	 * Determine the start pointer of the capabilities list.
978	 */
979	switch (cfg->hdrtype & PCIM_HDRTYPE) {
980	case 0:
981	case 1:
982		ptr = PCIR_CAP_PTR;
983		break;
984	case 2:
985		ptr = PCIR_CAP_PTR_2;
986		break;
987	default:
988		/* XXX: panic? */
989		return (ENXIO);		/* no extended capabilities support */
990	}
991	ptr = pci_read_config(child, ptr, 1);
992
993	/*
994	 * Traverse the capabilities list.
995	 */
996	while (ptr != 0) {
997		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
998			if (capreg != NULL)
999				*capreg = ptr;
1000			return (0);
1001		}
1002		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1003	}
1004
1005	return (ENOENT);
1006}
1007
1008/*
1009 * Support for MSI-X message interrupts.
1010 */
/*
 * Program the address/data pair for MSI-X message 'index' into the
 * device's MSI-X table.  Each table entry is 16 bytes: address low,
 * address high, message data, then vector control (untouched here).
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset;

	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
	offset = cfg->msix.msix_table_offset + index * 16;
	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
}
1024
/*
 * Mask MSI-X message 'index' by setting the mask bit in the entry's
 * vector control word (the last dword of the 16-byte table entry).
 */
void
pci_mask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset, val;

	/*
	 * NOTE(review): this asserts against msix_msgnum while
	 * pci_unmask_msix() asserts against msix_alloc -- confirm the
	 * asymmetry is intentional.
	 */
	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
	offset = cfg->msix.msix_table_offset + index * 16 + 12;
	val = bus_read_4(cfg->msix.msix_table_res, offset);
	/* Only write the register back if the bit actually changes. */
	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
		val |= PCIM_MSIX_VCTRL_MASK;
		bus_write_4(cfg->msix.msix_table_res, offset, val);
	}
}
1040
/*
 * Unmask MSI-X message 'index' by clearing the mask bit in the entry's
 * vector control word.
 */
void
pci_unmask_msix(device_t dev, u_int index)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset, val;

	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
	offset = cfg->msix.msix_table_offset + index * 16 + 12;
	val = bus_read_4(cfg->msix.msix_table_res, offset);
	/* Only write the register back if the bit actually changes. */
	if (val & PCIM_MSIX_VCTRL_MASK) {
		val &= ~PCIM_MSIX_VCTRL_MASK;
		bus_write_4(cfg->msix.msix_table_res, offset, val);
	}
}
1056
1057int
1058pci_pending_msix(device_t dev, u_int index)
1059{
1060	struct pci_devinfo *dinfo = device_get_ivars(dev);
1061	pcicfgregs *cfg = &dinfo->cfg;
1062	uint32_t offset, bit;
1063
1064	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1065	offset = cfg->msix.msix_pba_offset + (index / 32) * 4;
1066	bit = 1 << index % 32;
1067	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1068}
1069
1070/*
1071 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1072 * returned in *count.  After this function returns, each message will be
1073 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1074 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory resource(s)
	 * containing the MSI-X table and pending bit array.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still names the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate no more than the caller asked for or the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
		    &irq);
		if (error)
			break;
		/* Expose the message as a SYS_RES_IRQ resource at rid i+1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* A partial allocation is accepted; 'actual' is what we got. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	*count = actual;
	return (0);
}
1195
1196/*
1197 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1198 * the first N messages in the MSI-X table.  However, device drivers may
1199 * want to use different layouts in the case that they do not allocate a
1200 * full table.  This method allows the driver to specify what layout it
1201 * wants.  It must be called after a successful pci_alloc_msix() but
1202 * before any of the associated SYS_RES_IRQ resources are allocated via
1203 * bus_alloc_resource().  The 'indices' array contains N (where N equals
1204 * the 'count' returned from pci_alloc_msix()) message indices.  The
1205 * indices are 1-based (meaning the first message is at index 1).  On
1206 * successful return, each of the messages in the 'indices' array will
1207 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1208 * if indices contains { 2, 4 }, then upon successful return, the 'child'
1209 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1210 */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/*
	 * Sanity check the indices.  'indices' must hold exactly
	 * msix_alloc entries, each a valid 1-based message index.
	 */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/*
	 * Make sure none of the resources are allocated.  The rids may
	 * be sparse (after an earlier remap), so walk until msix_alloc
	 * entries have been seen rather than over a fixed range.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Map the IRQ values to the new message indices and rids. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);

		/*
		 * The indices in the backend code (PCIB_* methods and the
		 * MI helper routines for MD code such as pci_enable_msix())
		 * are all zero-based.  However, the indices passed to this
		 * function are 1-based so that they correspond 1:1 with the
		 * SYS_RES_IRQ resource IDs.
		 */
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i] - 1, irqs[i]);
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1282
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, hand each message back to the bridge, and
 * delete the SYS_RES_IRQ resource-list entries.  Returns ENODEV when
 * no MSI-X messages are allocated and EBUSY if any of the associated
 * IRQ resources are still held by the driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  The rids may
	 * be sparse after pci_remap_msix(), so count matches instead of
	 * walking a fixed range.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1325
1326/*
1327 * Return the max supported MSI-X messages this device supports.
1328 * Basically, assuming the MD code can alloc messages, this function
1329 * should return the maximum value that pci_alloc_msix() can return.
1330 * Thus, it is subject to the tunables, etc.
1331 */
1332int
1333pci_msix_count_method(device_t dev, device_t child)
1334{
1335	struct pci_devinfo *dinfo = device_get_ivars(child);
1336	pcicfgregs *cfg = &dinfo->cfg;
1337
1338	if (pci_do_msix && cfg->msix.msix_location != 0)
1339		return (cfg->msix.msix_msgnum);
1340	return (0);
1341}
1342
1343/*
1344 * Support for MSI message signalled interrupts.
1345 */
/*
 * Program the MSI capability's address and data registers and enable
 * MSI in the control register.  When the capability advertises 64-bit
 * support the upper address register is written and the data register
 * moves to its 64-bit offset.  The values are cached in the softc so
 * pci_resume_msi() can restore them.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	/* Write data and address values. */
	cfg->msi.msi_addr = address;
	cfg->msi.msi_data = data;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA_64BIT, data, 2);
	} else
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA, data, 2);

	/* Enable MSI in the control register. */
	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1371
1372/*
1373 * Restore MSI registers during resume.  If MSI is enabled then
1374 * restore the data and address registers in addition to the control
1375 * register.
1376 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data when MSI was enabled before suspend. */
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = cfg->msi.msi_addr;
		data = cfg->msi.msi_data;
		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA, data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1402
1403/*
1404 * Returns true if the specified device is blacklisted because MSI
1405 * doesn't work.
1406 */
1407int
1408pci_msi_device_blacklisted(device_t dev)
1409{
1410	struct pci_quirk *q;
1411
1412	if (!pci_honor_msi_blacklist)
1413		return (0);
1414
1415	for (q = &pci_quirks[0]; q->devid; q++) {
1416		if (q->devid == pci_get_devid(dev) &&
1417		    q->type == PCI_QUIRK_DISABLE_MSI)
1418			return (1);
1419	}
1420	return (0);
1421}
1422
1423/*
1424 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1425 * we just check for blacklisted chipsets as represented by the
1426 * host-PCI bridge at device 0:0:0.  In the future, it may become
1427 * necessary to check other system attributes, such as the kenv values
1428 * that give the motherboard manufacturer and model number.
1429 */
1430static int
1431pci_msi_blacklisted(void)
1432{
1433	device_t dev;
1434
1435	if (!pci_honor_msi_blacklist)
1436		return (0);
1437
1438	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1439	if (!(pcie_chipset || pcix_chipset))
1440		return (1);
1441
1442	dev = pci_find_bsf(0, 0, 0);
1443	if (dev != NULL)
1444		return (pci_msi_device_blacklisted(dev));
1445	return (0);
1446}
1447
1448/*
1449 * Attempt to allocate *count MSI messages.  The actual number allocated is
1450 * returned in *count.  After this function returns, each message will be
1451 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1452 */
1453int
1454pci_alloc_msi_method(device_t dev, device_t child, int *count)
1455{
1456	struct pci_devinfo *dinfo = device_get_ivars(child);
1457	pcicfgregs *cfg = &dinfo->cfg;
1458	struct resource_list_entry *rle;
1459	int actual, error, i, irqs[32];
1460	uint16_t ctrl;
1461
1462	/* Don't let count == 0 get us into trouble. */
1463	if (*count == 0)
1464		return (EINVAL);
1465
1466	/* If rid 0 is allocated, then fail. */
1467	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1468	if (rle != NULL && rle->res != NULL)
1469		return (ENXIO);
1470
1471	/* Already have allocated messages? */
1472	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1473		return (ENXIO);
1474
1475	/* If MSI is blacklisted for this system, fail. */
1476	if (pci_msi_blacklisted())
1477		return (ENXIO);
1478
1479	/* MSI capability present? */
1480	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1481		return (ENODEV);
1482
1483	if (bootverbose)
1484		device_printf(child,
1485		    "attempting to allocate %d MSI vectors (%d supported)\n",
1486		    *count, cfg->msi.msi_msgnum);
1487
1488	/* Don't ask for more than the device supports. */
1489	actual = min(*count, cfg->msi.msi_msgnum);
1490
1491	/* Don't ask for more than 32 messages. */
1492	actual = min(actual, 32);
1493
1494	/* MSI requires power of 2 number of messages. */
1495	if (!powerof2(actual))
1496		return (EINVAL);
1497
1498	for (;;) {
1499		/* Try to allocate N messages. */
1500		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1501		    cfg->msi.msi_msgnum, irqs);
1502		if (error == 0)
1503			break;
1504		if (actual == 1)
1505			return (error);
1506
1507		/* Try N / 2. */
1508		actual >>= 1;
1509	}
1510
1511	/*
1512	 * We now have N actual messages mapped onto SYS_RES_IRQ
1513	 * resources in the irqs[] array, so add new resources
1514	 * starting at rid 1.
1515	 */
1516	for (i = 0; i < actual; i++)
1517		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1518		    irqs[i], irqs[i], 1);
1519
1520	if (bootverbose) {
1521		if (actual == 1)
1522			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1523		else {
1524			int run;
1525
1526			/*
1527			 * Be fancy and try to print contiguous runs
1528			 * of IRQ values as ranges.  'run' is true if
1529			 * we are in a range.
1530			 */
1531			device_printf(child, "using IRQs %d", irqs[0]);
1532			run = 0;
1533			for (i = 1; i < actual; i++) {
1534
1535				/* Still in a run? */
1536				if (irqs[i] == irqs[i - 1] + 1) {
1537					run = 1;
1538					continue;
1539				}
1540
1541				/* Finish previous range. */
1542				if (run) {
1543					printf("-%d", irqs[i - 1]);
1544					run = 0;
1545				}
1546
1547				/* Start new range. */
1548				printf(",%d", irqs[i]);
1549			}
1550
1551			/* Unfinished range? */
1552			if (run)
1553				printf("%d", irqs[actual - 1]);
1554			printf(" for MSI\n");
1555		}
1556	}
1557
1558	/* Update control register with actual count and enable MSI. */
1559	ctrl = cfg->msi.msi_ctrl;
1560	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1561	ctrl |= (ffs(actual) - 1) << 4;
1562	cfg->msi.msi_ctrl = ctrl;
1563	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1564
1565	/* Update counts of alloc'd messages. */
1566	cfg->msi.msi_alloc = actual;
1567	*count = actual;
1568	return (0);
1569}
1570
1571/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other value (success or
	 * failure) means MSI-X was in use and we are done.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1614
1615/*
1616 * Return the max supported MSI messages this device supports.
1617 * Basically, assuming the MD code can alloc messages, this function
1618 * should return the maximum value that pci_alloc_msi() can return.
1619 * Thus, it is subject to the tunables, etc.
1620 */
1621int
1622pci_msi_count_method(device_t dev, device_t child)
1623{
1624	struct pci_devinfo *dinfo = device_get_ivars(child);
1625	pcicfgregs *cfg = &dinfo->cfg;
1626
1627	if (pci_do_msi && cfg->msi.msi_location != 0)
1628		return (cfg->msi.msi_msgnum);
1629	return (0);
1630}
1631
/* Free a pci_devinfo/pcicfgregs structure and all dependent data structures. */
1633
1634int
1635pci_freecfg(struct pci_devinfo *dinfo)
1636{
1637	struct devlist *devlist_head;
1638	int i;
1639
1640	devlist_head = &pci_devq;
1641
1642	if (dinfo->cfg.vpd.vpd_reg) {
1643		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1644		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1645			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1646		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1647		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1648			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1649		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1650	}
1651	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1652	free(dinfo, M_DEVBUF);
1653
1654	/* increment the generation count */
1655	pci_generation++;
1656
1657	/* we're losing one device */
1658	pci_numdevs--;
1659	return (0);
1660}
1661
1662/*
1663 * PCI power manangement
1664 */
1665int
1666pci_set_powerstate_method(device_t dev, device_t child, int state)
1667{
1668	struct pci_devinfo *dinfo = device_get_ivars(child);
1669	pcicfgregs *cfg = &dinfo->cfg;
1670	uint16_t status;
1671	int result, oldstate, highest, delay;
1672
1673	if (cfg->pp.pp_cap == 0)
1674		return (EOPNOTSUPP);
1675
1676	/*
1677	 * Optimize a no state change request away.  While it would be OK to
1678	 * write to the hardware in theory, some devices have shown odd
1679	 * behavior when going from D3 -> D3.
1680	 */
1681	oldstate = pci_get_powerstate(child);
1682	if (oldstate == state)
1683		return (0);
1684
1685	/*
1686	 * The PCI power management specification states that after a state
1687	 * transition between PCI power states, system software must
1688	 * guarantee a minimal delay before the function accesses the device.
1689	 * Compute the worst case delay that we need to guarantee before we
1690	 * access the device.  Many devices will be responsive much more
1691	 * quickly than this delay, but there are some that don't respond
1692	 * instantly to state changes.  Transitions to/from D3 state require
1693	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1694	 * is done below with DELAY rather than a sleeper function because
1695	 * this function can be called from contexts where we cannot sleep.
1696	 */
1697	highest = (oldstate > state) ? oldstate : state;
1698	if (highest == PCI_POWERSTATE_D3)
1699	    delay = 10000;
1700	else if (highest == PCI_POWERSTATE_D2)
1701	    delay = 200;
1702	else
1703	    delay = 0;
1704	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1705	    & ~PCIM_PSTAT_DMASK;
1706	result = 0;
1707	switch (state) {
1708	case PCI_POWERSTATE_D0:
1709		status |= PCIM_PSTAT_D0;
1710		break;
1711	case PCI_POWERSTATE_D1:
1712		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1713			return (EOPNOTSUPP);
1714		status |= PCIM_PSTAT_D1;
1715		break;
1716	case PCI_POWERSTATE_D2:
1717		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1718			return (EOPNOTSUPP);
1719		status |= PCIM_PSTAT_D2;
1720		break;
1721	case PCI_POWERSTATE_D3:
1722		status |= PCIM_PSTAT_D3;
1723		break;
1724	default:
1725		return (EINVAL);
1726	}
1727
1728	if (bootverbose)
1729		printf(
1730		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1731		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1732		    oldstate, state);
1733
1734	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1735	if (delay)
1736		DELAY(delay);
1737	return (0);
1738}
1739
1740int
1741pci_get_powerstate_method(device_t dev, device_t child)
1742{
1743	struct pci_devinfo *dinfo = device_get_ivars(child);
1744	pcicfgregs *cfg = &dinfo->cfg;
1745	uint16_t status;
1746	int result;
1747
1748	if (cfg->pp.pp_cap != 0) {
1749		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1750		switch (status & PCIM_PSTAT_DMASK) {
1751		case PCIM_PSTAT_D0:
1752			result = PCI_POWERSTATE_D0;
1753			break;
1754		case PCIM_PSTAT_D1:
1755			result = PCI_POWERSTATE_D1;
1756			break;
1757		case PCIM_PSTAT_D2:
1758			result = PCI_POWERSTATE_D2;
1759			break;
1760		case PCIM_PSTAT_D3:
1761			result = PCI_POWERSTATE_D3;
1762			break;
1763		default:
1764			result = PCI_POWERSTATE_UNKNOWN;
1765			break;
1766		}
1767	} else {
1768		/* No support, device is always at D0 */
1769		result = PCI_POWERSTATE_D0;
1770	}
1771	return (result);
1772}
1773
1774/*
1775 * Some convenience functions for PCI device drivers.
1776 */
1777
1778static __inline void
1779pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1780{
1781	uint16_t	command;
1782
1783	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1784	command |= bit;
1785	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1786}
1787
1788static __inline void
1789pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1790{
1791	uint16_t	command;
1792
1793	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1794	command &= ~bit;
1795	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1796}
1797
/* Enable bus mastering (DMA) for 'child' via its command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1804
/* Disable bus mastering (DMA) for 'child' via its command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1811
1812int
1813pci_enable_io_method(device_t dev, device_t child, int space)
1814{
1815	uint16_t command;
1816	uint16_t bit;
1817	char *error;
1818
1819	bit = 0;
1820	error = NULL;
1821
1822	switch(space) {
1823	case SYS_RES_IOPORT:
1824		bit = PCIM_CMD_PORTEN;
1825		error = "port";
1826		break;
1827	case SYS_RES_MEMORY:
1828		bit = PCIM_CMD_MEMEN;
1829		error = "memory";
1830		break;
1831	default:
1832		return (EINVAL);
1833	}
1834	pci_set_command_bit(dev, child, bit);
1835	/* Some devices seem to need a brief stall here, what do to? */
1836	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1837	if (command & bit)
1838		return (0);
1839	device_printf(child, "failed to enable %s mapping!\n", error);
1840	return (ENXIO);
1841}
1842
1843int
1844pci_disable_io_method(device_t dev, device_t child, int space)
1845{
1846	uint16_t command;
1847	uint16_t bit;
1848	char *error;
1849
1850	bit = 0;
1851	error = NULL;
1852
1853	switch(space) {
1854	case SYS_RES_IOPORT:
1855		bit = PCIM_CMD_PORTEN;
1856		error = "port";
1857		break;
1858	case SYS_RES_MEMORY:
1859		bit = PCIM_CMD_MEMEN;
1860		error = "memory";
1861		break;
1862	default:
1863		return (EINVAL);
1864	}
1865	pci_clear_command_bit(dev, child, bit);
1866	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1867	if (command & bit) {
1868		device_printf(child, "failed to disable %s mapping!\n", error);
1869		return (ENXIO);
1870	}
1871	return (0);
1872}
1873
1874/*
1875 * New style pci driver.  Parent device is either a pci-host-bridge or a
1876 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1877 */
1878
/*
 * Dump a newly-found device's config-space summary to the console.
 * Only emits output under bootverbose.  Covers identification,
 * location, class, command/status, timing, interrupt routing, and the
 * power-management, MSI, and MSI-X capabilities when present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read status: the current D-state is live data. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1935
1936static int
1937pci_porten(device_t pcib, int b, int s, int f)
1938{
1939	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1940		& PCIM_CMD_PORTEN) != 0;
1941}
1942
1943static int
1944pci_memen(device_t pcib, int b, int s, int f)
1945{
1946	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1947		& PCIM_CMD_MEMEN) != 0;
1948}
1949
1950/*
1951 * Add a resource based on a pci map register. Return 1 if the map
1952 * register is a 32bit map register or 2 if it is a 64bit register.
1953 */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	/*
	 * Size the BAR the standard way: save the value, write all 1s,
	 * read back the implemented bits, then restore the original
	 * value.  This ordering must not change.
	 */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	if (pci_maptype(map) & PCI_MAPMEM)
		type = SYS_RES_MEMORY;
	else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	/* A 64-bit BAR consumes two config-space dwords. */
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if ((testval & 0x1) == 0x1 &&
	    (testval & 0x2) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* NOTE(review): message lacks a trailing newline -- confirm. */
		device_printf(bus,
		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Not quite sure what to do on failure of allocating the resource
	 * since I can postulate several right answers.
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL)
		return (barlen);
	start = rman_get_start(res);
	if ((u_long)start != start) {
		/* Wait a minute!  this platform can't do this address. */
		device_printf(bus,
		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
		    b, s, f, reg, (uintmax_t)start);
		resource_list_release(rl, bus, dev, type, reg, res);
		return (barlen);
	}
	/* Program the (possibly new) base address back into the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2089
2090/*
2091 * For ATA devices we need to decide early what addressing mode to use.
2092 * Legacy demands that the primary and secondary ATA ports sits on the
2093 * same addresses that old ISA hardware did. This dictates that we use
2094 * those addresses and ignore the BAR's if we cannot set PCI native
2095 * addressing mode.
2096 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: when the progif says it runs in native mode,
	 * trust BAR(0)/BAR(1); otherwise hardwire the legacy ISA
	 * addresses (0x1f0-0x1f7 command block, 0x3f6 control) and
	 * ignore whatever the BARs contain.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/*
	 * Secondary channel: same policy using BAR(2)/BAR(3) or the
	 * legacy addresses 0x170-0x177 and 0x376.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/*
	 * BAR(4) and BAR(5) are always taken from the device as-is
	 * (BAR(4) is presumably the bus-master DMA block -- not
	 * verified here).
	 */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2150
/*
 * Assign a legacy INTx interrupt to a device and record it as the
 * rid-0 IRQ resource.  The IRQ is chosen, in order of preference,
 * from: a user tunable of the form hw.pci<bus>.<slot>.INT<pin>.irq,
 * an IRQ routed by the parent bus, or the value already latched in
 * the intline config register.  If force_route is nonzero, routing
 * is attempted even when intline already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2197
/*
 * Populate a child's resource list: walk its BARs (with special
 * handling for legacy-mode ATA controllers), add any quirk-listed
 * map registers, and hook up the legacy INTx interrupt if one is
 * wired.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/*
	 * ATA devices need special map treatment: IDE controllers in
	 * legacy (compatibility) mode, or with both channel BARs
	 * reading zero, get fixed ISA addresses instead of their BARs.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/*
		 * pci_add_map()'s return value advances i past the BAR
		 * registers it consumed (64-bit BARs take two slots).
		 */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2250
/*
 * Enumerate all slots/functions on bus 'busno' via the parent
 * bridge's config accessors and add a child device for every
 * function that responds.  dinfo_size lets subclassed busses embed
 * struct pci_devinfo in a larger per-device structure.
 */
void
pci_add_children(device_t dev, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* short settle delay before probing each slot */
		DELAY(1);
		/* A bogus header type means nothing is at this slot. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan all functions only on multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2282
/*
 * Register a newly-found PCI function as a newbus child: create the
 * device, attach the devinfo as its ivars, initialize the resource
 * list, snapshot and restore its config space, and add its BAR and
 * interrupt resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2294
/*
 * Bus probe method.  Matches unconditionally but at a very low
 * priority (-1000) so that more specific PCI bus subclasses win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2304
/*
 * Bus attach method: determine our physical bus number from the
 * parent bridge, enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2324
/*
 * Bus suspend method.  Order matters: first save every child's
 * config space, then suspend the children, and only then power
 * attached type-0 devices down to D3 (or whatever state ACPI
 * suggests).  Without ACPI (or with pci_do_power_resume off) the
 * firmware is left in charge of device power.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2372
/*
 * Bus resume method: bring each child back to D0 (when ACPI-driven
 * power management is in effect), restore its saved config space,
 * and then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2407
/*
 * Locate the preloaded "pci_vendor_data" file (if any) and record
 * its address and size for pci_describe_device().
 *
 * NOTE(review): preload_search_info() can in principle return NULL;
 * this code assumes the MODINFO_ADDR and MODINFO_SIZE records are
 * always present for a preloaded file -- confirm.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/* terminate the database */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
2422
/*
 * Bus driver_added method: give the new driver an identify pass and
 * retry probing every child that currently has no driver.  Config
 * state is restored before the probe (the device may have been
 * powered down) and saved again -- possibly powering the device
 * back down -- if nothing attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2451
/*
 * Bus print_child method: the standard header/footer plus the
 * child's port, memory and IRQ resources, flags, and its
 * slot.function address.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2477
/*
 * Class/subclass -> human-readable description table used by
 * pci_probe_nomatch() when no driver attaches.  A subclass of -1
 * is the fallback description for the entire class; the list is
 * terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2564
/*
 * Bus probe_nomatch method: announce a device no driver claimed,
 * using the loaded vendor database when available and otherwise the
 * class/subclass table above.  Optionally powers the orphan down
 * (hw.pci.do_power_nodriver).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.  cp starts out non-NULL, so the class
		 * part of the message always prints something.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	if (pci_do_power_nodriver)
		pci_cfg_save(child,
		    (struct pci_devinfo *) device_get_ivars(child), 1);
	return;
}
2606
2607/*
2608 * Parse the PCI device database, if loaded, and return a pointer to a
2609 * description of the device.
2610 *
2611 * The database is flat text formatted as follows:
2612 *
2613 * Any line not in a valid format is ignored.
2614 * Lines are terminated with newline '\n' characters.
2615 *
2616 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2617 * the vendor name.
2618 *
2619 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2620 * - devices cannot be listed without a corresponding VENDOR line.
2621 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2622 * another TAB, then the device name.
2623 */
2624
2625/*
2626 * Assuming (ptr) points to the beginning of a line in the database,
2627 * return the vendor or device and description of the next entry.
2628 * The value of (vendor) or (device) inappropriate for the entry type
2629 * is set to -1.  Returns nonzero at the end of the database.
2630 *
2631 * Note that this is slightly unrobust in the face of corrupt data;
2632 * we attempt to safeguard against this by spamming the end of the
2633 * database with a newline when we initialise.
2634 */
2635static int
2636pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2637{
2638	char	*cp = *ptr;
2639	int	left;
2640
2641	*device = -1;
2642	*vendor = -1;
2643	**desc = '\0';
2644	for (;;) {
2645		left = pci_vendordata_size - (cp - pci_vendordata);
2646		if (left <= 0) {
2647			*ptr = cp;
2648			return(1);
2649		}
2650
2651		/* vendor entry? */
2652		if (*cp != '\t' &&
2653		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2654			break;
2655		/* device entry? */
2656		if (*cp == '\t' &&
2657		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2658			break;
2659
2660		/* skip to next line */
2661		while (*cp != '\n' && left > 0) {
2662			cp++;
2663			left--;
2664		}
2665		if (*cp == '\n') {
2666			cp++;
2667			left--;
2668		}
2669	}
2670	/* skip to next line */
2671	while (*cp != '\n' && left > 0) {
2672		cp++;
2673		left--;
2674	}
2675	if (*cp == '\n' && left > 0)
2676		cp++;
2677	*ptr = cp;
2678	return(0);
2679}
2680
/*
 * Look the device up in the loaded vendor database and return a
 * malloc'ed (M_DEVBUF) "vendor, device" string, or NULL if there is
 * no database or no vendor match.  The caller frees the result.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match, fall back below. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor entry ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device: show the raw device ID instead. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
2733
/*
 * Bus read_ivar method: serve PCI instance variables from the
 * cached config registers in the child's pci_devinfo.  ETHADDR is
 * not supported here (reports NULL and EINVAL so generic accessors
 * see a failure); unknown ivars return ENOENT.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2813
2814int
2815pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2816{
2817	struct pci_devinfo *dinfo;
2818
2819	dinfo = device_get_ivars(child);
2820
2821	switch (which) {
2822	case PCI_IVAR_INTPIN:
2823		dinfo->cfg.intpin = value;
2824		return (0);
2825	case PCI_IVAR_ETHADDR:
2826	case PCI_IVAR_SUBVENDOR:
2827	case PCI_IVAR_SUBDEVICE:
2828	case PCI_IVAR_VENDOR:
2829	case PCI_IVAR_DEVICE:
2830	case PCI_IVAR_DEVID:
2831	case PCI_IVAR_CLASS:
2832	case PCI_IVAR_SUBCLASS:
2833	case PCI_IVAR_PROGIF:
2834	case PCI_IVAR_REVID:
2835	case PCI_IVAR_IRQ:
2836	case PCI_IVAR_BUS:
2837	case PCI_IVAR_SLOT:
2838	case PCI_IVAR_FUNCTION:
2839		return (EINVAL);	/* disallow for now */
2840
2841	default:
2842		return (ENOENT);
2843	}
2844}
2845
2846
2847#include "opt_ddb.h"
2848#ifdef DDB
2849#include <ddb/ddb.h>
2850#include <sys/cons.h>
2851
2852/*
2853 * List resources based on pci map registers, used for within ddb
2854 */
2855
/*
 * 'show pciregs' DDB command: walk the global pci_devq list and
 * print one summary line per device (driver name/unit or a "none"
 * counter, selector, combined class/subclass/progif, subsystem and
 * device IDs, revision and header type).  Stops early if the DDB
 * pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2895#endif /* DDB */
2896
/*
 * Lazily allocate a resource for a BAR that has no pre-existing
 * resource list entry.  The BAR is sized with the standard probe
 * (save the value, write all-1s, read back), the requested resource
 * type is validated against the BAR type, and on success the newly
 * allocated base is programmed into the BAR; on any failure the
 * original BAR value is written back instead.  Returns the resource
 * or NULL.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit BARs carry the upper half in the next register. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the BAR: new base on success, original value otherwise. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2985
2986
/*
 * Bus alloc_resource method.  For direct children this performs the
 * lazy-allocation tricks: route an interrupt on first IRQ request
 * (unless MSI/MSI-X messages are already allocated), enable I/O
 * decoding and size BARs on first port/memory request, and return
 * an already-reserved entry where one exists.  Everything else
 * falls through to the generic resource list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3063
/*
 * Delete a resource entry for a direct PCI child.  Refuses to delete a
 * resource that is still active or owned by the child (that would yank it
 * out from under a driver).  Also clears the corresponding config register
 * and forwards the deletion up to our parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only manage resources for our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/*
			 * A reserved-but-idle resource is owned by the bus
			 * (rman device == dev) and inactive; anything else
			 * is still in use by the child, so bail loudly.
			 */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 * NOTE(review): this writes 0 to config register 'rid' -- this only
	 * makes sense when rid is a BAR offset; confirm intent.
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3099
3100struct resource_list *
3101pci_get_resource_list (device_t dev, device_t child)
3102{
3103	struct pci_devinfo *dinfo = device_get_ivars(child);
3104
3105	return (&dinfo->resources);
3106}
3107
3108uint32_t
3109pci_read_config_method(device_t dev, device_t child, int reg, int width)
3110{
3111	struct pci_devinfo *dinfo = device_get_ivars(child);
3112	pcicfgregs *cfg = &dinfo->cfg;
3113
3114	return (PCIB_READ_CONFIG(device_get_parent(dev),
3115	    cfg->bus, cfg->slot, cfg->func, reg, width));
3116}
3117
3118void
3119pci_write_config_method(device_t dev, device_t child, int reg,
3120    uint32_t val, int width)
3121{
3122	struct pci_devinfo *dinfo = device_get_ivars(child);
3123	pcicfgregs *cfg = &dinfo->cfg;
3124
3125	PCIB_WRITE_CONFIG(device_get_parent(dev),
3126	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3127}
3128
3129int
3130pci_child_location_str_method(device_t dev, device_t child, char *buf,
3131    size_t buflen)
3132{
3133
3134	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3135	    pci_get_function(child));
3136	return (0);
3137}
3138
3139int
3140pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3141    size_t buflen)
3142{
3143	struct pci_devinfo *dinfo;
3144	pcicfgregs *cfg;
3145
3146	dinfo = device_get_ivars(child);
3147	cfg = &dinfo->cfg;
3148	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3149	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3150	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3151	    cfg->progif);
3152	return (0);
3153}
3154
3155int
3156pci_assign_interrupt_method(device_t dev, device_t child)
3157{
3158	struct pci_devinfo *dinfo = device_get_ivars(child);
3159	pcicfgregs *cfg = &dinfo->cfg;
3160
3161	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3162	    cfg->intpin));
3163}
3164
3165static int
3166pci_modevent(module_t mod, int what, void *arg)
3167{
3168	static struct cdev *pci_cdev;
3169
3170	switch (what) {
3171	case MOD_LOAD:
3172		STAILQ_INIT(&pci_devq);
3173		pci_generation = 0;
3174		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3175		    "pci");
3176		pci_load_vendor_data();
3177		break;
3178
3179	case MOD_UNLOAD:
3180		destroy_dev(pci_cdev);
3181		break;
3182	}
3183
3184	return (0);
3185}
3186
/*
 * Restore a device's saved config-space registers (BARs, command register,
 * interrupt line, timers, etc.) from the snapshot taken by pci_cfg_save(),
 * typically after a suspend/resume or D3->D0 power transition.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back each saved BAR, then the remaining type-0 registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3233
/*
 * Snapshot a device's writable type-0 config-space registers into dinfo
 * so pci_cfg_restore() can put them back later.  If setstate is non-zero,
 * optionally power the device down to D3 according to the
 * pci_do_power_nodriver policy tunable.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3317