pci.c revision 165577
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 165577 2006-12-28 06:14:42Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static int		pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104
105static device_method_t pci_methods[] = {
106	/* Device interface */
107	DEVMETHOD(device_probe,		pci_probe),
108	DEVMETHOD(device_attach,	pci_attach),
109	DEVMETHOD(device_detach,	bus_generic_detach),
110	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
111	DEVMETHOD(device_suspend,	pci_suspend),
112	DEVMETHOD(device_resume,	pci_resume),
113
114	/* Bus interface */
115	DEVMETHOD(bus_print_child,	pci_print_child),
116	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
117	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
118	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
119	DEVMETHOD(bus_driver_added,	pci_driver_added),
120	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
121	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
122
123	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
124	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
125	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
126	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
127	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
128	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
129	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
130	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
131	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
132	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
133
134	/* PCI interface */
135	DEVMETHOD(pci_read_config,	pci_read_config_method),
136	DEVMETHOD(pci_write_config,	pci_write_config_method),
137	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
138	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
139	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
140	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
141	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
142	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
143	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
144	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
145	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
146	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
147	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
148	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
149	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
150
151	{ 0, 0 }
152};
153
154DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
155
156static devclass_t pci_devclass;
157DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
158MODULE_VERSION(pci, 1);
159
160static char	*pci_vendordata;
161static size_t	pci_vendordata_size;
162
163
164struct pci_quirk {
165	uint32_t devid;	/* Vendor/device of the card */
166	int	type;
167#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
168#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
169	int	arg1;
170	int	arg2;
171};
172
173struct pci_quirk pci_quirks[] = {
174	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
175	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
176	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
177	/* As does the Serverworks OSB4 (the SMBus mapping register) */
178	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
179
180	/*
181	 * MSI doesn't work with the Intel E7501 chipset, at least on
182	 * the Tyan 2721 motherboard.
183	 */
184	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
185
186	/*
187	 * MSI doesn't work with the Intel E7505 chipset, at least on
188	 * the Tyan S2665ANF motherboard.
189	 */
190	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
191
192	{ 0 }
193};
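
/*
 * Editorial note: the devid values above pack the 16-bit device ID in the
 * high half and the 16-bit vendor ID in the low half, matching a 32-bit
 * read of PCIR_DEVVENDOR; 0x71138086, for example, is Intel (0x8086)
 * device 0x7113.  A hypothetical new quirk entry would use the same
 * encoding, e.g.:
 *
 *	{ 0x12345678, PCI_QUIRK_DISABLE_MSI,	0,	0 },
 *
 * where 0x1234/0x5678 stand in for a real device/vendor ID pair.
 */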
194
195/* map register information */
196#define	PCI_MAPMEM	0x01	/* memory map */
197#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
198#define	PCI_MAPPORT	0x04	/* port map */
199
200struct devlist pci_devq;
201uint32_t pci_generation;
202uint32_t pci_numdevs = 0;
203
204/* sysctl vars */
205SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
206
207static int pci_enable_io_modes = 1;
208TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
209SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
210    &pci_enable_io_modes, 1,
211    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
212enable these bits correctly.  We'd like to do this all the time, but there\n\
213are some peripherals that this causes problems with.");
214
215static int pci_do_power_nodriver = 0;
216TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
217SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
218    &pci_do_power_nodriver, 0,
219  "Place a function into D3 state when no driver attaches to it.  0 means\n\
220disable.  1 means conservatively place devices into D3 state.  2 means\n\
221aggressively place devices into D3 state.  3 means put absolutely everything
222in D3 state.");
223
224static int pci_do_power_resume = 1;
225TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
226SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
227    &pci_do_power_resume, 1,
228  "Transition from D3 -> D0 on resume.");
229
230static int pci_do_msi = 1;
231TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
232SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
233    "Enable support for MSI interrupts");
234
235static int pci_do_msix = 1;
236TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
237SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
238    "Enable support for MSI-X interrupts");
239
240static int pci_honor_msi_blacklist = 1;
241TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
242SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
243    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
244
245/* Find a device_t by bus/slot/function */
246
247device_t
248pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
249{
250	struct pci_devinfo *dinfo;
251
252	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
253		if ((dinfo->cfg.bus == bus) &&
254		    (dinfo->cfg.slot == slot) &&
255		    (dinfo->cfg.func == func)) {
256			return (dinfo->cfg.dev);
257		}
258	}
259
260	return (NULL);
261}
262
263/* Find a device_t by vendor/device ID */
264
265device_t
266pci_find_device(uint16_t vendor, uint16_t device)
267{
268	struct pci_devinfo *dinfo;
269
270	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
271		if ((dinfo->cfg.vendor == vendor) &&
272		    (dinfo->cfg.device == device)) {
273			return (dinfo->cfg.dev);
274		}
275	}
276
277	return (NULL);
278}
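
/*
 * Illustrative sketch (editorial, not part of this driver): how kernel code
 * might use the two lookup helpers above.  The vendor/device IDs and the
 * function name are arbitrary examples.
 */
#if 0
static void
pci_lookup_example(void)
{
	device_t hostb, dev;

	/* The host-PCI bridge normally sits at bus 0, slot 0, function 0. */
	hostb = pci_find_bsf(0, 0, 0);
	if (hostb != NULL)
		printf("host bridge devid 0x%08x\n", pci_get_devid(hostb));

	/* Find the first device matching vendor 0x8086, device 0x100e. */
	dev = pci_find_device(0x8086, 0x100e);
	if (dev != NULL)
		device_printf(dev, "found by vendor/device ID\n");
}
#endif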
279
280/* return base address of memory or port map */
281
282static uint32_t
283pci_mapbase(uint32_t mapreg)
284{
285	int mask = 0x03;
286	if ((mapreg & 0x01) == 0)
287		mask = 0x0f;
288	return (mapreg & ~mask);
289}
290
291/* return map type of memory or port map */
292
293static int
294pci_maptype(unsigned mapreg)
295{
296	static uint8_t maptype[0x10] = {
297		PCI_MAPMEM,		PCI_MAPPORT,
298		PCI_MAPMEM,		0,
299		PCI_MAPMEM,		PCI_MAPPORT,
300		0,			0,
301		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
302		PCI_MAPMEM|PCI_MAPMEMP, 0,
303		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
304		0,			0,
305	};
306
307	return maptype[mapreg & 0x0f];
308}
309
310/* return log2 of map size decoded for memory or port map */
311
312static int
313pci_mapsize(uint32_t testval)
314{
315	int ln2size;
316
317	testval = pci_mapbase(testval);
318	ln2size = 0;
319	if (testval != 0) {
320		while ((testval & 1) == 0)
321		{
322			ln2size++;
323			testval >>= 1;
324		}
325	}
326	return (ln2size);
327}
328
329/* return log2 of address range supported by map register */
330
331static int
332pci_maprange(unsigned mapreg)
333{
334	int ln2range = 0;
335	switch (mapreg & 0x07) {
336	case 0x00:
337	case 0x01:
338	case 0x05:
339		ln2range = 32;
340		break;
341	case 0x02:
342		ln2range = 20;
343		break;
344	case 0x04:
345		ln2range = 64;
346		break;
347	}
348	return (ln2range);
349}
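
/*
 * Worked example (editorial): the usual BAR sizing sequence writes all 1s to
 * the register and decodes the value read back with the helpers above.  If a
 * memory BAR reads back 0xfffe0000, pci_mapbase() strips the low type bits,
 * pci_mapsize() counts the 17 trailing zero bits (a 1 << 17 = 128KB window),
 * and pci_maprange() reports a 32-bit decoder since the low three bits are 0.
 */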
350
351/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
352
353static void
354pci_fixancient(pcicfgregs *cfg)
355{
356	if (cfg->hdrtype != 0)
357		return;
358
359	/* PCI to PCI bridges use header type 1 */
360	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
361		cfg->hdrtype = 1;
362}
363
364/* extract header type specific config data */
365
366static void
367pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
368{
369#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
370	switch (cfg->hdrtype) {
371	case 0:
372		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
373		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
374		cfg->nummaps	    = PCI_MAXMAPS_0;
375		break;
376	case 1:
377		cfg->subvendor      = REG(PCIR_SUBVEND_1, 2);
378		cfg->subdevice      = REG(PCIR_SUBDEV_1, 2);
379		cfg->nummaps	    = PCI_MAXMAPS_1;
380		break;
381	case 2:
382		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
383		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
384		cfg->nummaps	    = PCI_MAXMAPS_2;
385		break;
386	}
387#undef REG
388}
389
390/* read configuration header into pcicfgregs structure */
391struct pci_devinfo *
392pci_read_device(device_t pcib, int b, int s, int f, size_t size)
393{
394#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
395	pcicfgregs *cfg = NULL;
396	struct pci_devinfo *devlist_entry;
397	struct devlist *devlist_head;
398
399	devlist_head = &pci_devq;
400
401	devlist_entry = NULL;
402
403	if (REG(PCIR_DEVVENDOR, 4) != -1) {
404		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
405		if (devlist_entry == NULL)
406			return (NULL);
407
408		cfg = &devlist_entry->cfg;
409
410		cfg->bus		= b;
411		cfg->slot		= s;
412		cfg->func		= f;
413		cfg->vendor		= REG(PCIR_VENDOR, 2);
414		cfg->device		= REG(PCIR_DEVICE, 2);
415		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
416		cfg->statreg		= REG(PCIR_STATUS, 2);
417		cfg->baseclass		= REG(PCIR_CLASS, 1);
418		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
419		cfg->progif		= REG(PCIR_PROGIF, 1);
420		cfg->revid		= REG(PCIR_REVID, 1);
421		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
422		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
423		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
424		cfg->intpin		= REG(PCIR_INTPIN, 1);
425		cfg->intline		= REG(PCIR_INTLINE, 1);
426
427		cfg->mingnt		= REG(PCIR_MINGNT, 1);
428		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
429
430		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
431		cfg->hdrtype		&= ~PCIM_MFDEV;
432
433		pci_fixancient(cfg);
434		pci_hdrtypedata(pcib, b, s, f, cfg);
435
436		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
437			pci_read_extcap(pcib, cfg);
438
439		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
440
441		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
442		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
443		devlist_entry->conf.pc_sel.pc_func = cfg->func;
444		devlist_entry->conf.pc_hdr = cfg->hdrtype;
445
446		devlist_entry->conf.pc_subvendor = cfg->subvendor;
447		devlist_entry->conf.pc_subdevice = cfg->subdevice;
448		devlist_entry->conf.pc_vendor = cfg->vendor;
449		devlist_entry->conf.pc_device = cfg->device;
450
451		devlist_entry->conf.pc_class = cfg->baseclass;
452		devlist_entry->conf.pc_subclass = cfg->subclass;
453		devlist_entry->conf.pc_progif = cfg->progif;
454		devlist_entry->conf.pc_revid = cfg->revid;
455
456		pci_numdevs++;
457		pci_generation++;
458	}
459	return (devlist_entry);
460#undef REG
461}
462
463static void
464pci_read_extcap(device_t pcib, pcicfgregs *cfg)
465{
466#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
467#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
468#if defined(__i386__) || defined(__amd64__)
469	uint64_t addr;
470#endif
471	uint32_t val;
472	int	ptr, nextptr, ptrptr;
473
474	switch (cfg->hdrtype & PCIM_HDRTYPE) {
475	case 0:
476	case 1:
477		ptrptr = PCIR_CAP_PTR;
478		break;
479	case 2:
480		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
481		break;
482	default:
483		return;		/* no extended capabilities support */
484	}
485	nextptr = REG(ptrptr, 1);	/* sanity check? */
486
487	/*
488	 * Read capability entries.
489	 */
490	while (nextptr != 0) {
491		/* Sanity check */
492		if (nextptr > 255) {
493			printf("illegal PCI extended capability offset %d\n",
494			    nextptr);
495			return;
496		}
497		/* Find the next entry */
498		ptr = nextptr;
499		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
500
501		/* Process this entry */
502		switch (REG(ptr + PCICAP_ID, 1)) {
503		case PCIY_PMG:		/* PCI power management */
504			if (cfg->pp.pp_cap == 0) {
505				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
506				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
507				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
508				if ((nextptr - ptr) > PCIR_POWER_DATA)
509					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
510			}
511			break;
512#if defined(__i386__) || defined(__amd64__)
513		case PCIY_HT:		/* HyperTransport */
514			/* Determine HT-specific capability type. */
515			val = REG(ptr + PCIR_HT_COMMAND, 2);
516			switch (val & PCIM_HTCMD_CAP_MASK) {
517			case PCIM_HTCAP_MSI_MAPPING:
518				/* Sanity check the mapping window. */
519				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
520				addr <<= 32;
521				addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
522				if (addr != MSI_INTEL_ADDR_BASE)
523					device_printf(pcib,
524		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
525					    cfg->bus, cfg->slot, cfg->func,
526					    (long long)addr);
527
528				/* Enable MSI -> HT mapping. */
529				val |= PCIM_HTCMD_MSI_ENABLE;
530				WREG(ptr + PCIR_HT_COMMAND, val, 2);
531				break;
532			}
533			break;
534#endif
535		case PCIY_MSI:		/* PCI MSI */
536			cfg->msi.msi_location = ptr;
537			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
538			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
539						     PCIM_MSICTRL_MMC_MASK)>>1);
540			break;
541		case PCIY_MSIX:		/* PCI MSI-X */
542			cfg->msix.msix_location = ptr;
543			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
544			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
545			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
546			val = REG(ptr + PCIR_MSIX_TABLE, 4);
547			cfg->msix.msix_table_bar = PCIR_BAR(val &
548			    PCIM_MSIX_BIR_MASK);
549			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
550			val = REG(ptr + PCIR_MSIX_PBA, 4);
551			cfg->msix.msix_pba_bar = PCIR_BAR(val &
552			    PCIM_MSIX_BIR_MASK);
553			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
554			break;
555		case PCIY_VPD:		/* PCI Vital Product Data */
556			cfg->vpd.vpd_reg = ptr;
557			pci_read_vpd(pcib, cfg);
558			break;
559		default:
560			break;
561		}
562	}
563/* The REG and WREG macros remain defined for use by the following functions */
564}
565
566/*
567 * PCI Vital Product Data
568 */
569static uint32_t
570pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
571{
572
573	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
574
575	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
576	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
577		DELAY(1);	/* limit looping */
578
579	return REG(cfg->vpd.vpd_reg + 4, 4);
580}
581
582#if 0
583static void
584pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
585{
586	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
587
588	WREG(cfg->vpd.vpd_reg + 4, data, 4);
589	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
590	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
591		DELAY(1);	/* limit looping */
592
593	return;
594}
595#endif
596
597struct vpd_readstate {
598	device_t	pcib;
599	pcicfgregs	*cfg;
600	uint32_t	val;
601	int		bytesinval;
602	int		off;
603	uint8_t		cksum;
604};
605
606static uint8_t
607vpd_nextbyte(struct vpd_readstate *vrs)
608{
609	uint8_t byte;
610
611	if (vrs->bytesinval == 0) {
612		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
613		    vrs->off));
614		vrs->off += 4;
615		byte = vrs->val & 0xff;
616		vrs->bytesinval = 3;
617	} else {
618		vrs->val = vrs->val >> 8;
619		byte = vrs->val & 0xff;
620		vrs->bytesinval--;
621	}
622
623	vrs->cksum += byte;
624	return byte;
625}
626
627static void
628pci_read_vpd(device_t pcib, pcicfgregs *cfg)
629{
630	struct vpd_readstate vrs;
631	int state;
632	int name;
633	int remain;
634	int end;
635	int i;
636	uint8_t byte;
637	int alloc, off;		/* alloc/off for RO/W arrays */
638	int cksumvalid;
639	int dflen;
640
641	/* init vpd reader */
642	vrs.bytesinval = 0;
643	vrs.off = 0;
644	vrs.pcib = pcib;
645	vrs.cfg = cfg;
646	vrs.cksum = 0;
647
648	state = 0;
649	name = remain = i = 0;	/* shut up stupid gcc */
650	alloc = off = 0;	/* shut up stupid gcc */
651	dflen = 0;		/* shut up stupid gcc */
652	end = 0;
653	cksumvalid = -1;
654	for (; !end;) {
655		byte = vpd_nextbyte(&vrs);
656#if 0
657		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
658		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
659		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
660#endif
661		switch (state) {
662		case 0:		/* item name */
663			if (byte & 0x80) {
664				remain = vpd_nextbyte(&vrs);
665				remain |= vpd_nextbyte(&vrs) << 8;
666				if (remain > (0x7f*4 - vrs.off)) {
667					end = 1;
668					printf(
669			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
670					    cfg->bus, cfg->slot, cfg->func,
671					    remain);
672				}
673				name = byte & 0x7f;
674			} else {
675				remain = byte & 0x7;
676				name = (byte >> 3) & 0xf;
677			}
678			switch (name) {
679			case 0x2:	/* String */
680				cfg->vpd.vpd_ident = malloc(remain + 1,
681				    M_DEVBUF, M_WAITOK);
682				i = 0;
683				state = 1;
684				break;
685			case 0xf:	/* End */
686				end = 1;
687				state = -1;
688				break;
689			case 0x10:	/* VPD-R */
690				alloc = 8;
691				off = 0;
692				cfg->vpd.vpd_ros = malloc(alloc *
693				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
694				    M_WAITOK);
695				state = 2;
696				break;
697			case 0x11:	/* VPD-W */
698				alloc = 8;
699				off = 0;
700				cfg->vpd.vpd_w = malloc(alloc *
701				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
702				    M_WAITOK);
703				state = 5;
704				break;
705			default:	/* Invalid data, abort */
706				end = 1;
707				continue;
708			}
709			break;
710
711		case 1:	/* Identifier String */
712			cfg->vpd.vpd_ident[i++] = byte;
713			remain--;
714			if (remain == 0)  {
715				cfg->vpd.vpd_ident[i] = '\0';
716				state = 0;
717			}
718			break;
719
720		case 2:	/* VPD-R Keyword Header */
721			if (off == alloc) {
722				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
723				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
724				    M_DEVBUF, M_WAITOK);
725			}
726			cfg->vpd.vpd_ros[off].keyword[0] = byte;
727			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
728			dflen = vpd_nextbyte(&vrs);
729			if (dflen == 0 &&
730			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
731			    2) == 0) {
732				/*
733				 * if this happens, we can't trust the rest
734				 * of the VPD.
735				 */
736				printf("pci%d:%d:%d: bad keyword length: %d\n",
737				    cfg->bus, cfg->slot, cfg->func, dflen);
738				cksumvalid = 0;
739				end = 1;
740				break;
741			} else if (dflen == 0) {
742				cfg->vpd.vpd_ros[off].value = malloc(1 *
743				    sizeof *cfg->vpd.vpd_ros[off].value,
744				    M_DEVBUF, M_WAITOK);
745				cfg->vpd.vpd_ros[off].value[0] = '\x00';
746			} else
747				cfg->vpd.vpd_ros[off].value = malloc(
748				    (dflen + 1) *
749				    sizeof *cfg->vpd.vpd_ros[off].value,
750				    M_DEVBUF, M_WAITOK);
751			remain -= 3;
752			i = 0;
753			/* keep in sync w/ state 3's transitions */
754			if (dflen == 0 && remain == 0)
755				state = 0;
756			else if (dflen == 0)
757				state = 2;
758			else
759				state = 3;
760			break;
761
762		case 3:	/* VPD-R Keyword Value */
763			cfg->vpd.vpd_ros[off].value[i++] = byte;
764			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
765			    "RV", 2) == 0 && cksumvalid == -1) {
766				if (vrs.cksum == 0)
767					cksumvalid = 1;
768				else {
769					printf(
770				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
771					    cfg->bus, cfg->slot, cfg->func,
772					    vrs.cksum);
773					cksumvalid = 0;
774					end = 1;
775					break;
776				}
777			}
778			dflen--;
779			remain--;
780			/* keep in sync w/ state 2's transitions */
781			if (dflen == 0)
782				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
783			if (dflen == 0 && remain == 0) {
784				cfg->vpd.vpd_rocnt = off;
785				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
786				    off * sizeof *cfg->vpd.vpd_ros,
787				    M_DEVBUF, M_WAITOK);
788				state = 0;
789			} else if (dflen == 0)
790				state = 2;
791			break;
792
793		case 4:
794			remain--;
795			if (remain == 0)
796				state = 0;
797			break;
798
799		case 5:	/* VPD-W Keyword Header */
800			if (off == alloc) {
801				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
802				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
803				    M_DEVBUF, M_WAITOK);
804			}
805			cfg->vpd.vpd_w[off].keyword[0] = byte;
806			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
807			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
808			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
809			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
810			    sizeof *cfg->vpd.vpd_w[off].value,
811			    M_DEVBUF, M_WAITOK);
812			remain -= 3;
813			i = 0;
814			/* keep in sync w/ state 6's transitions */
815			if (dflen == 0 && remain == 0)
816				state = 0;
817			else if (dflen == 0)
818				state = 5;
819			else
820				state = 6;
821			break;
822
823		case 6:	/* VPD-W Keyword Value */
824			cfg->vpd.vpd_w[off].value[i++] = byte;
825			dflen--;
826			remain--;
827			/* keep in sync w/ state 5's transitions */
828			if (dflen == 0)
829				cfg->vpd.vpd_w[off++].value[i++] = '\0';
830			if (dflen == 0 && remain == 0) {
831				cfg->vpd.vpd_wcnt = off;
832				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
833				    off * sizeof *cfg->vpd.vpd_w,
834				    M_DEVBUF, M_WAITOK);
835				state = 0;
836			} else if (dflen == 0)
837				state = 5;
838			break;
839
840		default:
841			printf("pci%d:%d:%d: invalid state: %d\n",
842			    cfg->bus, cfg->slot, cfg->func, state);
843			end = 1;
844			break;
845		}
846	}
847
848	if (cksumvalid == 0) {
849		/* read-only data bad, clean up */
850		for (; off; off--)
851			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
852
853		free(cfg->vpd.vpd_ros, M_DEVBUF);
854		cfg->vpd.vpd_ros = NULL;
855	}
856#undef REG
857#undef WREG
858}
859
860int
861pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
862{
863	struct pci_devinfo *dinfo = device_get_ivars(child);
864	pcicfgregs *cfg = &dinfo->cfg;
865
866	*identptr = cfg->vpd.vpd_ident;
867
868	if (*identptr == NULL)
869		return ENXIO;
870
871	return 0;
872}
873
874int
875pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
876	const char **vptr)
877{
878	struct pci_devinfo *dinfo = device_get_ivars(child);
879	pcicfgregs *cfg = &dinfo->cfg;
880	int i;
881
882	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
883		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
884		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
885			*vptr = cfg->vpd.vpd_ros[i].value;
			break;
886		}
887
888	if (i != cfg->vpd.vpd_rocnt)
889		return 0;
890
891	*vptr = NULL;
892	return ENXIO;
893}
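
/*
 * Illustrative sketch (editorial, not part of this file): a driver consuming
 * the VPD data parsed above through the pci_get_vpd_ident()/
 * pci_get_vpd_readonly() wrappers.  "PN" (part number) is just one example
 * of a read-only keyword; the function name is arbitrary.
 */
#if 0
static void
vpd_example(device_t dev)
{
	const char *ident, *pn;

	if (pci_get_vpd_ident(dev, &ident) == 0)
		device_printf(dev, "VPD ident: %s\n", ident);
	if (pci_get_vpd_readonly(dev, "PN", &pn) == 0)
		device_printf(dev, "VPD part number: %s\n", pn);
}
#endif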
894
895/*
896 * Find the offset in configuration space of the requested extended
897 * capability entry; return 0 and store the offset in *capreg on success.
898 */
899int
900pci_find_extcap_method(device_t dev, device_t child, int capability,
901    int *capreg)
902{
903	struct pci_devinfo *dinfo = device_get_ivars(child);
904	pcicfgregs *cfg = &dinfo->cfg;
905	u_int32_t status;
906	u_int8_t ptr;
907
908	/*
909	 * Check the CAP_LIST bit of the PCI status register first.
910	 */
911	status = pci_read_config(child, PCIR_STATUS, 2);
912	if (!(status & PCIM_STATUS_CAPPRESENT))
913		return (ENXIO);
914
915	/*
916	 * Determine the start pointer of the capabilities list.
917	 */
918	switch (cfg->hdrtype & PCIM_HDRTYPE) {
919	case 0:
920	case 1:
921		ptr = PCIR_CAP_PTR;
922		break;
923	case 2:
924		ptr = PCIR_CAP_PTR_2;
925		break;
926	default:
927		/* XXX: panic? */
928		return (ENXIO);		/* no extended capabilities support */
929	}
930	ptr = pci_read_config(child, ptr, 1);
931
932	/*
933	 * Traverse the capabilities list.
934	 */
935	while (ptr != 0) {
936		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
937			if (capreg != NULL)
938				*capreg = ptr;
939			return (0);
940		}
941		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
942	}
943
944	return (ENOENT);
945}
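
/*
 * Illustrative sketch (editorial, not part of this file): a driver using the
 * capability walk above via the pci_find_extcap() wrapper, here to locate
 * the power management capability and read its PMCSR register.  The function
 * and variable names are arbitrary.
 */
#if 0
static void
extcap_example(device_t dev)
{
	uint16_t pmcsr;
	int pmreg;

	if (pci_find_extcap(dev, PCIY_PMG, &pmreg) == 0) {
		pmcsr = pci_read_config(dev, pmreg + PCIR_POWER_STATUS, 2);
		device_printf(dev, "PMCSR = 0x%04x\n", pmcsr);
	}
}
#endif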
946
947/*
948 * Support for MSI-X message interrupts.
949 */
950void
951pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
952{
953	struct pci_devinfo *dinfo = device_get_ivars(dev);
954	pcicfgregs *cfg = &dinfo->cfg;
955	uint32_t offset;
956
957	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
958	offset = cfg->msix.msix_table_offset + index * 16;
959	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
960	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
961	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
962}
963
964void
965pci_mask_msix(device_t dev, u_int index)
966{
967	struct pci_devinfo *dinfo = device_get_ivars(dev);
968	pcicfgregs *cfg = &dinfo->cfg;
969	uint32_t offset, val;
970
971	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
972	offset = cfg->msix.msix_table_offset + index * 16 + 12;
973	val = bus_read_4(cfg->msix.msix_table_res, offset);
974	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
975		val |= PCIM_MSIX_VCTRL_MASK;
976		bus_write_4(cfg->msix.msix_table_res, offset, val);
977	}
978}
979
980void
981pci_unmask_msix(device_t dev, u_int index)
982{
983	struct pci_devinfo *dinfo = device_get_ivars(dev);
984	pcicfgregs *cfg = &dinfo->cfg;
985	uint32_t offset, val;
986
987	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
988	offset = cfg->msix.msix_table_offset + index * 16 + 12;
989	val = bus_read_4(cfg->msix.msix_table_res, offset);
990	if (val & PCIM_MSIX_VCTRL_MASK) {
991		val &= ~PCIM_MSIX_VCTRL_MASK;
992		bus_write_4(cfg->msix.msix_table_res, offset, val);
993	}
994}
995
996int
997pci_pending_msix(device_t dev, u_int index)
998{
999	struct pci_devinfo *dinfo = device_get_ivars(dev);
1000	pcicfgregs *cfg = &dinfo->cfg;
1001	uint32_t offset, bit;
1002
1003	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1004	offset = cfg->msix.msix_pba_offset + (index / 32) * 4;
1005	bit = 1 << index % 32;
1006	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1007}
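
/*
 * Editorial note on the arithmetic above: each MSI-X table entry is 16 bytes
 * (message address low at +0, address high at +4, message data at +8 and
 * vector control at +12, whose bit 0 is the mask bit), while the pending-bit
 * array packs one bit per vector into 32-bit words.
 */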
1008
1009static int
1010pci_alloc_msix(device_t dev, device_t child, int *count)
1011{
1012	struct pci_devinfo *dinfo = device_get_ivars(child);
1013	pcicfgregs *cfg = &dinfo->cfg;
1014	struct resource_list_entry *rle;
1015	int actual, error, i, irq, max;
1016
1017	/* MSI-X capability present? */
1018	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1019		return (ENODEV);
1020
1021	/* Make sure the appropriate BARs are mapped. */
1022	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1023	    cfg->msix.msix_table_bar);
1024	if (rle == NULL || rle->res == NULL ||
1025	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1026		return (ENXIO);
1027	cfg->msix.msix_table_res = rle->res;
1028	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1029		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1030		    cfg->msix.msix_pba_bar);
1031		if (rle == NULL || rle->res == NULL ||
1032		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1033			return (ENXIO);
1034	}
1035	cfg->msix.msix_pba_res = rle->res;
1036
1037	/* Already have allocated messages? */
1038	if (cfg->msix.msix_alloc != 0)
1039		return (ENXIO);
1040
1041	if (bootverbose)
1042		device_printf(child,
1043		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1044		    *count, cfg->msix.msix_msgnum);
1045	max = min(*count, cfg->msix.msix_msgnum);
1046	for (i = 0; i < max; i++) {
1047		/* Allocate a message. */
1048		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1049		    &irq);
1050		if (error)
1051			break;
1052		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1053		    irq, 1);
1054	}
1055	actual = i;
1056
1057	if (bootverbose) {
1058		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1059		if (actual == 1)
1060			device_printf(child, "using IRQ %lu for MSI-X\n",
1061			    rle->start);
1062		else {
1063			int run;
1064
1065			/*
1066			 * Be fancy and try to print contiguous runs of
1067			 * IRQ values as ranges.  'irq' is the previous IRQ.
1068			 * 'run' is true if we are in a range.
1069			 */
1070			device_printf(child, "using IRQs %lu", rle->start);
1071			irq = rle->start;
1072			run = 0;
1073			for (i = 1; i < actual; i++) {
1074				rle = resource_list_find(&dinfo->resources,
1075				    SYS_RES_IRQ, i + 1);
1076
1077				/* Still in a run? */
1078				if (rle->start == irq + 1) {
1079					run = 1;
1080					irq++;
1081					continue;
1082				}
1083
1084				/* Finish previous range. */
1085				if (run) {
1086					printf("-%d", irq);
1087					run = 0;
1088				}
1089
1090				/* Start new range. */
1091				printf(",%lu", rle->start);
1092				irq = rle->start;
1093			}
1094
1095			/* Unfinished range? */
1096			if (run)
1097				printf("-%d", irq);
1098			printf(" for MSI-X\n");
1099		}
1100	}
1101
1102	/* Mask all vectors. */
1103	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1104		pci_mask_msix(child, i);
1105
1106	/* Update control register to enable MSI-X. */
1107	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1108	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1109	    cfg->msix.msix_ctrl, 2);
1110
1111	/* Update counts of alloc'd messages. */
1112	cfg->msix.msix_alloc = actual;
1113	*count = actual;
1114	return (0);
1115}
1116
1117static int
1118pci_release_msix(device_t dev, device_t child)
1119{
1120	struct pci_devinfo *dinfo = device_get_ivars(child);
1121	pcicfgregs *cfg = &dinfo->cfg;
1122	struct resource_list_entry *rle;
1123	int i;
1124
1125	/* Do we have any messages to release? */
1126	if (cfg->msix.msix_alloc == 0)
1127		return (ENODEV);
1128
1129	/* Make sure none of the resources are allocated. */
1130	for (i = 0; i < cfg->msix.msix_alloc; i++) {
1131		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1132		KASSERT(rle != NULL, ("missing MSI resource"));
1133		if (rle->res != NULL)
1134			return (EBUSY);
1135	}
1136
1137	/* Update control register to disable MSI-X. */
1138	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1139	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1140	    cfg->msix.msix_ctrl, 2);
1141
1142	/* Release the messages. */
1143	for (i = 0; i < cfg->msix.msix_alloc; i++) {
1144		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1145		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1146		    rle->start);
1147		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1148	}
1149
1150	/* Update alloc count. */
1151	cfg->msix.msix_alloc = 0;
1152	return (0);
1153}
1154
1155/*
1156 * Support for MSI message signalled interrupts.
1157 */
1158void
1159pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1160{
1161	struct pci_devinfo *dinfo = device_get_ivars(dev);
1162	pcicfgregs *cfg = &dinfo->cfg;
1163
1164	/* Write data and address values. */
1165	cfg->msi.msi_addr = address;
1166	cfg->msi.msi_data = data;
1167	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1168	    address & 0xffffffff, 4);
1169	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1170		pci_write_config(dev, cfg->msi.msi_location +
1171		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1172		pci_write_config(dev, cfg->msi.msi_location +
1173		    PCIR_MSI_DATA_64BIT, data, 2);
1174	} else
1175		pci_write_config(dev, cfg->msi.msi_location +
1176		    PCIR_MSI_DATA, data, 2);
1177
1178	/* Enable MSI in the control register. */
1179	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1180	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1181	    cfg->msi.msi_ctrl, 2);
1182}
1183
1184/*
1185 * Restore MSI registers during resume.  If MSI is enabled then
1186 * restore the data and address registers in addition to the control
1187 * register.
1188 */
1189static void
1190pci_resume_msi(device_t dev)
1191{
1192	struct pci_devinfo *dinfo = device_get_ivars(dev);
1193	pcicfgregs *cfg = &dinfo->cfg;
1194	uint64_t address;
1195	uint16_t data;
1196
1197	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1198		address = cfg->msi.msi_addr;
1199		data = cfg->msi.msi_data;
1200		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1201		    address & 0xffffffff, 4);
1202		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1203			pci_write_config(dev, cfg->msi.msi_location +
1204			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1205			pci_write_config(dev, cfg->msi.msi_location +
1206			    PCIR_MSI_DATA_64BIT, data, 2);
1207		} else
1208			pci_write_config(dev, cfg->msi.msi_location +
1209			    PCIR_MSI_DATA, data, 2);
1210	}
1211	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1212	    cfg->msi.msi_ctrl, 2);
1213}
1214
1215/*
1216 * Returns true if the specified device is blacklisted because MSI
1217 * doesn't work.
1218 */
1219int
1220pci_msi_device_blacklisted(device_t dev)
1221{
1222	struct pci_quirk *q;
1223
1224	if (!pci_honor_msi_blacklist)
1225		return (0);
1226
1227	for (q = &pci_quirks[0]; q->devid; q++) {
1228		if (q->devid == pci_get_devid(dev) &&
1229		    q->type == PCI_QUIRK_DISABLE_MSI)
1230			return (1);
1231	}
1232	return (0);
1233}
1234
1235/*
1236 * Determine if MSI is blacklisted globally on this system.  Currently,
1237 * we just check for blacklisted chipsets as represented by the
1238 * host-PCI bridge at device 0:0:0.  In the future, it may become
1239 * necessary to check other system attributes, such as the kenv values
1240 * that give the motherboard manufacturer and model number.
1241 */
1242static int
1243pci_msi_blacklisted(void)
1244{
1245	device_t dev;
1246
1247	if (!pci_honor_msi_blacklist)
1248		return (0);
1249
1250	dev = pci_find_bsf(0, 0, 0);
1251	if (dev != NULL)
1252		return (pci_msi_device_blacklisted(dev));
1253	return (0);
1254}
1255
1256/*
1257 * Attempt to allocate *count MSI messages.  The actual number allocated is
1258 * returned in *count.  After this function returns, each message will be
1259 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1260 */
1261int
1262pci_alloc_msi_method(device_t dev, device_t child, int *count)
1263{
1264	struct pci_devinfo *dinfo = device_get_ivars(child);
1265	pcicfgregs *cfg = &dinfo->cfg;
1266	struct resource_list_entry *rle;
1267	int actual, error, i, irqs[32];
1268	uint16_t ctrl;
1269
1270	/* Don't let count == 0 get us into trouble. */
1271	if (*count == 0)
1272		return (EINVAL);
1273
1274	/* If rid 0 is allocated, then fail. */
1275	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1276	if (rle != NULL && rle->res != NULL)
1277		return (ENXIO);
1278
1279	/* If MSI is blacklisted for this system, fail. */
1280	if (pci_msi_blacklisted())
1281		return (ENXIO);
1282
1283	/* Try MSI-X first. */
1284	error = pci_alloc_msix(dev, child, count);
1285	if (error != ENODEV)
1286		return (error);
1287
1288	/* MSI capability present? */
1289	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1290		return (ENODEV);
1291
1292	/* Already have allocated messages? */
1293	if (cfg->msi.msi_alloc != 0)
1294		return (ENXIO);
1295
1296	if (bootverbose)
1297		device_printf(child,
1298		    "attempting to allocate %d MSI vectors (%d supported)\n",
1299		    *count, cfg->msi.msi_msgnum);
1300
1301	/* Don't ask for more than the device supports. */
1302	actual = min(*count, cfg->msi.msi_msgnum);
1303
1304	/* Don't ask for more than 32 messages. */
1305	actual = min(actual, 32);
1306
1307	/* MSI requires power of 2 number of messages. */
1308	if (!powerof2(actual))
1309		return (EINVAL);
1310
1311	for (;;) {
1312		/* Try to allocate N messages. */
1313		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1314		    cfg->msi.msi_msgnum, irqs);
1315		if (error == 0)
1316			break;
1317		if (actual == 1)
1318			return (error);
1319
1320		/* Try N / 2. */
1321		actual >>= 1;
1322	}
1323
1324	/*
1325	 * We now have N actual messages mapped onto SYS_RES_IRQ
1326	 * resources in the irqs[] array, so add new resources
1327	 * starting at rid 1.
1328	 */
1329	for (i = 0; i < actual; i++)
1330		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1331		    irqs[i], irqs[i], 1);
1332
1333	if (bootverbose) {
1334		if (actual == 1)
1335			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1336		else {
1337			int run;
1338
1339			/*
1340			 * Be fancy and try to print contiguous runs
1341			 * of IRQ values as ranges.  'run' is true if
1342			 * we are in a range.
1343			 */
1344			device_printf(child, "using IRQs %d", irqs[0]);
1345			run = 0;
1346			for (i = 1; i < actual; i++) {
1347
1348				/* Still in a run? */
1349				if (irqs[i] == irqs[i - 1] + 1) {
1350					run = 1;
1351					continue;
1352				}
1353
1354				/* Finish previous range. */
1355				if (run) {
1356					printf("-%d", irqs[i - 1]);
1357					run = 0;
1358				}
1359
1360				/* Start new range. */
1361				printf(",%d", irqs[i]);
1362			}
1363
1364			/* Unfinished range? */
1365			if (run)
1366				printf("-%d", irqs[actual - 1]);
1367			printf(" for MSI\n");
1368		}
1369	}
1370
1371	/* Update control register with actual count and enable MSI. */
1372	ctrl = cfg->msi.msi_ctrl;
1373	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1374	ctrl |= (ffs(actual) - 1) << 4;
1375	cfg->msi.msi_ctrl = ctrl;
1376	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1377
1378	/* Update counts of alloc'd messages. */
1379	cfg->msi.msi_alloc = actual;
1380	*count = actual;
1381	return (0);
1382}
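
/*
 * Illustrative sketch (editorial, not part of this file): the driver-side
 * view of the allocator above.  A driver asks how many messages the device
 * supports, requests that many (the bus may grant fewer), and then allocates
 * the resulting SYS_RES_IRQ resources starting at rid 1, falling back to the
 * legacy INTx interrupt at rid 0.  The function name is arbitrary.
 */
#if 0
static int
msi_attach_example(device_t dev)
{
	struct resource *irq;
	int count, rid;

	count = pci_msi_count(dev);
	if (count > 0 && pci_alloc_msi(dev, &count) == 0)
		rid = 1;		/* first MSI message */
	else
		rid = 0;		/* legacy INTx */
	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
	if (irq == NULL) {
		if (rid != 0)
			pci_release_msi(dev);
		return (ENXIO);
	}
	/* ... bus_setup_intr() on 'irq'; remember 'rid' for detach ... */
	return (0);
}
#endif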
1383
1384/* Release the MSI messages associated with this device. */
1385int
1386pci_release_msi_method(device_t dev, device_t child)
1387{
1388	struct pci_devinfo *dinfo = device_get_ivars(child);
1389	pcicfgregs *cfg = &dinfo->cfg;
1390	struct resource_list_entry *rle;
1391	int error, i, irqs[32];
1392
1393	/* Try MSI-X first. */
1394	error = pci_release_msix(dev, child);
1395	if (error != ENODEV)
1396		return (error);
1397
1398	/* Do we have any messages to release? */
1399	if (cfg->msi.msi_alloc == 0)
1400		return (ENODEV);
1401	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));
1402
1403	/* Make sure none of the resources are allocated. */
1404	for (i = 0; i < cfg->msi.msi_alloc; i++) {
1405		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1406		KASSERT(rle != NULL, ("missing MSI resource"));
1407		if (rle->res != NULL)
1408			return (EBUSY);
1409		irqs[i] = rle->start;
1410	}
1411
1412	/* Update control register with 0 count and disable MSI. */
1413	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
1414	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
1415	    cfg->msi.msi_ctrl, 2);
1416
1417	/* Release the messages. */
1418	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
1419	    irqs);
1420	for (i = 0; i < cfg->msi.msi_alloc; i++)
1421		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1422
1423	/* Update alloc count. */
1424	cfg->msi.msi_alloc = 0;
1425	return (0);
1426}
1427
1428/*
1429 * Return the maximum number of MSI or MSI-X messages this device supports.
1430 * Basically, assuming the MD code can alloc messages, this function
1431 * should return the maximum value that pci_alloc_msi() can return.  Thus,
1432 * it is subject to the tunables, etc.
1433 */
1434int
1435pci_msi_count_method(device_t dev, device_t child)
1436{
1437	struct pci_devinfo *dinfo = device_get_ivars(child);
1438	pcicfgregs *cfg = &dinfo->cfg;
1439
1440	if (pci_do_msix && cfg->msix.msix_location != 0)
1441		return (cfg->msix.msix_msgnum);
1442	if (pci_do_msi && cfg->msi.msi_location != 0)
1443		return (cfg->msi.msi_msgnum);
1444	return (0);
1445}
1446
1447/* free pcicfgregs structure and all dependent data structures */
1448
1449int
1450pci_freecfg(struct pci_devinfo *dinfo)
1451{
1452	struct devlist *devlist_head;
1453	int i;
1454
1455	devlist_head = &pci_devq;
1456
1457	if (dinfo->cfg.vpd.vpd_reg) {
1458		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1459		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1460			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1461		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1462		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1463			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1464		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1465	}
1466	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1467	free(dinfo, M_DEVBUF);
1468
1469	/* increment the generation count */
1470	pci_generation++;
1471
1472	/* we're losing one device */
1473	pci_numdevs--;
1474	return (0);
1475}
1476
1477/*
1478 * PCI power management
1479 */
1480int
1481pci_set_powerstate_method(device_t dev, device_t child, int state)
1482{
1483	struct pci_devinfo *dinfo = device_get_ivars(child);
1484	pcicfgregs *cfg = &dinfo->cfg;
1485	uint16_t status;
1486	int result, oldstate, highest, delay;
1487
1488	if (cfg->pp.pp_cap == 0)
1489		return (EOPNOTSUPP);
1490
1491	/*
1492	 * Optimize a no state change request away.  While it would be OK to
1493	 * write to the hardware in theory, some devices have shown odd
1494	 * behavior when going from D3 -> D3.
1495	 */
1496	oldstate = pci_get_powerstate(child);
1497	if (oldstate == state)
1498		return (0);
1499
1500	/*
1501	 * The PCI power management specification states that after a state
1502	 * transition between PCI power states, system software must
1503	 * guarantee a minimal delay before the function accesses the device.
1504	 * Compute the worst case delay that we need to guarantee before we
1505	 * access the device.  Many devices will be responsive much more
1506	 * quickly than this delay, but there are some that don't respond
1507	 * instantly to state changes.  Transitions to/from D3 state require
1508	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1509	 * is done below with DELAY rather than a sleeper function because
1510	 * this function can be called from contexts where we cannot sleep.
1511	 */
1512	highest = (oldstate > state) ? oldstate : state;
1513	if (highest == PCI_POWERSTATE_D3)
1514	    delay = 10000;
1515	else if (highest == PCI_POWERSTATE_D2)
1516	    delay = 200;
1517	else
1518	    delay = 0;
1519	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1520	    & ~PCIM_PSTAT_DMASK;
1521	result = 0;
1522	switch (state) {
1523	case PCI_POWERSTATE_D0:
1524		status |= PCIM_PSTAT_D0;
1525		break;
1526	case PCI_POWERSTATE_D1:
1527		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1528			return (EOPNOTSUPP);
1529		status |= PCIM_PSTAT_D1;
1530		break;
1531	case PCI_POWERSTATE_D2:
1532		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1533			return (EOPNOTSUPP);
1534		status |= PCIM_PSTAT_D2;
1535		break;
1536	case PCI_POWERSTATE_D3:
1537		status |= PCIM_PSTAT_D3;
1538		break;
1539	default:
1540		return (EINVAL);
1541	}
1542
1543	if (bootverbose)
1544		printf(
1545		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1546		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1547		    oldstate, state);
1548
1549	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1550	if (delay)
1551		DELAY(delay);
1552	return (0);
1553}
1554
1555int
1556pci_get_powerstate_method(device_t dev, device_t child)
1557{
1558	struct pci_devinfo *dinfo = device_get_ivars(child);
1559	pcicfgregs *cfg = &dinfo->cfg;
1560	uint16_t status;
1561	int result;
1562
1563	if (cfg->pp.pp_cap != 0) {
1564		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1565		switch (status & PCIM_PSTAT_DMASK) {
1566		case PCIM_PSTAT_D0:
1567			result = PCI_POWERSTATE_D0;
1568			break;
1569		case PCIM_PSTAT_D1:
1570			result = PCI_POWERSTATE_D1;
1571			break;
1572		case PCIM_PSTAT_D2:
1573			result = PCI_POWERSTATE_D2;
1574			break;
1575		case PCIM_PSTAT_D3:
1576			result = PCI_POWERSTATE_D3;
1577			break;
1578		default:
1579			result = PCI_POWERSTATE_UNKNOWN;
1580			break;
1581		}
1582	} else {
1583		/* No support, device is always at D0 */
1584		result = PCI_POWERSTATE_D0;
1585	}
1586	return (result);
1587}
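
/*
 * Illustrative sketch (editorial, not part of this file): suspend/resume
 * code driving the two methods above through the pci_get_powerstate()/
 * pci_set_powerstate() wrappers.  The post-transition delays described in
 * pci_set_powerstate_method() are handled there, so callers only choose the
 * target state.  The function name is arbitrary.
 */
#if 0
static void
powerstate_example(device_t dev)
{
	/* Make sure the device is awake before touching its registers. */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);

	/* ... save any device context, then power down on suspend ... */
	pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
#endif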
1588
1589/*
1590 * Some convenience functions for PCI device drivers.
1591 */
1592
1593static __inline void
1594pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1595{
1596	uint16_t	command;
1597
1598	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1599	command |= bit;
1600	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1601}
1602
1603static __inline void
1604pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1605{
1606	uint16_t	command;
1607
1608	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1609	command &= ~bit;
1610	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1611}
1612
1613int
1614pci_enable_busmaster_method(device_t dev, device_t child)
1615{
1616	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1617	return (0);
1618}
1619
1620int
1621pci_disable_busmaster_method(device_t dev, device_t child)
1622{
1623	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1624	return (0);
1625}
1626
1627int
1628pci_enable_io_method(device_t dev, device_t child, int space)
1629{
1630	uint16_t command;
1631	uint16_t bit;
1632	char *error;
1633
1634	bit = 0;
1635	error = NULL;
1636
1637	switch(space) {
1638	case SYS_RES_IOPORT:
1639		bit = PCIM_CMD_PORTEN;
1640		error = "port";
1641		break;
1642	case SYS_RES_MEMORY:
1643		bit = PCIM_CMD_MEMEN;
1644		error = "memory";
1645		break;
1646	default:
1647		return (EINVAL);
1648	}
1649	pci_set_command_bit(dev, child, bit);
1650	/* Some devices seem to need a brief stall here; what to do? */
1651	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1652	if (command & bit)
1653		return (0);
1654	device_printf(child, "failed to enable %s mapping!\n", error);
1655	return (ENXIO);
1656}
1657
1658int
1659pci_disable_io_method(device_t dev, device_t child, int space)
1660{
1661	uint16_t command;
1662	uint16_t bit;
1663	char *error;
1664
1665	bit = 0;
1666	error = NULL;
1667
1668	switch(space) {
1669	case SYS_RES_IOPORT:
1670		bit = PCIM_CMD_PORTEN;
1671		error = "port";
1672		break;
1673	case SYS_RES_MEMORY:
1674		bit = PCIM_CMD_MEMEN;
1675		error = "memory";
1676		break;
1677	default:
1678		return (EINVAL);
1679	}
1680	pci_clear_command_bit(dev, child, bit);
1681	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1682	if (command & bit) {
1683		device_printf(child, "failed to disable %s mapping!\n", error);
1684		return (ENXIO);
1685	}
1686	return (0);
1687}
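
/*
 * Illustrative sketch (editorial, not part of this file): typical attach-time
 * use of the command register helpers above: enable bus mastering for DMA and
 * make sure memory decoding is on before touching the BARs.  The function
 * name is arbitrary.
 */
#if 0
static void
io_enable_example(device_t dev)
{
	pci_enable_busmaster(dev);
	if (pci_enable_io(dev, SYS_RES_MEMORY) != 0)
		device_printf(dev, "failed to enable memory decoding\n");
}
#endif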
1688
1689/*
1690 * New style pci driver.  Parent device is either a pci-host-bridge or a
1691 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1692 */
1693
1694void
1695pci_print_verbose(struct pci_devinfo *dinfo)
1696{
1697	int i;
1698
1699	if (bootverbose) {
1700		pcicfgregs *cfg = &dinfo->cfg;
1701
1702		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
1703		    cfg->vendor, cfg->device, cfg->revid);
1704		printf("\tbus=%d, slot=%d, func=%d\n",
1705		    cfg->bus, cfg->slot, cfg->func);
1706		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
1707		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
1708		    cfg->mfdev);
1709		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
1710		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
1711		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
1712		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
1713		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
1714		if (cfg->intpin > 0)
1715			printf("\tintpin=%c, irq=%d\n",
1716			    cfg->intpin +'a' -1, cfg->intline);
1717		if (cfg->pp.pp_cap) {
1718			uint16_t status;
1719
1720			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
1721			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
1722			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
1723			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
1724			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
1725			    status & PCIM_PSTAT_DMASK);
1726		}
1727		if (cfg->vpd.vpd_reg) {
1728			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
1729			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
1730				struct vpd_readonly *vrop;
1731				vrop = &cfg->vpd.vpd_ros[i];
1732				if (strncmp("CP", vrop->keyword, 2) == 0)
1733					printf("\tCP: id %d, BAR%d, off %#x\n",
1734					    vrop->value[0], vrop->value[1],
1735					    le16toh(
1736					      *(uint16_t *)&vrop->value[2]));
1737				else if (strncmp("RV", vrop->keyword, 2) == 0)
1738					printf("\tRV: %#hhx\n", vrop->value[0]);
1739				else
1740					printf("\t%.2s: %s\n", vrop->keyword,
1741					    vrop->value);
1742			}
1743			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
1744				struct vpd_write *vwp;
1745				vwp = &cfg->vpd.vpd_w[i];
1746				if (strncmp("RW", vwp->keyword, 2) != 0)
1747					printf("\t%.2s(%#x-%#x): %s\n",
1748					    vwp->keyword, vwp->start,
1749					    vwp->start + vwp->len, vwp->value);
1750			}
1751		}
1752		if (cfg->msi.msi_location) {
1753			int ctrl;
1754
1755			ctrl = cfg->msi.msi_ctrl;
1756			printf("\tMSI supports %d message%s%s%s\n",
1757			    cfg->msi.msi_msgnum,
1758			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
1759			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
1760			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
1761		}
1762		if (cfg->msix.msix_location) {
1763			printf("\tMSI-X supports %d message%s ",
1764			    cfg->msix.msix_msgnum,
1765			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
1766			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
1767				printf("in map 0x%x\n",
1768				    cfg->msix.msix_table_bar);
1769			else
1770				printf("in maps 0x%x and 0x%x\n",
1771				    cfg->msix.msix_table_bar,
1772				    cfg->msix.msix_pba_bar);
1773		}
1774	}
1775}
1776
1777static int
1778pci_porten(device_t pcib, int b, int s, int f)
1779{
1780	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1781		& PCIM_CMD_PORTEN) != 0;
1782}
1783
1784static int
1785pci_memen(device_t pcib, int b, int s, int f)
1786{
1787	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1788		& PCIM_CMD_MEMEN) != 0;
1789}
1790
1791/*
1792 * Add a resource based on a pci map register. Return 1 if the map
1793 * register is a 32bit map register or 2 if it is a 64bit register.
1794 */
1795static int
1796pci_add_map(device_t pcib, device_t bus, device_t dev,
1797    int b, int s, int f, int reg, struct resource_list *rl, int force,
1798    int prefetch)
1799{
1800	uint32_t map;
1801	pci_addr_t base;
1802	pci_addr_t start, end, count;
1803	uint8_t ln2size;
1804	uint8_t ln2range;
1805	uint32_t testval;
1806	uint16_t cmd;
1807	int type;
1808	int barlen;
1809	struct resource *res;
1810
1811	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1812	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1813	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1814	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1815
1816	if (pci_maptype(map) & PCI_MAPMEM)
1817		type = SYS_RES_MEMORY;
1818	else
1819		type = SYS_RES_IOPORT;
1820	ln2size = pci_mapsize(testval);
1821	ln2range = pci_maprange(testval);
1822	base = pci_mapbase(map);
1823	barlen = ln2range == 64 ? 2 : 1;
1824
1825	/*
1826	 * For I/O registers, if bottom bit is set, and the next bit up
1827	 * isn't clear, we know we have a BAR that doesn't conform to the
1828	 * spec, so ignore it.  Also, sanity check the size of the data
1829	 * areas against the type of resource involved.  Memory must be at least
1830	 * 16 bytes in size, while I/O ranges must be at least 4.
1831	 */
1832	if ((testval & 0x1) == 0x1 &&
1833	    (testval & 0x2) != 0)
1834		return (barlen);
1835	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1836	    (type == SYS_RES_IOPORT && ln2size < 2))
1837		return (barlen);
1838
1839	if (ln2range == 64)
1840		/* Read the other half of a 64bit map register */
1841		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
1842	if (bootverbose) {
1843		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
1844		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
1845		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
1846			printf(", port disabled\n");
1847		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
1848			printf(", memory disabled\n");
1849		else
1850			printf(", enabled\n");
1851	}
1852
1853	/*
1854	 * If base is 0, then we have problems.  It is best to ignore
1855	 * such entries for the moment.  These will be allocated later if
1856	 * the driver specifically requests them.  However, some
1857	 * removable busses look better when all resources are allocated,
1858	 * so allow '0' to be overridden.
1859	 *
1860	 * Similarly treat maps whose value is the same as the test value
1861	 * read back.  These maps have had all f's written to them by the
1862	 * BIOS in an attempt to disable the resources.
1863	 */
1864	if (!force && (base == 0 || map == testval))
1865		return (barlen);
1866	if ((u_long)base != base) {
1867		device_printf(bus,
1868		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
1869		return (barlen);
1870	}
1871
1872	/*
1873	 * This code theoretically does the right thing, but has
1874	 * undesirable side effects in some cases where peripherals
1875	 * respond oddly to having these bits enabled.  Allow the user
1876	 * to turn them off (since pci_enable_io_modes is 1 by
1877	 * default).
1878	 */
1879	if (pci_enable_io_modes) {
1880		/* Turn on resources that have been left off by a lazy BIOS */
1881		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
1882			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
1883			cmd |= PCIM_CMD_PORTEN;
1884			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
1885		}
1886		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
1887			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
1888			cmd |= PCIM_CMD_MEMEN;
1889			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
1890		}
1891	} else {
1892		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
1893			return (barlen);
1894		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
1895			return (barlen);
1896	}
1897
1898	count = (pci_addr_t)1 << ln2size;
1899	if (base == 0 || base == pci_mapbase(testval)) {
1900		start = 0;	/* Let the parent decide */
1901		end = ~0ULL;
1902	} else {
1903		start = base;
1904		end = base + ((pci_addr_t)1 << ln2size) - 1;
1905	}
1906	resource_list_add(rl, type, reg, start, end, count);
1907
1908	/*
1909	 * Not quite sure what to do if allocating the resource fails,
1910	 * since I can postulate several right answers.
1911	 */
1912	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
1913	    prefetch ? RF_PREFETCHABLE : 0);
1914	if (res == NULL)
1915		return (barlen);
1916	start = rman_get_start(res);
1917	if ((u_long)start != start) {
1918		/* Wait a minute!  This platform can't do this address. */
1919		device_printf(bus,
1920		    "pci%d:%d:%d bar %#x start %#jx: too many address bits\n",
1921		    b, s, f, reg, (uintmax_t)start);
1922		resource_list_release(rl, bus, dev, type, reg, res);
1923		return (barlen);
1924	}
1925	pci_write_config(dev, reg, start, 4);
1926	if (ln2range == 64)
1927		pci_write_config(dev, reg + 4, start >> 32, 4);
1928	return (barlen);
1929}
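
/*
 * To illustrate the decoding above with purely hypothetical values: a
 * 32-bit memory BAR that reads back 0xffffff00 after the all-ones write
 * yields ln2size == 8 (a 256 byte window), while an I/O BAR reading back
 * 0xfffffff1 yields ln2size == 4 (a 16 byte port range) once the low
 * type bits are masked off.  Both occupy a single map register, so
 * pci_add_map() returns 1; only a 64-bit memory BAR consumes two
 * registers and returns 2.
 */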
1930
1931/*
1932 * For ATA devices we need to decide early what addressing mode to use.
1933 * Legacy mode demands that the primary and secondary ATA ports sit on
1934 * the same addresses that old ISA hardware did.  This dictates that we
1935 * use those addresses and ignore the BARs if we cannot set PCI native
1936 * addressing mode.
1937 */
1938static void
1939pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
1940    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
1941{
1942	int rid, type, progif;
1943#if 0
1944	/* if this device supports PCI native addressing use it */
1945	progif = pci_read_config(dev, PCIR_PROGIF, 1);
1946	if ((progif & 0x8a) == 0x8a) {
1947		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
1948		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
1949			printf("Trying ATA native PCI addressing mode\n");
1950			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
1951		}
1952	}
1953#endif
1954	progif = pci_read_config(dev, PCIR_PROGIF, 1);
1955	type = SYS_RES_IOPORT;
1956	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
1957		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
1958		    prefetchmask & (1 << 0));
1959		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
1960		    prefetchmask & (1 << 1));
1961	} else {
1962		rid = PCIR_BAR(0);
1963		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
1964		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
1965		    0);
1966		rid = PCIR_BAR(1);
1967		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
1968		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
1969		    0);
1970	}
1971	if (progif & PCIP_STORAGE_IDE_MODESEC) {
1972		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
1973		    prefetchmask & (1 << 2));
1974		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
1975		    prefetchmask & (1 << 3));
1976	} else {
1977		rid = PCIR_BAR(2);
1978		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
1979		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
1980		    0);
1981		rid = PCIR_BAR(3);
1982		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
1983		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
1984		    0);
1985	}
1986	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
1987	    prefetchmask & (1 << 4));
1988	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
1989	    prefetchmask & (1 << 5));
1990}
1991
1992static void
1993pci_assign_interrupt(device_t bus, device_t dev, int force_route)
1994{
1995	struct pci_devinfo *dinfo = device_get_ivars(dev);
1996	pcicfgregs *cfg = &dinfo->cfg;
1997	char tunable_name[64];
1998	int irq;
1999
2000	/* Has to have an intpin to have an interrupt. */
2001	if (cfg->intpin == 0)
2002		return;
2003
2004	/* Let the user override the IRQ with a tunable. */
2005	irq = PCI_INVALID_IRQ;
2006	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2007	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2008	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2009		irq = PCI_INVALID_IRQ;
2010
2011	/*
2012	 * If we didn't get an IRQ via the tunable, then we either use the
2013	 * IRQ value in the intline register or we ask the bus to route an
2014	 * interrupt for us.  If force_route is true, then we only use the
2015	 * value in the intline register if the bus was unable to assign an
2016	 * IRQ.
2017	 */
2018	if (!PCI_INTERRUPT_VALID(irq)) {
2019		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2020			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2021		if (!PCI_INTERRUPT_VALID(irq))
2022			irq = cfg->intline;
2023	}
2024
2025	/* If after all that we don't have an IRQ, just bail. */
2026	if (!PCI_INTERRUPT_VALID(irq))
2027		return;
2028
2029	/* Update the config register if it changed. */
2030	if (irq != cfg->intline) {
2031		cfg->intline = irq;
2032		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2033	}
2034
2035	/* Add this IRQ as rid 0 interrupt resource. */
2036	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2037}
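
/*
 * To illustrate the tunable consulted above with hypothetical values:
 * for a device at bus 0, slot 5 using interrupt pin A, setting
 * "hw.pci0.5.INTA.irq=10" in the loader environment forces IRQ 10 onto
 * that device; values outside the range 1-254 are ignored.
 */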
2038
2039void
2040pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2041{
2042	device_t pcib;
2043	struct pci_devinfo *dinfo = device_get_ivars(dev);
2044	pcicfgregs *cfg = &dinfo->cfg;
2045	struct resource_list *rl = &dinfo->resources;
2046	struct pci_quirk *q;
2047	int b, i, f, s;
2048
2049	pcib = device_get_parent(bus);
2050
2051	b = cfg->bus;
2052	s = cfg->slot;
2053	f = cfg->func;
2054
2055	/* ATA devices need special map treatment */
2056	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2057	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2058	    (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV))
2059		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2060	else
2061		for (i = 0; i < cfg->nummaps;)
2062			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2063			    rl, force, prefetchmask & (1 << i));
2064
2065	/*
2066	 * Add additional, quirked resources.
2067	 */
2068	for (q = &pci_quirks[0]; q->devid; q++) {
2069		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2070		    && q->type == PCI_QUIRK_MAP_REG)
2071			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2072			  force, 0);
2073	}
2074
2075	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2076#ifdef __PCI_REROUTE_INTERRUPT
2077		/*
2078		 * Try to re-route interrupts. Sometimes the BIOS or
2079		 * firmware may leave bogus values in these registers.
2080		 * If the re-route fails, then just stick with what we
2081		 * have.
2082		 */
2083		pci_assign_interrupt(bus, dev, 1);
2084#else
2085		pci_assign_interrupt(bus, dev, 0);
2086#endif
2087	}
2088}
2089
2090void
2091pci_add_children(device_t dev, int busno, size_t dinfo_size)
2092{
2093#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2094	device_t pcib = device_get_parent(dev);
2095	struct pci_devinfo *dinfo;
2096	int maxslots;
2097	int s, f, pcifunchigh;
2098	uint8_t hdrtype;
2099
2100	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2101	    ("dinfo_size too small"));
2102	maxslots = PCIB_MAXSLOTS(pcib);
2103	for (s = 0; s <= maxslots; s++) {
2104		pcifunchigh = 0;
2105		f = 0;
2106		DELAY(1);
2107		hdrtype = REG(PCIR_HDRTYPE, 1);
2108		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2109			continue;
2110		if (hdrtype & PCIM_MFDEV)
2111			pcifunchigh = PCI_FUNCMAX;
2112		for (f = 0; f <= pcifunchigh; f++) {
2113			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2114			if (dinfo != NULL) {
2115				pci_add_child(dev, dinfo);
2116			}
2117		}
2118	}
2119#undef REG
2120}
2121
2122void
2123pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2124{
2125	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2126	device_set_ivars(dinfo->cfg.dev, dinfo);
2127	resource_list_init(&dinfo->resources);
2128	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2129	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2130	pci_print_verbose(dinfo);
2131	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2132}
2133
2134static int
2135pci_probe(device_t dev)
2136{
2137
2138	device_set_desc(dev, "PCI bus");
2139
2140	/* Allow other subclasses to override this driver. */
2141	return (-1000);
2142}
2143
2144static int
2145pci_attach(device_t dev)
2146{
2147	int busno;
2148
2149	/*
2150	 * Since there can be multiple independently numbered PCI
2151	 * busses on systems with multiple PCI domains, we can't use
2152	 * the unit number to decide which bus we are probing. We ask
2153	 * the parent pcib what our bus number is.
2154	 */
2155	busno = pcib_get_bus(dev);
2156	if (bootverbose)
2157		device_printf(dev, "physical bus=%d\n", busno);
2158
2159	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2160
2161	return (bus_generic_attach(dev));
2162}
2163
2164int
2165pci_suspend(device_t dev)
2166{
2167	int dstate, error, i, numdevs;
2168	device_t acpi_dev, child, *devlist;
2169	struct pci_devinfo *dinfo;
2170
2171	/*
2172	 * Save the PCI configuration space for each child and set the
2173	 * device in the appropriate power state for this sleep state.
2174	 */
2175	acpi_dev = NULL;
2176	if (pci_do_power_resume)
2177		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2178	device_get_children(dev, &devlist, &numdevs);
2179	for (i = 0; i < numdevs; i++) {
2180		child = devlist[i];
2181		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2182		pci_cfg_save(child, dinfo, 0);
2183	}
2184
2185	/* Suspend devices before potentially powering them down. */
2186	error = bus_generic_suspend(dev);
2187	if (error) {
2188		free(devlist, M_TEMP);
2189		return (error);
2190	}
2191
2192	/*
2193	 * Always set the device to D3.  If ACPI suggests a different
2194	 * power state, use it instead.  If ACPI is not present, the
2195	 * firmware is responsible for managing device power.  Skip
2196	 * children who aren't attached since they are powered down
2197	 * separately.  Only manage type 0 devices for now.
2198	 */
2199	for (i = 0; acpi_dev && i < numdevs; i++) {
2200		child = devlist[i];
2201		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2202		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2203			dstate = PCI_POWERSTATE_D3;
2204			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2205			pci_set_powerstate(child, dstate);
2206		}
2207	}
2208	free(devlist, M_TEMP);
2209	return (0);
2210}
2211
2212int
2213pci_resume(device_t dev)
2214{
2215	int i, numdevs;
2216	device_t acpi_dev, child, *devlist;
2217	struct pci_devinfo *dinfo;
2218
2219	/*
2220	 * Set each child to D0 and restore its PCI configuration space.
2221	 */
2222	acpi_dev = NULL;
2223	if (pci_do_power_resume)
2224		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2225	device_get_children(dev, &devlist, &numdevs);
2226	for (i = 0; i < numdevs; i++) {
2227		/*
2228		 * Notify ACPI we're going to D0 but ignore the result.  If
2229		 * ACPI is not present, the firmware is responsible for
2230		 * managing device power.  Only manage type 0 devices for now.
2231		 */
2232		child = devlist[i];
2233		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2234		if (acpi_dev && device_is_attached(child) &&
2235		    dinfo->cfg.hdrtype == 0) {
2236			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2237			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2238		}
2239
2240		/* Now the device is powered up, restore its config space. */
2241		pci_cfg_restore(child, dinfo);
2242	}
2243	free(devlist, M_TEMP);
2244	return (bus_generic_resume(dev));
2245}
2246
2247static void
2248pci_load_vendor_data(void)
2249{
2250	caddr_t vendordata, info;
2251
2252	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2253		info = preload_search_info(vendordata, MODINFO_ADDR);
2254		pci_vendordata = *(char **)info;
2255		info = preload_search_info(vendordata, MODINFO_SIZE);
2256		pci_vendordata_size = *(size_t *)info;
2257		/* terminate the database */
2258		pci_vendordata[pci_vendordata_size] = '\n';
2259	}
2260}
2261
2262void
2263pci_driver_added(device_t dev, driver_t *driver)
2264{
2265	int numdevs;
2266	device_t *devlist;
2267	device_t child;
2268	struct pci_devinfo *dinfo;
2269	int i;
2270
2271	if (bootverbose)
2272		device_printf(dev, "driver added\n");
2273	DEVICE_IDENTIFY(driver, dev);
2274	device_get_children(dev, &devlist, &numdevs);
2275	for (i = 0; i < numdevs; i++) {
2276		child = devlist[i];
2277		if (device_get_state(child) != DS_NOTPRESENT)
2278			continue;
2279		dinfo = device_get_ivars(child);
2280		pci_print_verbose(dinfo);
2281		if (bootverbose)
2282			printf("pci%d:%d:%d: reprobing on driver added\n",
2283			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2284		pci_cfg_restore(child, dinfo);
2285		if (device_probe_and_attach(child) != 0)
2286			pci_cfg_save(child, dinfo, 1);
2287	}
2288	free(devlist, M_TEMP);
2289}
2290
2291int
2292pci_print_child(device_t dev, device_t child)
2293{
2294	struct pci_devinfo *dinfo;
2295	struct resource_list *rl;
2296	int retval = 0;
2297
2298	dinfo = device_get_ivars(child);
2299	rl = &dinfo->resources;
2300
2301	retval += bus_print_child_header(dev, child);
2302
2303	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2304	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2305	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2306	if (device_get_flags(dev))
2307		retval += printf(" flags %#x", device_get_flags(dev));
2308
2309	retval += printf(" at device %d.%d", pci_get_slot(child),
2310	    pci_get_function(child));
2311
2312	retval += bus_print_child_footer(dev, child);
2313
2314	return (retval);
2315}
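
/*
 * The routine above produces boot-time lines of the general form
 * (names and addresses are hypothetical):
 *
 *	foo0: <Some PCI device> port 0xd000-0xd0ff mem
 *	    0xfb000000-0xfb000fff irq 11 at device 4.0 on pci0
 */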
2316
2317static struct
2318{
2319	int	class;
2320	int	subclass;
2321	char	*desc;
2322} pci_nomatch_tab[] = {
2323	{PCIC_OLD,		-1,			"old"},
2324	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
2325	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
2326	{PCIC_STORAGE,		-1,			"mass storage"},
2327	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
2328	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
2329	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
2330	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
2331	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
2332	{PCIC_NETWORK,		-1,			"network"},
2333	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
2334	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
2335	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
2336	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
2337	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
2338	{PCIC_DISPLAY,		-1,			"display"},
2339	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
2340	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
2341	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
2342	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
2343	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
2344	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
2345	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
2346	{PCIC_MEMORY,		-1,			"memory"},
2347	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
2348	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
2349	{PCIC_BRIDGE,		-1,			"bridge"},
2350	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
2351	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
2352	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
2353	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
2354	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
2355	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
2356	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
2357	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
2358	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
2359	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
2360	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
2361	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
2362	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
2363	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
2364	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
2365	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
2366	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
2367	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
2368	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
2369	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
2370	{PCIC_INPUTDEV,		-1,			"input device"},
2371	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
2372	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
2373	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
2374	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
2375	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
2376	{PCIC_DOCKING,		-1,			"docking station"},
2377	{PCIC_PROCESSOR,	-1,			"processor"},
2378	{PCIC_SERIALBUS,	-1,			"serial bus"},
2379	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
2380	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
2381	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
2382	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
2383	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
2384	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
2385	{PCIC_WIRELESS,		-1,			"wireless controller"},
2386	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
2387	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
2388	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
2389	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
2390	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
2391	{PCIC_SATCOM,		-1,			"satellite communication"},
2392	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
2393	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
2394	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
2395	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
2396	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
2397	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
2398	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
2399	{PCIC_DASP,		-1,			"dasp"},
2400	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
2401	{0, 0,		NULL}
2402};
2403
2404void
2405pci_probe_nomatch(device_t dev, device_t child)
2406{
2407	int	i;
2408	char	*cp, *scp, *device;
2409
2410	/*
2411	 * Look for a listing for this device in a loaded device database.
2412	 */
2413	if ((device = pci_describe_device(child)) != NULL) {
2414		device_printf(dev, "<%s>", device);
2415		free(device, M_DEVBUF);
2416	} else {
2417		/*
2418		 * Scan the class/subclass descriptions for a general
2419		 * description.
2420		 */
2421		cp = "unknown";
2422		scp = NULL;
2423		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2424			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2425				if (pci_nomatch_tab[i].subclass == -1) {
2426					cp = pci_nomatch_tab[i].desc;
2427				} else if (pci_nomatch_tab[i].subclass ==
2428				    pci_get_subclass(child)) {
2429					scp = pci_nomatch_tab[i].desc;
2430				}
2431			}
2432		}
2433		device_printf(dev, "<%s%s%s>",
2434		    cp ? cp : "",
2435		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2436		    scp ? scp : "");
2437	}
2438	printf(" at device %d.%d (no driver attached)\n",
2439	    pci_get_slot(child), pci_get_function(child));
2440	if (pci_do_power_nodriver)
2441		pci_cfg_save(child,
2442		    (struct pci_devinfo *) device_get_ivars(child), 1);
2443	return;
2444}
2445
2446/*
2447 * Parse the PCI device database, if loaded, and return a pointer to a
2448 * description of the device.
2449 *
2450 * The database is flat text formatted as follows:
2451 *
2452 * Any line not in a valid format is ignored.
2453 * Lines are terminated with newline '\n' characters.
2454 *
2455 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2456 * the vendor name.
2457 *
2458 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2459 * - devices cannot be listed without a corresponding VENDOR line.
2460 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2461 * another TAB, then the device name.
2462 */
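
/*
 * For example, a vendor line followed by one of its device lines might
 * read (entries shown here are illustrative only, with <TAB> marking a
 * literal tab character):
 *
 *	8086<TAB>Intel Corporation
 *	<TAB>1229<TAB>82557/8/9 EtherExpress Pro/100(B) Ethernet
 */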
2463
2464/*
2465 * Assuming (ptr) points to the beginning of a line in the database,
2466 * return the vendor or device and description of the next entry.
2467 * Whichever of (vendor) or (device) is inappropriate for the entry type
2468 * is set to -1.  Returns nonzero at the end of the database.
2469 *
2470 * Note that this is somewhat fragile in the face of corrupt data;
2471 * we attempt to safeguard against this by appending a newline to the
2472 * end of the database when we initialise it.
2473 */
2474static int
2475pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2476{
2477	char	*cp = *ptr;
2478	int	left;
2479
2480	*device = -1;
2481	*vendor = -1;
2482	**desc = '\0';
2483	for (;;) {
2484		left = pci_vendordata_size - (cp - pci_vendordata);
2485		if (left <= 0) {
2486			*ptr = cp;
2487			return(1);
2488		}
2489
2490		/* vendor entry? */
2491		if (*cp != '\t' &&
2492		    sscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
2493			break;
2494		/* device entry? */
2495		if (*cp == '\t' &&
2496		    sscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
2497			break;
2498
2499		/* skip to next line */
2500		while (*cp != '\n' && left > 0) {
2501			cp++;
2502			left--;
2503		}
2504		if (*cp == '\n') {
2505			cp++;
2506			left--;
2507		}
2508	}
2509	/* skip to next line */
2510	while (*cp != '\n' && left > 0) {
2511		cp++;
2512		left--;
2513	}
2514	if (*cp == '\n' && left > 0)
2515		cp++;
2516	*ptr = cp;
2517	return(0);
2518}
2519
2520static char *
2521pci_describe_device(device_t dev)
2522{
2523	int	vendor, device;
2524	char	*desc, *vp, *dp, *line;
2525
2526	desc = vp = dp = NULL;
2527
2528	/*
2529	 * If we have no vendor data, we can't do anything.
2530	 */
2531	if (pci_vendordata == NULL)
2532		goto out;
2533
2534	/*
2535	 * Scan the vendor data looking for this device
2536	 */
2537	line = pci_vendordata;
2538	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2539		goto out;
2540	for (;;) {
2541		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2542			goto out;
2543		if (vendor == pci_get_vendor(dev))
2544			break;
2545	}
2546	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2547		goto out;
2548	for (;;) {
2549		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2550			*dp = 0;
2551			break;
2552		}
2553		if (vendor != -1) {
2554			*dp = 0;
2555			break;
2556		}
2557		if (device == pci_get_device(dev))
2558			break;
2559	}
2560	if (dp[0] == '\0')
2561		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2562	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2563	    NULL)
2564		sprintf(desc, "%s, %s", vp, dp);
2565 out:
2566	if (vp != NULL)
2567		free(vp, M_DEVBUF);
2568	if (dp != NULL)
2569		free(dp, M_DEVBUF);
2570	return(desc);
2571}
2572
2573int
2574pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
2575{
2576	struct pci_devinfo *dinfo;
2577	pcicfgregs *cfg;
2578
2579	dinfo = device_get_ivars(child);
2580	cfg = &dinfo->cfg;
2581
2582	switch (which) {
2583	case PCI_IVAR_ETHADDR:
2584		/*
2585		 * The generic accessor doesn't deal with failure, so
2586		 * we set the return value, then return an error.
2587		 */
2588		*((uint8_t **) result) = NULL;
2589		return (EINVAL);
2590	case PCI_IVAR_SUBVENDOR:
2591		*result = cfg->subvendor;
2592		break;
2593	case PCI_IVAR_SUBDEVICE:
2594		*result = cfg->subdevice;
2595		break;
2596	case PCI_IVAR_VENDOR:
2597		*result = cfg->vendor;
2598		break;
2599	case PCI_IVAR_DEVICE:
2600		*result = cfg->device;
2601		break;
2602	case PCI_IVAR_DEVID:
2603		*result = (cfg->device << 16) | cfg->vendor;
2604		break;
2605	case PCI_IVAR_CLASS:
2606		*result = cfg->baseclass;
2607		break;
2608	case PCI_IVAR_SUBCLASS:
2609		*result = cfg->subclass;
2610		break;
2611	case PCI_IVAR_PROGIF:
2612		*result = cfg->progif;
2613		break;
2614	case PCI_IVAR_REVID:
2615		*result = cfg->revid;
2616		break;
2617	case PCI_IVAR_INTPIN:
2618		*result = cfg->intpin;
2619		break;
2620	case PCI_IVAR_IRQ:
2621		*result = cfg->intline;
2622		break;
2623	case PCI_IVAR_BUS:
2624		*result = cfg->bus;
2625		break;
2626	case PCI_IVAR_SLOT:
2627		*result = cfg->slot;
2628		break;
2629	case PCI_IVAR_FUNCTION:
2630		*result = cfg->func;
2631		break;
2632	case PCI_IVAR_CMDREG:
2633		*result = cfg->cmdreg;
2634		break;
2635	case PCI_IVAR_CACHELNSZ:
2636		*result = cfg->cachelnsz;
2637		break;
2638	case PCI_IVAR_MINGNT:
2639		*result = cfg->mingnt;
2640		break;
2641	case PCI_IVAR_MAXLAT:
2642		*result = cfg->maxlat;
2643		break;
2644	case PCI_IVAR_LATTIMER:
2645		*result = cfg->lattimer;
2646		break;
2647	default:
2648		return (ENOENT);
2649	}
2650	return (0);
2651}
2652
2653int
2654pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2655{
2656	struct pci_devinfo *dinfo;
2657
2658	dinfo = device_get_ivars(child);
2659
2660	switch (which) {
2661	case PCI_IVAR_INTPIN:
2662		dinfo->cfg.intpin = value;
2663		return (0);
2664	case PCI_IVAR_ETHADDR:
2665	case PCI_IVAR_SUBVENDOR:
2666	case PCI_IVAR_SUBDEVICE:
2667	case PCI_IVAR_VENDOR:
2668	case PCI_IVAR_DEVICE:
2669	case PCI_IVAR_DEVID:
2670	case PCI_IVAR_CLASS:
2671	case PCI_IVAR_SUBCLASS:
2672	case PCI_IVAR_PROGIF:
2673	case PCI_IVAR_REVID:
2674	case PCI_IVAR_IRQ:
2675	case PCI_IVAR_BUS:
2676	case PCI_IVAR_SLOT:
2677	case PCI_IVAR_FUNCTION:
2678		return (EINVAL);	/* disallow for now */
2679
2680	default:
2681		return (ENOENT);
2682	}
2683}
2684
2685
2686#include "opt_ddb.h"
2687#ifdef DDB
2688#include <ddb/ddb.h>
2689#include <sys/cons.h>
2690
2691/*
2692 * List resources based on pci map registers, for use within ddb
2693 */
2694
2695DB_SHOW_COMMAND(pciregs, db_pci_dump)
2696{
2697	struct pci_devinfo *dinfo;
2698	struct devlist *devlist_head;
2699	struct pci_conf *p;
2700	const char *name;
2701	int i, error, none_count;
2702
2703	none_count = 0;
2704	/* get the head of the device queue */
2705	devlist_head = &pci_devq;
2706
2707	/*
2708	 * Go through the list of devices and print them out
2709	 */
2710	for (error = 0, i = 0,
2711	     dinfo = STAILQ_FIRST(devlist_head);
2712	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
2713	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
2714
2715		/* Look up the device's driver name, if one is attached. */
2716		name = NULL;
2717		if (dinfo->cfg.dev)
2718			name = device_get_name(dinfo->cfg.dev);
2719
2720		p = &dinfo->conf;
2721		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
2722			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
2723			(name && *name) ? name : "none",
2724			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
2725			none_count++,
2726			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
2727			p->pc_sel.pc_func, (p->pc_class << 16) |
2728			(p->pc_subclass << 8) | p->pc_progif,
2729			(p->pc_subdevice << 16) | p->pc_subvendor,
2730			(p->pc_device << 16) | p->pc_vendor,
2731			p->pc_revid, p->pc_hdr);
2732	}
2733}
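
/*
 * From the ddb prompt this is invoked as "show pciregs"; each device is
 * printed on one line of the form (values are hypothetical):
 *
 *	none0@pci0:4:0:	class=0x020000 card=0x00018086 chip=0x12298086
 *	rev=0x08 hdr=0x00
 */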
2734#endif /* DDB */
2735
2736static struct resource *
2737pci_alloc_map(device_t dev, device_t child, int type, int *rid,
2738    u_long start, u_long end, u_long count, u_int flags)
2739{
2740	struct pci_devinfo *dinfo = device_get_ivars(child);
2741	struct resource_list *rl = &dinfo->resources;
2742	struct resource_list_entry *rle;
2743	struct resource *res;
2744	pci_addr_t map, testval;
2745	int mapsize;
2746
2747	/*
2748	 * Weed out the bogons, and figure out how large the BAR/map
2749	 * is.  BARs that read back 0 here are bogus and unimplemented.
2750	 * Note: atapci devices in legacy mode are special and handled
2751	 * elsewhere in the code.  If you have an atapci device in legacy
2752	 * mode and it fails here, that other code is broken.
2753	 */
2754	res = NULL;
2755	map = pci_read_config(child, *rid, 4);
2756	pci_write_config(child, *rid, 0xffffffff, 4);
2757	testval = pci_read_config(child, *rid, 4);
2758	if (pci_maprange(testval) == 64)
2759		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
2760	if (pci_mapbase(testval) == 0)
2761		goto out;
2762	if (pci_maptype(testval) & PCI_MAPMEM) {
2763		if (type != SYS_RES_MEMORY) {
2764			if (bootverbose)
2765				device_printf(dev,
2766				    "child %s requested type %d for rid %#x,"
2767				    " but the BAR says it is a memio\n",
2768				    device_get_nameunit(child), type, *rid);
2769			goto out;
2770		}
2771	} else {
2772		if (type != SYS_RES_IOPORT) {
2773			if (bootverbose)
2774				device_printf(dev,
2775				    "child %s requested type %d for rid %#x,"
2776				    " but the BAR says it is an ioport\n",
2777				    device_get_nameunit(child), type, *rid);
2778			goto out;
2779		}
2780	}
2781	/*
2782	 * For real BARs, we need to override the size that
2783	 * the driver requests with the size that the BAR
2784	 * actually decodes; otherwise we would have a
2785	 * situation where we might allocate the excess to
2786	 * another driver, which won't work.
2787	 */
2788	mapsize = pci_mapsize(testval);
2789	count = 1UL << mapsize;
2790	if (RF_ALIGNMENT(flags) < mapsize)
2791		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
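
	/*
	 * For example (hypothetical numbers): a driver asking for 0x100
	 * bytes from a BAR that actually decodes 0x1000 bytes ends up
	 * with count forced to 0x1000 and the alignment raised to at
	 * least 4KB.
	 */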
2792
2793	/*
2794	 * Allocate enough resource, and then write the base
2795	 * address back into the appropriate BAR.
2796	 */
2797	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
2798	    start, end, count, flags);
2799	if (res == NULL) {
2800		device_printf(child,
2801		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
2802		    count, *rid, type, start, end);
2803		goto out;
2804	}
2805	resource_list_add(rl, type, *rid, start, end, count);
2806	rle = resource_list_find(rl, type, *rid);
2807	if (rle == NULL)
2808		panic("pci_alloc_map: unexpectedly can't find resource.");
2809	rle->res = res;
2810	rle->start = rman_get_start(res);
2811	rle->end = rman_get_end(res);
2812	rle->count = count;
2813	if (bootverbose)
2814		device_printf(child,
2815		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
2816		    count, *rid, type, rman_get_start(res));
2817	map = rman_get_start(res);
2818out:;
2819	pci_write_config(child, *rid, map, 4);
2820	if (pci_maprange(testval) == 64)
2821		pci_write_config(child, *rid + 4, map >> 32, 4);
2822	return (res);
2823}
2824
2825
2826struct resource *
2827pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
2828		   u_long start, u_long end, u_long count, u_int flags)
2829{
2830	struct pci_devinfo *dinfo = device_get_ivars(child);
2831	struct resource_list *rl = &dinfo->resources;
2832	struct resource_list_entry *rle;
2833	pcicfgregs *cfg = &dinfo->cfg;
2834
2835	/*
2836	 * Perform lazy resource allocation
2837	 */
2838	if (device_get_parent(child) == dev) {
2839		switch (type) {
2840		case SYS_RES_IRQ:
2841			/*
2842			 * Can't alloc legacy interrupt once MSI messages
2843			 * have been allocated.
2844			 */
2845			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
2846			    cfg->msix.msix_alloc > 0))
2847				return (NULL);
2848			/*
2849			 * If the child device doesn't have an
2850			 * interrupt routed and is deserving of an
2851			 * interrupt, try to assign it one.
2852			 */
2853			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
2854			    (cfg->intpin != 0))
2855				pci_assign_interrupt(dev, child, 0);
2856			break;
2857		case SYS_RES_IOPORT:
2858		case SYS_RES_MEMORY:
2859			if (*rid < PCIR_BAR(cfg->nummaps)) {
2860				/*
2861				 * Enable the I/O mode.  We should
2862				 * also be assigning resources when
2863				 * none are present.  The
2864				 * resource_list_alloc call below sort
2865				 * of does this...
2866				 */
2867				if (PCI_ENABLE_IO(dev, child, type))
2868					return (NULL);
2869			}
2870			rle = resource_list_find(rl, type, *rid);
2871			if (rle == NULL)
2872				return (pci_alloc_map(dev, child, type, rid,
2873				    start, end, count, flags));
2874			break;
2875		}
2876		/*
2877		 * If we've already allocated the resource, then
2878		 * return it now.  But first we may need to activate
2879		 * it, since we don't allocate the resource as active
2880		 * above.  Normally this would be done down in the
2881		 * nexus, but since we short-circuit that path we have
2882		 * to do its job here.  Not sure if we should free the
2883		 * resource if it fails to activate.
2884		 */
2885		rle = resource_list_find(rl, type, *rid);
2886		if (rle != NULL && rle->res != NULL) {
2887			if (bootverbose)
2888				device_printf(child,
2889			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
2890				    rman_get_size(rle->res), *rid, type,
2891				    rman_get_start(rle->res));
2892			if ((flags & RF_ACTIVE) &&
2893			    bus_generic_activate_resource(dev, child, type,
2894			    *rid, rle->res) != 0)
2895				return (NULL);
2896			return (rle->res);
2897		}
2898	}
2899	return (resource_list_alloc(rl, dev, child, type, rid,
2900	    start, end, count, flags));
2901}
2902
2903void
2904pci_delete_resource(device_t dev, device_t child, int type, int rid)
2905{
2906	struct pci_devinfo *dinfo;
2907	struct resource_list *rl;
2908	struct resource_list_entry *rle;
2909
2910	if (device_get_parent(child) != dev)
2911		return;
2912
2913	dinfo = device_get_ivars(child);
2914	rl = &dinfo->resources;
2915	rle = resource_list_find(rl, type, rid);
2916	if (rle) {
2917		if (rle->res) {
2918			if (rman_get_device(rle->res) != dev ||
2919			    rman_get_flags(rle->res) & RF_ACTIVE) {
2920				device_printf(dev, "delete_resource: "
2921				    "Resource still owned by child, oops. "
2922				    "(type=%d, rid=%d, addr=%lx)\n",
2923				    rle->type, rle->rid,
2924				    rman_get_start(rle->res));
2925				return;
2926			}
2927			bus_release_resource(dev, type, rid, rle->res);
2928		}
2929		resource_list_delete(rl, type, rid);
2930	}
2931	/*
2932	 * Why do we turn off the PCI configuration BAR when we delete a
2933	 * resource? -- imp
2934	 */
2935	pci_write_config(child, rid, 0, 4);
2936	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
2937}
2938
2939struct resource_list *
2940pci_get_resource_list(device_t dev, device_t child)
2941{
2942	struct pci_devinfo *dinfo = device_get_ivars(child);
2943
2944	return (&dinfo->resources);
2945}
2946
2947uint32_t
2948pci_read_config_method(device_t dev, device_t child, int reg, int width)
2949{
2950	struct pci_devinfo *dinfo = device_get_ivars(child);
2951	pcicfgregs *cfg = &dinfo->cfg;
2952
2953	return (PCIB_READ_CONFIG(device_get_parent(dev),
2954	    cfg->bus, cfg->slot, cfg->func, reg, width));
2955}
2956
2957void
2958pci_write_config_method(device_t dev, device_t child, int reg,
2959    uint32_t val, int width)
2960{
2961	struct pci_devinfo *dinfo = device_get_ivars(child);
2962	pcicfgregs *cfg = &dinfo->cfg;
2963
2964	PCIB_WRITE_CONFIG(device_get_parent(dev),
2965	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
2966}
2967
2968int
2969pci_child_location_str_method(device_t dev, device_t child, char *buf,
2970    size_t buflen)
2971{
2972
2973	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
2974	    pci_get_function(child));
2975	return (0);
2976}
2977
2978int
2979pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
2980    size_t buflen)
2981{
2982	struct pci_devinfo *dinfo;
2983	pcicfgregs *cfg;
2984
2985	dinfo = device_get_ivars(child);
2986	cfg = &dinfo->cfg;
2987	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
2988	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
2989	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
2990	    cfg->progif);
2991	return (0);
2992}
2993
2994int
2995pci_assign_interrupt_method(device_t dev, device_t child)
2996{
2997	struct pci_devinfo *dinfo = device_get_ivars(child);
2998	pcicfgregs *cfg = &dinfo->cfg;
2999
3000	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3001	    cfg->intpin));
3002}
3003
3004static int
3005pci_modevent(module_t mod, int what, void *arg)
3006{
3007	static struct cdev *pci_cdev;
3008
3009	switch (what) {
3010	case MOD_LOAD:
3011		STAILQ_INIT(&pci_devq);
3012		pci_generation = 0;
3013		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3014		    "pci");
3015		pci_load_vendor_data();
3016		break;
3017
3018	case MOD_UNLOAD:
3019		destroy_dev(pci_cdev);
3020		break;
3021	}
3022
3023	return (0);
3024}
3025
3026void
3027pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3028{
3029	int i;
3030
3031	/*
3032	 * Only do header type 0 devices.  Type 1 devices are bridges,
3033	 * which we know need special treatment.  Type 2 devices are
3034	 * cardbus bridges which also require special treatment.
3035	 * Other types are unknown, and we err on the side of safety
3036	 * by ignoring them.
3037	 */
3038	if (dinfo->cfg.hdrtype != 0)
3039		return;
3040
3041	/*
3042	 * Restore the device to full power mode.  We must do this
3043	 * before we restore the registers because moving from D3 to
3044	 * D0 will cause the chip's BARs and some other registers to
3045	 * be reset to some unknown power on reset values.  Cut down
3046	 * the noise on boot by doing nothing if we are already in
3047	 * state D0.
3048	 */
3049	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3050		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3051	}
3052	for (i = 0; i < dinfo->cfg.nummaps; i++)
3053		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3054	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3055	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3056	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3057	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3058	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3059	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3060	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3061	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3062	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3063	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3064
3065	/*
3066	 * Restore MSI configuration if it is present.  If MSI is enabled,
3067	 * then restore the data and addr registers.
3068	 */
3069	if (dinfo->cfg.msi.msi_location != 0)
3070		pci_resume_msi(dev);
3071}
3072
3073void
3074pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3075{
3076	int i;
3077	uint32_t cls;
3078	int ps;
3079
3080	/*
3081	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3082	 * we know need special treatment.  Type 2 devices are cardbus bridges
3083	 * which also require special treatment.  Other types are unknown, and
3084	 * we err on the side of safety by ignoring them.  Powering down
3085	 * bridges should not be undertaken lightly.
3086	 */
3087	if (dinfo->cfg.hdrtype != 0)
3088		return;
3089	for (i = 0; i < dinfo->cfg.nummaps; i++)
3090		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3091	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3092
3093	/*
3094	 * Some drivers apparently write to these registers w/o updating our
3095	 * cached copy.  No harm happens if we update the copy, so do so here
3096	 * so we can restore them.  The COMMAND register is modified by the
3097	 * bus w/o updating the cache.  This should represent the normally
3098	 * writable portion of the 'defined' part of type 0 headers.  In
3099	 * theory we also need to save/restore the PCI capability structures
3100	 * we know about, but apart from power we don't know any that are
3101	 * writable.
3102	 */
3103	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3104	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3105	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3106	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3107	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3108	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3109	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3110	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3111	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3112	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3113	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3114	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3115	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3116	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3117	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3118
3119	/*
3120	 * don't set the state for display devices, base peripherals and
3121	 * memory devices since bad things happen when they are powered down.
3122	 * We should (a) have drivers that can easily detach and (b) use
3123	 * generic drivers for these devices so that some device actually
3124	 * attaches.  We need to make sure that when we implement (a) we don't
3125	 * power the device down on a reattach.
3126	 */
3127	cls = pci_get_class(dev);
3128	if (!setstate)
3129		return;
3130	switch (pci_do_power_nodriver)
3131	{
3132		case 0:		/* NO powerdown at all */
3133			return;
3134		case 1:		/* Conservative about what to power down */
3135			if (cls == PCIC_STORAGE)
3136				return;
3137			/*FALLTHROUGH*/
3138		case 2:		/* Aggressive about what to power down */
3139			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3140			    cls == PCIC_BASEPERIPH)
3141				return;
3142			/*FALLTHROUGH*/
3143		case 3:		/* Power down everything */
3144			break;
3145	}
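	/*
	 * Summarizing the fallthrough above: a setting of 1 spares
	 * storage, display, memory, and base-peripheral class devices;
	 * 2 spares only display, memory, and base peripherals; 3 powers
	 * down everything.
	 */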
3146	/*
3147	 * PCI spec says we can only go into D3 state from D0 state.
3148	 * Transition from D[12] into D0 before going to D3 state.
3149	 */
3150	ps = pci_get_powerstate(dev);
3151	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3152		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3153	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3154		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3155}
3156