/* pci.c, FreeBSD head revision 165973 */
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/pci/pci.c 165973 2007-01-12 13:33:56Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54
55#if defined(__i386__) || defined(__amd64__)
56#include <machine/intr_machdep.h>
57#endif
58
59#include <sys/pciio.h>
60#include <dev/pci/pcireg.h>
61#include <dev/pci/pcivar.h>
62#include <dev/pci/pci_private.h>
63
64#include "pcib_if.h"
65#include "pci_if.h"
66
67#ifdef __HAVE_ACPI
68#include <contrib/dev/acpica/acpi.h>
69#include "acpi_if.h"
70#else
71#define	ACPI_PWR_FOR_SLEEP(x, y, z)
72#endif
73
74static uint32_t		pci_mapbase(unsigned mapreg);
75static int		pci_maptype(unsigned mapreg);
76static int		pci_mapsize(unsigned testval);
77static int		pci_maprange(unsigned mapreg);
78static void		pci_fixancient(pcicfgregs *cfg);
79
80static int		pci_porten(device_t pcib, int b, int s, int f);
81static int		pci_memen(device_t pcib, int b, int s, int f);
82static void		pci_assign_interrupt(device_t bus, device_t dev,
83			    int force_route);
84static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85			    int b, int s, int f, int reg,
86			    struct resource_list *rl, int force, int prefetch);
87static int		pci_probe(device_t dev);
88static int		pci_attach(device_t dev);
89static void		pci_load_vendor_data(void);
90static int		pci_describe_parse_line(char **ptr, int *vendor,
91			    int *device, char **desc);
92static char		*pci_describe_device(device_t dev);
93static int		pci_modevent(module_t mod, int what, void *arg);
94static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95			    pcicfgregs *cfg);
96static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98			    int reg);
99#if 0
100static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101			    int reg, uint32_t data);
102#endif
103static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104
/*
 * Kernel object method table for the PCI bus driver.  Methods not
 * listed here fall back to the generic bus defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),

	{ 0, 0 }
};

/* Declare the 'pci' driver class and attach it below pcib bridges. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
159
/* In-kernel copy of the pci vendor/device description database. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/*
 * Per-device quirk entry, matched against the combined 32-bit
 * vendor/device ID read from config space.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including the
	 * E7501, E7505, and the E7210.
	 */
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};

/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every probed PCI device, plus change/count bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
199
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Non-zero: turn on the I/O and memory decode bits while probing BARs. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* Power-down policy (0-3) for functions that no driver claims. */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* Non-zero: move devices back to D0 when the system resumes. */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* Master enables for MSI and MSI-X interrupt support. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Non-zero: respect the PCI_QUIRK_DISABLE_MSI entries in pci_quirks[]. */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
240
241/* Find a device_t by bus/slot/function */
242
243device_t
244pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
245{
246	struct pci_devinfo *dinfo;
247
248	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
249		if ((dinfo->cfg.bus == bus) &&
250		    (dinfo->cfg.slot == slot) &&
251		    (dinfo->cfg.func == func)) {
252			return (dinfo->cfg.dev);
253		}
254	}
255
256	return (NULL);
257}
258
259/* Find a device_t by vendor/device ID */
260
261device_t
262pci_find_device(uint16_t vendor, uint16_t device)
263{
264	struct pci_devinfo *dinfo;
265
266	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
267		if ((dinfo->cfg.vendor == vendor) &&
268		    (dinfo->cfg.device == device)) {
269			return (dinfo->cfg.dev);
270		}
271	}
272
273	return (NULL);
274}
275
276/* return base address of memory or port map */
277
/*
 * Return the base address encoded in a BAR value by stripping the low
 * flag bits: 2 bits for an I/O map (bit 0 set), 4 bits for a memory map.
 */
static uint32_t
pci_mapbase(uint32_t mapreg)
{

	return (mapreg & ((mapreg & 0x01) ? ~0x03 : ~0x0f));
}
286
287/* return map type of memory or port map */
288
289static int
290pci_maptype(unsigned mapreg)
291{
292	static uint8_t maptype[0x10] = {
293		PCI_MAPMEM,		PCI_MAPPORT,
294		PCI_MAPMEM,		0,
295		PCI_MAPMEM,		PCI_MAPPORT,
296		0,			0,
297		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
298		PCI_MAPMEM|PCI_MAPMEMP, 0,
299		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
300		0,			0,
301	};
302
303	return maptype[mapreg & 0x0f];
304}
305
306/* return log2 of map size decoded for memory or port map */
307
/*
 * Return log2 of the map size decoded for a memory or port map.  The
 * probed BAR value has its size encoded as the lowest settable address
 * bit, so the answer is the position of the lowest set bit of the base.
 */
static int
pci_mapsize(uint32_t testval)
{
	int ln2size;

	for (ln2size = 0, testval = pci_mapbase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
324
325/* return log2 of address range supported by map register */
326
/*
 * Return log2 of the address range supported by a map register: 32 for
 * plain I/O or 32-bit memory BARs, 20 for below-1MB memory BARs, 64
 * for 64-bit memory BARs, and 0 for reserved encodings.
 */
static int
pci_maprange(unsigned mapreg)
{
	unsigned low = mapreg & 0x07;

	if (low == 0x02)
		return (20);		/* memory map, below 1MB */
	if (low == 0x04)
		return (64);		/* 64-bit memory map */
	if (low == 0x00 || low == 0x01 || low == 0x05)
		return (32);
	return (0);			/* reserved encoding */
}
346
347/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
348
349static void
350pci_fixancient(pcicfgregs *cfg)
351{
352	if (cfg->hdrtype != 0)
353		return;
354
355	/* PCI to PCI bridges use header type 1 */
356	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
357		cfg->hdrtype = 1;
358}
359
360/* extract header type specific config data */
361
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The subvendor/subdevice registers and the number of BARs live
	 * at header-type specific offsets: 0 = plain device, 1 = PCI-PCI
	 * bridge, 2 = CardBus bridge.  Unknown types leave cfg untouched.
	 */
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		cfg->subvendor      = REG(PCIR_SUBVEND_1, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_1, 2);
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
385
386/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no device decodes this b/s/f. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/* 'size' lets callers embed pci_devinfo in a larger struct. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK should never return NULL -- confirm. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf structure. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
458
459static void
460pci_read_extcap(device_t pcib, pcicfgregs *cfg)
461{
462#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
463#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
464#if defined(__i386__) || defined(__amd64__)
465	uint64_t addr;
466#endif
467	uint32_t val;
468	int	ptr, nextptr, ptrptr;
469
470	switch (cfg->hdrtype & PCIM_HDRTYPE) {
471	case 0:
472	case 1:
473		ptrptr = PCIR_CAP_PTR;
474		break;
475	case 2:
476		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
477		break;
478	default:
479		return;		/* no extended capabilities support */
480	}
481	nextptr = REG(ptrptr, 1);	/* sanity check? */
482
483	/*
484	 * Read capability entries.
485	 */
486	while (nextptr != 0) {
487		/* Sanity check */
488		if (nextptr > 255) {
489			printf("illegal PCI extended capability offset %d\n",
490			    nextptr);
491			return;
492		}
493		/* Find the next entry */
494		ptr = nextptr;
495		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
496
497		/* Process this entry */
498		switch (REG(ptr + PCICAP_ID, 1)) {
499		case PCIY_PMG:		/* PCI power management */
500			if (cfg->pp.pp_cap == 0) {
501				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
502				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
503				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
504				if ((nextptr - ptr) > PCIR_POWER_DATA)
505					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
506			}
507			break;
508#if defined(__i386__) || defined(__amd64__)
509		case PCIY_HT:		/* HyperTransport */
510			/* Determine HT-specific capability type. */
511			val = REG(ptr + PCIR_HT_COMMAND, 2);
512			switch (val & PCIM_HTCMD_CAP_MASK) {
513			case PCIM_HTCAP_MSI_MAPPING:
514				/* Sanity check the mapping window. */
515				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
516				addr <<= 32;
517				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
518				if (addr != MSI_INTEL_ADDR_BASE)
519					device_printf(pcib,
520		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
521					    cfg->bus, cfg->slot, cfg->func,
522					    (long long)addr);
523
524				/* Enable MSI -> HT mapping. */
525				val |= PCIM_HTCMD_MSI_ENABLE;
526				WREG(ptr + PCIR_HT_COMMAND, val, 2);
527				break;
528			}
529			break;
530#endif
531		case PCIY_MSI:		/* PCI MSI */
532			cfg->msi.msi_location = ptr;
533			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
534			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
535						     PCIM_MSICTRL_MMC_MASK)>>1);
536			break;
537		case PCIY_MSIX:		/* PCI MSI-X */
538			cfg->msix.msix_location = ptr;
539			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
540			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
541			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
542			val = REG(ptr + PCIR_MSIX_TABLE, 4);
543			cfg->msix.msix_table_bar = PCIR_BAR(val &
544			    PCIM_MSIX_BIR_MASK);
545			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
546			val = REG(ptr + PCIR_MSIX_PBA, 4);
547			cfg->msix.msix_pba_bar = PCIR_BAR(val &
548			    PCIM_MSIX_BIR_MASK);
549			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
550			break;
551		case PCIY_VPD:		/* PCI Vital Product Data */
552			cfg->vpd.vpd_reg = ptr;
553			pci_read_vpd(pcib, cfg);
554			break;
555		default:
556			break;
557		}
558	}
559/* REG and WREG use carry through to next functions */
560}
561
562/*
563 * PCI Vital Product Data
564 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/*
	 * Write the VPD address, then poll until the device sets bit 15
	 * (the completion flag) to indicate the data register is valid.
	 * NOTE(review): this spins forever if the flag never sets; a
	 * bounded retry count would be safer -- confirm against hardware.
	 */
	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return REG(cfg->vpd.vpd_reg + 4, 4);
}
577
#if 0
/* Currently unused: write one 32-bit word of VPD at 'reg'. */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/*
	 * Write data first, then the address with bit 15 set; the device
	 * clears bit 15 when the write has completed.
	 */
	WREG(cfg->vpd.vpd_reg + 4, data, 4);
	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
592
/*
 * Cursor state for the VPD reader: the hardware returns 4 bytes per
 * access, so we buffer one 32-bit word and hand it out byte by byte
 * while keeping a running checksum of everything read.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* currently buffered 32-bit word */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* VPD address of the next word */
	uint8_t		cksum;		/* running sum of all bytes read */
};
601
602static uint8_t
603vpd_nextbyte(struct vpd_readstate *vrs)
604{
605	uint8_t byte;
606
607	if (vrs->bytesinval == 0) {
608		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
609		    vrs->off));
610		vrs->off += 4;
611		byte = vrs->val & 0xff;
612		vrs->bytesinval = 3;
613	} else {
614		vrs->val = vrs->val >> 8;
615		byte = vrs->val & 0xff;
616		vrs->bytesinval--;
617	}
618
619	vrs->cksum += byte;
620	return byte;
621}
622
623static void
624pci_read_vpd(device_t pcib, pcicfgregs *cfg)
625{
626	struct vpd_readstate vrs;
627	int state;
628	int name;
629	int remain;
630	int end;
631	int i;
632	uint8_t byte;
633	int alloc, off;		/* alloc/off for RO/W arrays */
634	int cksumvalid;
635	int dflen;
636
637	/* init vpd reader */
638	vrs.bytesinval = 0;
639	vrs.off = 0;
640	vrs.pcib = pcib;
641	vrs.cfg = cfg;
642	vrs.cksum = 0;
643
644	state = 0;
645	name = remain = i = 0;	/* shut up stupid gcc */
646	alloc = off = 0;	/* shut up stupid gcc */
647	dflen = 0;		/* shut up stupid gcc */
648	end = 0;
649	cksumvalid = -1;
650	for (; !end;) {
651		byte = vpd_nextbyte(&vrs);
652#if 0
653		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
654		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
655		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
656#endif
657		switch (state) {
658		case 0:		/* item name */
659			if (byte & 0x80) {
660				remain = vpd_nextbyte(&vrs);
661				remain |= vpd_nextbyte(&vrs) << 8;
662				if (remain > (0x7f*4 - vrs.off)) {
663					end = 1;
664					printf(
665			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
666					    cfg->bus, cfg->slot, cfg->func,
667					    remain);
668				}
669				name = byte & 0x7f;
670			} else {
671				remain = byte & 0x7;
672				name = (byte >> 3) & 0xf;
673			}
674			switch (name) {
675			case 0x2:	/* String */
676				cfg->vpd.vpd_ident = malloc(remain + 1,
677				    M_DEVBUF, M_WAITOK);
678				i = 0;
679				state = 1;
680				break;
681			case 0xf:	/* End */
682				end = 1;
683				state = -1;
684				break;
685			case 0x10:	/* VPD-R */
686				alloc = 8;
687				off = 0;
688				cfg->vpd.vpd_ros = malloc(alloc *
689				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
690				    M_WAITOK);
691				state = 2;
692				break;
693			case 0x11:	/* VPD-W */
694				alloc = 8;
695				off = 0;
696				cfg->vpd.vpd_w = malloc(alloc *
697				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
698				    M_WAITOK);
699				state = 5;
700				break;
701			default:	/* Invalid data, abort */
702				end = 1;
703				continue;
704			}
705			break;
706
707		case 1:	/* Identifier String */
708			cfg->vpd.vpd_ident[i++] = byte;
709			remain--;
710			if (remain == 0)  {
711				cfg->vpd.vpd_ident[i] = '\0';
712				state = 0;
713			}
714			break;
715
716		case 2:	/* VPD-R Keyword Header */
717			if (off == alloc) {
718				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
719				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
720				    M_DEVBUF, M_WAITOK);
721			}
722			cfg->vpd.vpd_ros[off].keyword[0] = byte;
723			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
724			dflen = vpd_nextbyte(&vrs);
725			if (dflen == 0 &&
726			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
727			    2) == 0) {
728				/*
729				 * if this happens, we can't trust the rest
730				 * of the VPD.
731				 */
732				printf("pci%d:%d:%d: bad keyword length: %d\n",
733				    cfg->bus, cfg->slot, cfg->func, dflen);
734				cksumvalid = 0;
735				end = 1;
736				break;
737			} else if (dflen == 0) {
738				cfg->vpd.vpd_ros[off].value = malloc(1 *
739				    sizeof *cfg->vpd.vpd_ros[off].value,
740				    M_DEVBUF, M_WAITOK);
741				cfg->vpd.vpd_ros[off].value[0] = '\x00';
742			} else
743				cfg->vpd.vpd_ros[off].value = malloc(
744				    (dflen + 1) *
745				    sizeof *cfg->vpd.vpd_ros[off].value,
746				    M_DEVBUF, M_WAITOK);
747			remain -= 3;
748			i = 0;
749			/* keep in sync w/ state 3's transistions */
750			if (dflen == 0 && remain == 0)
751				state = 0;
752			else if (dflen == 0)
753				state = 2;
754			else
755				state = 3;
756			break;
757
758		case 3:	/* VPD-R Keyword Value */
759			cfg->vpd.vpd_ros[off].value[i++] = byte;
760			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
761			    "RV", 2) == 0 && cksumvalid == -1) {
762				if (vrs.cksum == 0)
763					cksumvalid = 1;
764				else {
765					printf(
766				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
767					    cfg->bus, cfg->slot, cfg->func,
768					    vrs.cksum);
769					cksumvalid = 0;
770					end = 1;
771					break;
772				}
773			}
774			dflen--;
775			remain--;
776			/* keep in sync w/ state 2's transistions */
777			if (dflen == 0)
778				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
779			if (dflen == 0 && remain == 0) {
780				cfg->vpd.vpd_rocnt = off;
781				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
782				    off * sizeof *cfg->vpd.vpd_ros,
783				    M_DEVBUF, M_WAITOK);
784				state = 0;
785			} else if (dflen == 0)
786				state = 2;
787			break;
788
789		case 4:
790			remain--;
791			if (remain == 0)
792				state = 0;
793			break;
794
795		case 5:	/* VPD-W Keyword Header */
796			if (off == alloc) {
797				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
798				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
799				    M_DEVBUF, M_WAITOK);
800			}
801			cfg->vpd.vpd_w[off].keyword[0] = byte;
802			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
803			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
804			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
805			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
806			    sizeof *cfg->vpd.vpd_w[off].value,
807			    M_DEVBUF, M_WAITOK);
808			remain -= 3;
809			i = 0;
810			/* keep in sync w/ state 6's transistions */
811			if (dflen == 0 && remain == 0)
812				state = 0;
813			else if (dflen == 0)
814				state = 5;
815			else
816				state = 6;
817			break;
818
819		case 6:	/* VPD-W Keyword Value */
820			cfg->vpd.vpd_w[off].value[i++] = byte;
821			dflen--;
822			remain--;
823			/* keep in sync w/ state 5's transistions */
824			if (dflen == 0)
825				cfg->vpd.vpd_w[off++].value[i++] = '\0';
826			if (dflen == 0 && remain == 0) {
827				cfg->vpd.vpd_wcnt = off;
828				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
829				    off * sizeof *cfg->vpd.vpd_w,
830				    M_DEVBUF, M_WAITOK);
831				state = 0;
832			} else if (dflen == 0)
833				state = 5;
834			break;
835
836		default:
837			printf("pci%d:%d:%d: invalid state: %d\n",
838			    cfg->bus, cfg->slot, cfg->func, state);
839			end = 1;
840			break;
841		}
842	}
843
844	if (cksumvalid == 0) {
845		/* read-only data bad, clean up */
846		for (; off; off--)
847			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
848
849		free(cfg->vpd.vpd_ros, M_DEVBUF);
850		cfg->vpd.vpd_ros = NULL;
851	}
852#undef REG
853#undef WREG
854}
855
856int
857pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
858{
859	struct pci_devinfo *dinfo = device_get_ivars(child);
860	pcicfgregs *cfg = &dinfo->cfg;
861
862	*identptr = cfg->vpd.vpd_ident;
863
864	if (*identptr == NULL)
865		return ENXIO;
866
867	return 0;
868}
869
870int
871pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
872	const char **vptr)
873{
874	struct pci_devinfo *dinfo = device_get_ivars(child);
875	pcicfgregs *cfg = &dinfo->cfg;
876	int i;
877
878	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
879		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
880		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
881			*vptr = cfg->vpd.vpd_ros[i].value;
882		}
883
884	if (i != cfg->vpd.vpd_rocnt)
885		return 0;
886
887	*vptr = NULL;
888	return ENXIO;
889}
890
891/*
892 * Return the offset in configuration space of the requested extended
893 * capability entry or 0 if the specified capability was not found.
894 */
895int
896pci_find_extcap_method(device_t dev, device_t child, int capability,
897    int *capreg)
898{
899	struct pci_devinfo *dinfo = device_get_ivars(child);
900	pcicfgregs *cfg = &dinfo->cfg;
901	u_int32_t status;
902	u_int8_t ptr;
903
904	/*
905	 * Check the CAP_LIST bit of the PCI status register first.
906	 */
907	status = pci_read_config(child, PCIR_STATUS, 2);
908	if (!(status & PCIM_STATUS_CAPPRESENT))
909		return (ENXIO);
910
911	/*
912	 * Determine the start pointer of the capabilities list.
913	 */
914	switch (cfg->hdrtype & PCIM_HDRTYPE) {
915	case 0:
916	case 1:
917		ptr = PCIR_CAP_PTR;
918		break;
919	case 2:
920		ptr = PCIR_CAP_PTR_2;
921		break;
922	default:
923		/* XXX: panic? */
924		return (ENXIO);		/* no extended capabilities support */
925	}
926	ptr = pci_read_config(child, ptr, 1);
927
928	/*
929	 * Traverse the capabilities list.
930	 */
931	while (ptr != 0) {
932		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
933			if (capreg != NULL)
934				*capreg = ptr;
935			return (0);
936		}
937		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
938	}
939
940	return (ENOENT);
941}
942
943/*
944 * Support for MSI-X message interrupts.
945 */
946void
947pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
948{
949	struct pci_devinfo *dinfo = device_get_ivars(dev);
950	pcicfgregs *cfg = &dinfo->cfg;
951	uint32_t offset;
952
953	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
954	offset = cfg->msix.msix_table_offset + index * 16;
955	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
956	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
957	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
958}
959
960void
961pci_mask_msix(device_t dev, u_int index)
962{
963	struct pci_devinfo *dinfo = device_get_ivars(dev);
964	pcicfgregs *cfg = &dinfo->cfg;
965	uint32_t offset, val;
966
967	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
968	offset = cfg->msix.msix_table_offset + index * 16 + 12;
969	val = bus_read_4(cfg->msix.msix_table_res, offset);
970	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
971		val |= PCIM_MSIX_VCTRL_MASK;
972		bus_write_4(cfg->msix.msix_table_res, offset, val);
973	}
974}
975
976void
977pci_unmask_msix(device_t dev, u_int index)
978{
979	struct pci_devinfo *dinfo = device_get_ivars(dev);
980	pcicfgregs *cfg = &dinfo->cfg;
981	uint32_t offset, val;
982
983	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
984	offset = cfg->msix.msix_table_offset + index * 16 + 12;
985	val = bus_read_4(cfg->msix.msix_table_res, offset);
986	if (val & PCIM_MSIX_VCTRL_MASK) {
987		val &= ~PCIM_MSIX_VCTRL_MASK;
988		bus_write_4(cfg->msix.msix_table_res, offset, val);
989	}
990}
991
992int
993pci_pending_msix(device_t dev, u_int index)
994{
995	struct pci_devinfo *dinfo = device_get_ivars(dev);
996	pcicfgregs *cfg = &dinfo->cfg;
997	uint32_t offset, bit;
998
999	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1000	offset = cfg->msix.msix_pba_offset + (index / 4) * 4;
1001	bit = 1 << index % 32;
1002	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1003}
1004
1005static int
1006pci_alloc_msix(device_t dev, device_t child, int *count)
1007{
1008	struct pci_devinfo *dinfo = device_get_ivars(child);
1009	pcicfgregs *cfg = &dinfo->cfg;
1010	struct resource_list_entry *rle;
1011	int actual, error, i, irq, max;
1012
1013	/* MSI-X capability present? */
1014	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1015		return (ENODEV);
1016
1017	/* Make sure the appropriate BARs are mapped. */
1018	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1019	    cfg->msix.msix_table_bar);
1020	if (rle == NULL || rle->res == NULL ||
1021	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1022		return (ENXIO);
1023	cfg->msix.msix_table_res = rle->res;
1024	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1025		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1026		    cfg->msix.msix_pba_bar);
1027		if (rle == NULL || rle->res == NULL ||
1028		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1029			return (ENXIO);
1030	}
1031	cfg->msix.msix_pba_res = rle->res;
1032
1033	/* Already have allocated messages? */
1034	if (cfg->msix.msix_alloc != 0)
1035		return (ENXIO);
1036
1037	if (bootverbose)
1038		device_printf(child,
1039		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1040		    *count, cfg->msix.msix_msgnum);
1041	max = min(*count, cfg->msix.msix_msgnum);
1042	for (i = 0; i < max; i++) {
1043		/* Allocate a message. */
1044		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1045		    &irq);
1046		if (error)
1047			break;
1048		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1049		    irq, 1);
1050	}
1051	actual = i;
1052
1053	if (bootverbose) {
1054		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1055		if (actual == 1)
1056			device_printf(child, "using IRQ %lu for MSI-X\n",
1057			    rle->start);
1058		else {
1059			int run;
1060
1061			/*
1062			 * Be fancy and try to print contiguous runs of
1063			 * IRQ values as ranges.  'irq' is the previous IRQ.
1064			 * 'run' is true if we are in a range.
1065			 */
1066			device_printf(child, "using IRQs %lu", rle->start);
1067			irq = rle->start;
1068			run = 0;
1069			for (i = 1; i < actual; i++) {
1070				rle = resource_list_find(&dinfo->resources,
1071				    SYS_RES_IRQ, i + 1);
1072
1073				/* Still in a run? */
1074				if (rle->start == irq + 1) {
1075					run = 1;
1076					irq++;
1077					continue;
1078				}
1079
1080				/* Finish previous range. */
1081				if (run) {
1082					printf("-%d", irq);
1083					run = 0;
1084				}
1085
1086				/* Start new range. */
1087				printf(",%lu", rle->start);
1088				irq = rle->start;
1089			}
1090
1091			/* Unfinished range? */
1092			if (run)
1093				printf("%d", irq);
1094			printf(" for MSI-X\n");
1095		}
1096	}
1097
1098	/* Mask all vectors. */
1099	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1100		pci_mask_msix(child, i);
1101
1102	/* Update control register to enable MSI-X. */
1103	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1104	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1105	    cfg->msix.msix_ctrl, 2);
1106
1107	/* Update counts of alloc'd messages. */
1108	cfg->msix.msix_alloc = actual;
1109	*count = actual;
1110	return (0);
1111}
1112
/*
 * Release all MSI-X messages previously allocated for 'child'.
 * Returns ENODEV if nothing is allocated and EBUSY (without touching
 * any state) if any of the IRQ resources are still held by a driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages (MSI-X is released one vector at a time). */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1150
1151/*
1152 * Support for MSI message signalled interrupts.
1153 */
1154void
1155pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1156{
1157	struct pci_devinfo *dinfo = device_get_ivars(dev);
1158	pcicfgregs *cfg = &dinfo->cfg;
1159
1160	/* Write data and address values. */
1161	cfg->msi.msi_addr = address;
1162	cfg->msi.msi_data = data;
1163	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1164	    address & 0xffffffff, 4);
1165	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1166		pci_write_config(dev, cfg->msi.msi_location +
1167		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1168		pci_write_config(dev, cfg->msi.msi_location +
1169		    PCIR_MSI_DATA_64BIT, data, 2);
1170	} else
1171		pci_write_config(dev, cfg->msi.msi_location +
1172		    PCIR_MSI_DATA, data, 2);
1173
1174	/* Enable MSI in the control register. */
1175	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1176	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1177	    cfg->msi.msi_ctrl, 2);
1178}
1179
1180/*
1181 * Restore MSI registers during resume.  If MSI is enabled then
1182 * restore the data and address registers in addition to the control
1183 * register.
1184 */
1185static void
1186pci_resume_msi(device_t dev)
1187{
1188	struct pci_devinfo *dinfo = device_get_ivars(dev);
1189	pcicfgregs *cfg = &dinfo->cfg;
1190	uint64_t address;
1191	uint16_t data;
1192
1193	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1194		address = cfg->msi.msi_addr;
1195		data = cfg->msi.msi_data;
1196		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1197		    address & 0xffffffff, 4);
1198		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1199			pci_write_config(dev, cfg->msi.msi_location +
1200			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1201			pci_write_config(dev, cfg->msi.msi_location +
1202			    PCIR_MSI_DATA_64BIT, data, 2);
1203		} else
1204			pci_write_config(dev, cfg->msi.msi_location +
1205			    PCIR_MSI_DATA, data, 2);
1206	}
1207	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1208	    cfg->msi.msi_ctrl, 2);
1209}
1210
1211/*
1212 * Returns true if the specified device is blacklisted because MSI
1213 * doesn't work.
1214 */
1215int
1216pci_msi_device_blacklisted(device_t dev)
1217{
1218	struct pci_quirk *q;
1219
1220	if (!pci_honor_msi_blacklist)
1221		return (0);
1222
1223	for (q = &pci_quirks[0]; q->devid; q++) {
1224		if (q->devid == pci_get_devid(dev) &&
1225		    q->type == PCI_QUIRK_DISABLE_MSI)
1226			return (1);
1227	}
1228	return (0);
1229}
1230
1231/*
1232 * Determine if MSI is blacklisted globally on this sytem.  Currently,
1233 * we just check for blacklisted chipsets as represented by the
1234 * host-PCI bridge at device 0:0:0.  In the future, it may become
1235 * necessary to check other system attributes, such as the kenv values
1236 * that give the motherboard manufacturer and model number.
1237 */
1238static int
1239pci_msi_blacklisted(void)
1240{
1241	device_t dev;
1242
1243	if (!pci_honor_msi_blacklist)
1244		return (0);
1245
1246	dev = pci_find_bsf(0, 0, 0);
1247	if (dev != NULL)
1248		return (pci_msi_device_blacklisted(dev));
1249	return (0);
1250}
1251
1252/*
1253 * Attempt to allocate *count MSI messages.  The actual number allocated is
1254 * returned in *count.  After this function returns, each message will be
1255 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1256 */
1257int
1258pci_alloc_msi_method(device_t dev, device_t child, int *count)
1259{
1260	struct pci_devinfo *dinfo = device_get_ivars(child);
1261	pcicfgregs *cfg = &dinfo->cfg;
1262	struct resource_list_entry *rle;
1263	int actual, error, i, irqs[32];
1264	uint16_t ctrl;
1265
1266	/* Don't let count == 0 get us into trouble. */
1267	if (*count == 0)
1268		return (EINVAL);
1269
1270	/* If rid 0 is allocated, then fail. */
1271	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1272	if (rle != NULL && rle->res != NULL)
1273		return (ENXIO);
1274
1275	/* If MSI is blacklisted for this system, fail. */
1276	if (pci_msi_blacklisted())
1277		return (ENXIO);
1278
1279	/* Try MSI-X first. */
1280	error = pci_alloc_msix(dev, child, count);
1281	if (error != ENODEV)
1282		return (error);
1283
1284	/* MSI capability present? */
1285	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1286		return (ENODEV);
1287
1288	/* Already have allocated messages? */
1289	if (cfg->msi.msi_alloc != 0)
1290		return (ENXIO);
1291
1292	if (bootverbose)
1293		device_printf(child,
1294		    "attempting to allocate %d MSI vectors (%d supported)\n",
1295		    *count, cfg->msi.msi_msgnum);
1296
1297	/* Don't ask for more than the device supports. */
1298	actual = min(*count, cfg->msi.msi_msgnum);
1299
1300	/* Don't ask for more than 32 messages. */
1301	actual = min(actual, 32);
1302
1303	/* MSI requires power of 2 number of messages. */
1304	if (!powerof2(actual))
1305		return (EINVAL);
1306
1307	for (;;) {
1308		/* Try to allocate N messages. */
1309		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1310		    cfg->msi.msi_msgnum, irqs);
1311		if (error == 0)
1312			break;
1313		if (actual == 1)
1314			return (error);
1315
1316		/* Try N / 2. */
1317		actual >>= 1;
1318	}
1319
1320	/*
1321	 * We now have N actual messages mapped onto SYS_RES_IRQ
1322	 * resources in the irqs[] array, so add new resources
1323	 * starting at rid 1.
1324	 */
1325	for (i = 0; i < actual; i++)
1326		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1327		    irqs[i], irqs[i], 1);
1328
1329	if (bootverbose) {
1330		if (actual == 1)
1331			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1332		else {
1333			int run;
1334
1335			/*
1336			 * Be fancy and try to print contiguous runs
1337			 * of IRQ values as ranges.  'run' is true if
1338			 * we are in a range.
1339			 */
1340			device_printf(child, "using IRQs %d", irqs[0]);
1341			run = 0;
1342			for (i = 1; i < actual; i++) {
1343
1344				/* Still in a run? */
1345				if (irqs[i] == irqs[i - 1] + 1) {
1346					run = 1;
1347					continue;
1348				}
1349
1350				/* Finish previous range. */
1351				if (run) {
1352					printf("-%d", irqs[i - 1]);
1353					run = 0;
1354				}
1355
1356				/* Start new range. */
1357				printf(",%d", irqs[i]);
1358			}
1359
1360			/* Unfinished range? */
1361			if (run)
1362				printf("%d", irqs[actual - 1]);
1363			printf(" for MSI\n");
1364		}
1365	}
1366
1367	/* Update control register with actual count and enable MSI. */
1368	ctrl = cfg->msi.msi_ctrl;
1369	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1370	ctrl |= (ffs(actual) - 1) << 4;
1371	cfg->msi.msi_ctrl = ctrl;
1372	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1373
1374	/* Update counts of alloc'd messages. */
1375	cfg->msi.msi_alloc = actual;
1376	*count = actual;
1377	return (0);
1378}
1379
/*
 * Release the MSI messages associated with this device.  MSI-X is
 * tried first; returns ENODEV when no messages are allocated and
 * EBUSY (without changing any state) when a driver still holds one
 * of the IRQ resources.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  Capture the
	 * IRQ values here since the resource list entries are deleted
	 * below before PCIB_RELEASE_MSI could read them.
	 */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages (MSI messages are released as a group). */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1423
1424/*
1425 * Return the max supported MSI or MSI-X messages this device supports.
1426 * Basically, assuming the MD code can alloc messages, this function
1427 * should return the maximum value that pci_alloc_msi() can return.  Thus,
1428 * it is subject to the tunables, etc.
1429 */
1430int
1431pci_msi_count_method(device_t dev, device_t child)
1432{
1433	struct pci_devinfo *dinfo = device_get_ivars(child);
1434	pcicfgregs *cfg = &dinfo->cfg;
1435
1436	if (pci_do_msix && cfg->msix.msix_location != 0)
1437		return (cfg->msix.msix_msgnum);
1438	if (pci_do_msi && cfg->msi.msi_location != 0)
1439		return (cfg->msi.msi_msgnum);
1440	return (0);
1441}
1442
1443/* free pcicfgregs structure and all depending data structures */
1444
1445int
1446pci_freecfg(struct pci_devinfo *dinfo)
1447{
1448	struct devlist *devlist_head;
1449	int i;
1450
1451	devlist_head = &pci_devq;
1452
1453	if (dinfo->cfg.vpd.vpd_reg) {
1454		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1455		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1456			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1457		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1458		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1459			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1460		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1461	}
1462	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1463	free(dinfo, M_DEVBUF);
1464
1465	/* increment the generation count */
1466	pci_generation++;
1467
1468	/* we're losing one device */
1469	pci_numdevs--;
1470	return (0);
1471}
1472
1473/*
1474 * PCI power manangement
1475 */
1476int
1477pci_set_powerstate_method(device_t dev, device_t child, int state)
1478{
1479	struct pci_devinfo *dinfo = device_get_ivars(child);
1480	pcicfgregs *cfg = &dinfo->cfg;
1481	uint16_t status;
1482	int result, oldstate, highest, delay;
1483
1484	if (cfg->pp.pp_cap == 0)
1485		return (EOPNOTSUPP);
1486
1487	/*
1488	 * Optimize a no state change request away.  While it would be OK to
1489	 * write to the hardware in theory, some devices have shown odd
1490	 * behavior when going from D3 -> D3.
1491	 */
1492	oldstate = pci_get_powerstate(child);
1493	if (oldstate == state)
1494		return (0);
1495
1496	/*
1497	 * The PCI power management specification states that after a state
1498	 * transition between PCI power states, system software must
1499	 * guarantee a minimal delay before the function accesses the device.
1500	 * Compute the worst case delay that we need to guarantee before we
1501	 * access the device.  Many devices will be responsive much more
1502	 * quickly than this delay, but there are some that don't respond
1503	 * instantly to state changes.  Transitions to/from D3 state require
1504	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1505	 * is done below with DELAY rather than a sleeper function because
1506	 * this function can be called from contexts where we cannot sleep.
1507	 */
1508	highest = (oldstate > state) ? oldstate : state;
1509	if (highest == PCI_POWERSTATE_D3)
1510	    delay = 10000;
1511	else if (highest == PCI_POWERSTATE_D2)
1512	    delay = 200;
1513	else
1514	    delay = 0;
1515	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1516	    & ~PCIM_PSTAT_DMASK;
1517	result = 0;
1518	switch (state) {
1519	case PCI_POWERSTATE_D0:
1520		status |= PCIM_PSTAT_D0;
1521		break;
1522	case PCI_POWERSTATE_D1:
1523		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1524			return (EOPNOTSUPP);
1525		status |= PCIM_PSTAT_D1;
1526		break;
1527	case PCI_POWERSTATE_D2:
1528		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1529			return (EOPNOTSUPP);
1530		status |= PCIM_PSTAT_D2;
1531		break;
1532	case PCI_POWERSTATE_D3:
1533		status |= PCIM_PSTAT_D3;
1534		break;
1535	default:
1536		return (EINVAL);
1537	}
1538
1539	if (bootverbose)
1540		printf(
1541		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1542		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1543		    oldstate, state);
1544
1545	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1546	if (delay)
1547		DELAY(delay);
1548	return (0);
1549}
1550
1551int
1552pci_get_powerstate_method(device_t dev, device_t child)
1553{
1554	struct pci_devinfo *dinfo = device_get_ivars(child);
1555	pcicfgregs *cfg = &dinfo->cfg;
1556	uint16_t status;
1557	int result;
1558
1559	if (cfg->pp.pp_cap != 0) {
1560		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1561		switch (status & PCIM_PSTAT_DMASK) {
1562		case PCIM_PSTAT_D0:
1563			result = PCI_POWERSTATE_D0;
1564			break;
1565		case PCIM_PSTAT_D1:
1566			result = PCI_POWERSTATE_D1;
1567			break;
1568		case PCIM_PSTAT_D2:
1569			result = PCI_POWERSTATE_D2;
1570			break;
1571		case PCIM_PSTAT_D3:
1572			result = PCI_POWERSTATE_D3;
1573			break;
1574		default:
1575			result = PCI_POWERSTATE_UNKNOWN;
1576			break;
1577		}
1578	} else {
1579		/* No support, device is always at D0 */
1580		result = PCI_POWERSTATE_D0;
1581	}
1582	return (result);
1583}
1584
1585/*
1586 * Some convenience functions for PCI device drivers.
1587 */
1588
1589static __inline void
1590pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1591{
1592	uint16_t	command;
1593
1594	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1595	command |= bit;
1596	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1597}
1598
1599static __inline void
1600pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1601{
1602	uint16_t	command;
1603
1604	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1605	command &= ~bit;
1606	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1607}
1608
/* Enable bus mastering for 'child'.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1615
/* Disable bus mastering for 'child'.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1622
1623int
1624pci_enable_io_method(device_t dev, device_t child, int space)
1625{
1626	uint16_t command;
1627	uint16_t bit;
1628	char *error;
1629
1630	bit = 0;
1631	error = NULL;
1632
1633	switch(space) {
1634	case SYS_RES_IOPORT:
1635		bit = PCIM_CMD_PORTEN;
1636		error = "port";
1637		break;
1638	case SYS_RES_MEMORY:
1639		bit = PCIM_CMD_MEMEN;
1640		error = "memory";
1641		break;
1642	default:
1643		return (EINVAL);
1644	}
1645	pci_set_command_bit(dev, child, bit);
1646	/* Some devices seem to need a brief stall here, what do to? */
1647	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1648	if (command & bit)
1649		return (0);
1650	device_printf(child, "failed to enable %s mapping!\n", error);
1651	return (ENXIO);
1652}
1653
1654int
1655pci_disable_io_method(device_t dev, device_t child, int space)
1656{
1657	uint16_t command;
1658	uint16_t bit;
1659	char *error;
1660
1661	bit = 0;
1662	error = NULL;
1663
1664	switch(space) {
1665	case SYS_RES_IOPORT:
1666		bit = PCIM_CMD_PORTEN;
1667		error = "port";
1668		break;
1669	case SYS_RES_MEMORY:
1670		bit = PCIM_CMD_MEMEN;
1671		error = "memory";
1672		break;
1673	default:
1674		return (EINVAL);
1675	}
1676	pci_clear_command_bit(dev, child, bit);
1677	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1678	if (command & bit) {
1679		device_printf(child, "failed to disable %s mapping!\n", error);
1680		return (ENXIO);
1681	}
1682	return (0);
1683}
1684
1685/*
1686 * New style pci driver.  Parent device is either a pci-host-bridge or a
1687 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1688 */
1689
/*
 * Dump the interesting fields of a device's config header and
 * capabilities (power management, VPD, MSI, MSI-X) to the console.
 * Only produces output when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{
	int i;

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: supported states + current. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* Vital Product Data: read-only then read/write keywords. */
		if (cfg->vpd.vpd_reg) {
			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
				struct vpd_readonly *vrop;
				vrop = &cfg->vpd.vpd_ros[i];
				if (strncmp("CP", vrop->keyword, 2) == 0)
					printf("\tCP: id %d, BAR%d, off %#x\n",
					    vrop->value[0], vrop->value[1],
					    le16toh(
					      *(uint16_t *)&vrop->value[2]));
				else if (strncmp("RV", vrop->keyword, 2) == 0)
					printf("\tRV: %#hhx\n", vrop->value[0]);
				else
					printf("\t%.2s: %s\n", vrop->keyword,
					    vrop->value);
			}
			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
				struct vpd_write *vwp;
				vwp = &cfg->vpd.vpd_w[i];
				if (strncmp("RW", vwp->keyword, 2) != 0)
					printf("\t%.2s(%#x-%#x): %s\n",
					    vwp->keyword, vwp->start,
					    vwp->start + vwp->len, vwp->value);
			}
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1772
1773static int
1774pci_porten(device_t pcib, int b, int s, int f)
1775{
1776	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1777		& PCIM_CMD_PORTEN) != 0;
1778}
1779
1780static int
1781pci_memen(device_t pcib, int b, int s, int f)
1782{
1783	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1784		& PCIM_CMD_MEMEN) != 0;
1785}
1786
1787/*
1788 * Add a resource based on a pci map register. Return 1 if the map
1789 * register is a 32bit map register or 2 if it is a 64bit register.
1790 */
1791static int
1792pci_add_map(device_t pcib, device_t bus, device_t dev,
1793    int b, int s, int f, int reg, struct resource_list *rl, int force,
1794    int prefetch)
1795{
1796	uint32_t map;
1797	pci_addr_t base;
1798	pci_addr_t start, end, count;
1799	uint8_t ln2size;
1800	uint8_t ln2range;
1801	uint32_t testval;
1802	uint16_t cmd;
1803	int type;
1804	int barlen;
1805	struct resource *res;
1806
1807	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1808	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1809	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1810	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1811
1812	if (pci_maptype(map) & PCI_MAPMEM)
1813		type = SYS_RES_MEMORY;
1814	else
1815		type = SYS_RES_IOPORT;
1816	ln2size = pci_mapsize(testval);
1817	ln2range = pci_maprange(testval);
1818	base = pci_mapbase(map);
1819	barlen = ln2range == 64 ? 2 : 1;
1820
1821	/*
1822	 * For I/O registers, if bottom bit is set, and the next bit up
1823	 * isn't clear, we know we have a BAR that doesn't conform to the
1824	 * spec, so ignore it.  Also, sanity check the size of the data
1825	 * areas to the type of memory involved.  Memory must be at least
1826	 * 16 bytes in size, while I/O ranges must be at least 4.
1827	 */
1828	if ((testval & 0x1) == 0x1 &&
1829	    (testval & 0x2) != 0)
1830		return (barlen);
1831	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1832	    (type == SYS_RES_IOPORT && ln2size < 2))
1833		return (barlen);
1834
1835	if (ln2range == 64)
1836		/* Read the other half of a 64bit map register */
1837		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
1838	if (bootverbose) {
1839		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
1840		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
1841		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
1842			printf(", port disabled\n");
1843		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
1844			printf(", memory disabled\n");
1845		else
1846			printf(", enabled\n");
1847	}
1848
1849	/*
1850	 * If base is 0, then we have problems.  It is best to ignore
1851	 * such entries for the moment.  These will be allocated later if
1852	 * the driver specifically requests them.  However, some
1853	 * removable busses look better when all resources are allocated,
1854	 * so allow '0' to be overriden.
1855	 *
1856	 * Similarly treat maps whose values is the same as the test value
1857	 * read back.  These maps have had all f's written to them by the
1858	 * BIOS in an attempt to disable the resources.
1859	 */
1860	if (!force && (base == 0 || map == testval))
1861		return (barlen);
1862	if ((u_long)base != base) {
1863		device_printf(bus,
1864		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
1865		return (barlen);
1866	}
1867
1868	/*
1869	 * This code theoretically does the right thing, but has
1870	 * undesirable side effects in some cases where peripherals
1871	 * respond oddly to having these bits enabled.  Let the user
1872	 * be able to turn them off (since pci_enable_io_modes is 1 by
1873	 * default).
1874	 */
1875	if (pci_enable_io_modes) {
1876		/* Turn on resources that have been left off by a lazy BIOS */
1877		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
1878			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
1879			cmd |= PCIM_CMD_PORTEN;
1880			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
1881		}
1882		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
1883			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
1884			cmd |= PCIM_CMD_MEMEN;
1885			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
1886		}
1887	} else {
1888		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
1889			return (barlen);
1890		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
1891			return (barlen);
1892	}
1893
1894	count = 1 << ln2size;
1895	if (base == 0 || base == pci_mapbase(testval)) {
1896		start = 0;	/* Let the parent deside */
1897		end = ~0ULL;
1898	} else {
1899		start = base;
1900		end = base + (1 << ln2size) - 1;
1901	}
1902	resource_list_add(rl, type, reg, start, end, count);
1903
1904	/*
1905	 * Not quite sure what to do on failure of allocating the resource
1906	 * since I can postulate several right answers.
1907	 */
1908	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
1909	    prefetch ? RF_PREFETCHABLE : 0);
1910	if (res == NULL)
1911		return (barlen);
1912	start = rman_get_start(res);
1913	if ((u_long)start != start) {
1914		/* Wait a minute!  this platform can't do this address. */
1915		device_printf(bus,
1916		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
1917		    b, s, f, reg, (uintmax_t)start);
1918		resource_list_release(rl, bus, dev, type, reg, res);
1919		return (barlen);
1920	}
1921	pci_write_config(dev, reg, start, 4);
1922	if (ln2range == 64)
1923		pci_write_config(dev, reg + 4, start >> 32, 4);
1924	return (barlen);
1925}
1926
1927/*
1928 * For ATA devices we need to decide early what addressing mode to use.
1929 * Legacy demands that the primary and secondary ATA ports sits on the
1930 * same addresses that old ISA hardware did. This dictates that we use
1931 * those addresses and ignore the BAR's if we cannot set PCI native
1932 * addressing mode.
1933 */
1934static void
1935pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
1936    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
1937{
1938	int rid, type, progif;
1939#if 0
1940	/* if this device supports PCI native addressing use it */
1941	progif = pci_read_config(dev, PCIR_PROGIF, 1);
1942	if ((progif & 0x8a) == 0x8a) {
1943		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
1944		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
1945			printf("Trying ATA native PCI addressing mode\n");
1946			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
1947		}
1948	}
1949#endif
1950	progif = pci_read_config(dev, PCIR_PROGIF, 1);
1951	type = SYS_RES_IOPORT;
1952	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
1953		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
1954		    prefetchmask & (1 << 0));
1955		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
1956		    prefetchmask & (1 << 1));
1957	} else {
1958		rid = PCIR_BAR(0);
1959		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
1960		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
1961		    0);
1962		rid = PCIR_BAR(1);
1963		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
1964		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
1965		    0);
1966	}
1967	if (progif & PCIP_STORAGE_IDE_MODESEC) {
1968		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
1969		    prefetchmask & (1 << 2));
1970		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
1971		    prefetchmask & (1 << 3));
1972	} else {
1973		rid = PCIR_BAR(2);
1974		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
1975		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
1976		    0);
1977		rid = PCIR_BAR(3);
1978		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
1979		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
1980		    0);
1981	}
1982	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
1983	    prefetchmask & (1 << 4));
1984	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
1985	    prefetchmask & (1 << 5));
1986}
1987
/*
 * Assign an interrupt line to 'dev' and record it as the device's
 * rid 0 SYS_RES_IRQ resource.  The IRQ comes from, in priority order:
 * a hw.pci%d.%d.INT%c.irq tunable, the bus's interrupt router (see
 * force_route below), or the value already in the intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only tunable values in (0, 255) are accepted. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2034
/*
 * Populate the device's resource list from its BARs (with special
 * handling for legacy ATA controllers), add any quirk-listed extra
 * map registers, and assign its interrupt routing.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV))
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR slots the map used. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2085
/*
 * Scan every slot/function on bus 'busno' and add a child device for
 * each PCI function found.  'dinfo_size' lets bus subclasses embed
 * struct pci_devinfo at the start of a larger per-device structure.
 */
void
pci_add_children(device_t dev, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Only scan past function 0 for multi-function devices. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2117
/*
 * Create a newbus child for the device described by 'dinfo', snapshot
 * its config registers, write them back (pci_cfg_restore() also powers
 * the device to D0 if needed), and populate its resource list.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2129
/*
 * Probe method for the generic PCI bus driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2139
/*
 * Attach method: discover our bus number from the parent bridge and
 * enumerate all children on that bus.
 */
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2159
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (if power management is enabled) place attached type 0
 * children in the ACPI-suggested sleep state, defaulting to D3.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2207
/*
 * Bus resume method: power each attached type 0 child back to D0 (when
 * power management is enabled), restore every child's saved config
 * space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2242
2243static void
2244pci_load_vendor_data(void)
2245{
2246	caddr_t vendordata, info;
2247
2248	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2249		info = preload_search_info(vendordata, MODINFO_ADDR);
2250		pci_vendordata = *(char **)info;
2251		info = preload_search_info(vendordata, MODINFO_SIZE);
2252		pci_vendordata_size = *(size_t *)info;
2253		/* terminate the database */
2254		pci_vendordata[pci_vendordata_size] = '\n';
2255	}
2256}
2257
/*
 * Bus driver_added method: let the new driver identify any children it
 * wants to add, then re-probe every child that is still unclaimed.
 * The child's config space is restored before probing; if the probe or
 * attach fails, its state is saved again (with setstate=1).
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2286
2287int
2288pci_print_child(device_t dev, device_t child)
2289{
2290	struct pci_devinfo *dinfo;
2291	struct resource_list *rl;
2292	int retval = 0;
2293
2294	dinfo = device_get_ivars(child);
2295	rl = &dinfo->resources;
2296
2297	retval += bus_print_child_header(dev, child);
2298
2299	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2300	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2301	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2302	if (device_get_flags(dev))
2303		retval += printf(" flags %#x", device_get_flags(dev));
2304
2305	retval += printf(" at device %d.%d", pci_get_slot(child),
2306	    pci_get_function(child));
2307
2308	retval += bus_print_child_footer(dev, child);
2309
2310	return (retval);
2311}
2312
/*
 * Class/subclass to human-readable description table, consulted by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for the device.  A subclass of -1 is the generic
 * description for the whole class.  The table is terminated by a
 * NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2399
/*
 * Bus probe_nomatch method: announce a device that no driver claimed.
 * Prefer a description from the loaded vendor database; otherwise fall
 * back to the generic class/subclass names in pci_nomatch_tab.  If
 * hw.pci.do_power_nodriver is set, power the device down as well.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.  cp starts at "unknown" and is therefore
		 * never NULL below; scp stays NULL when no subclass entry
		 * matches.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	if (pci_do_power_nodriver)
		pci_cfg_save(child,
		    (struct pci_devinfo *) device_get_ivars(child), 1);
	return;
}
2441
2442/*
2443 * Parse the PCI device database, if loaded, and return a pointer to a
2444 * description of the device.
2445 *
2446 * The database is flat text formatted as follows:
2447 *
2448 * Any line not in a valid format is ignored.
2449 * Lines are terminated with newline '\n' characters.
2450 *
2451 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2452 * the vendor name.
2453 *
2454 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2455 * - devices cannot be listed without a corresponding VENDOR line.
2456 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2457 * another TAB, then the device name.
2458 */
2459
2460/*
2461 * Assuming (ptr) points to the beginning of a line in the database,
2462 * return the vendor or device and description of the next entry.
2463 * The value of (vendor) or (device) inappropriate for the entry type
2464 * is set to -1.  Returns nonzero at the end of the database.
2465 *
2466 * Note that this is slightly unrobust in the face of corrupt data;
2467 * we attempt to safeguard against this by spamming the end of the
2468 * database with a newline when we initialise.
2469 */
2470static int
2471pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2472{
2473	char	*cp = *ptr;
2474	int	left;
2475
2476	*device = -1;
2477	*vendor = -1;
2478	**desc = '\0';
2479	for (;;) {
2480		left = pci_vendordata_size - (cp - pci_vendordata);
2481		if (left <= 0) {
2482			*ptr = cp;
2483			return(1);
2484		}
2485
2486		/* vendor entry? */
2487		if (*cp != '\t' &&
2488		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2489			break;
2490		/* device entry? */
2491		if (*cp == '\t' &&
2492		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2493			break;
2494
2495		/* skip to next line */
2496		while (*cp != '\n' && left > 0) {
2497			cp++;
2498			left--;
2499		}
2500		if (*cp == '\n') {
2501			cp++;
2502			left--;
2503		}
2504	}
2505	/* skip to next line */
2506	while (*cp != '\n' && left > 0) {
2507		cp++;
2508		left--;
2509	}
2510	if (*cp == '\n' && left > 0)
2511		cp++;
2512	*ptr = cp;
2513	return(0);
2514}
2515
/*
 * Build a malloc'd "vendor, device" description string for 'dev' from
 * the loaded vendor database, or return NULL if the database is absent
 * or the vendor is not listed.  The caller frees the result (M_DEVBUF).
 * vp and dp are 80-byte scratch buffers filled by
 * pci_describe_parse_line().
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan this vendor's device entries; stop at end of database or
	 * at the next vendor entry (device not listed).
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device id when no entry was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
2568
/*
 * Bus read_ivar method: export the cached PCI config header fields as
 * instance variables.  Returns ENOENT for unknown ivars and EINVAL for
 * PCI_IVAR_ETHADDR, which generic PCI cannot supply.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2648
2649int
2650pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2651{
2652	struct pci_devinfo *dinfo;
2653
2654	dinfo = device_get_ivars(child);
2655
2656	switch (which) {
2657	case PCI_IVAR_INTPIN:
2658		dinfo->cfg.intpin = value;
2659		return (0);
2660	case PCI_IVAR_ETHADDR:
2661	case PCI_IVAR_SUBVENDOR:
2662	case PCI_IVAR_SUBDEVICE:
2663	case PCI_IVAR_VENDOR:
2664	case PCI_IVAR_DEVICE:
2665	case PCI_IVAR_DEVID:
2666	case PCI_IVAR_CLASS:
2667	case PCI_IVAR_SUBCLASS:
2668	case PCI_IVAR_PROGIF:
2669	case PCI_IVAR_REVID:
2670	case PCI_IVAR_IRQ:
2671	case PCI_IVAR_BUS:
2672	case PCI_IVAR_SLOT:
2673	case PCI_IVAR_FUNCTION:
2674		return (EINVAL);	/* disallow for now */
2675
2676	default:
2677		return (ENOENT);
2678	}
2679}
2680
2681
2682#include "opt_ddb.h"
2683#ifdef DDB
2684#include <ddb/ddb.h>
2685#include <sys/cons.h>
2686
2687/*
2688 * List resources based on pci map registers, used for within ddb
2689 */
2690
2691DB_SHOW_COMMAND(pciregs, db_pci_dump)
2692{
2693	struct pci_devinfo *dinfo;
2694	struct devlist *devlist_head;
2695	struct pci_conf *p;
2696	const char *name;
2697	int i, error, none_count;
2698
2699	none_count = 0;
2700	/* get the head of the device queue */
2701	devlist_head = &pci_devq;
2702
2703	/*
2704	 * Go through the list of devices and print out devices
2705	 */
2706	for (error = 0, i = 0,
2707	     dinfo = STAILQ_FIRST(devlist_head);
2708	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
2709	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
2710
2711		/* Populate pd_name and pd_unit */
2712		name = NULL;
2713		if (dinfo->cfg.dev)
2714			name = device_get_name(dinfo->cfg.dev);
2715
2716		p = &dinfo->conf;
2717		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
2718			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
2719			(name && *name) ? name : "none",
2720			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
2721			none_count++,
2722			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
2723			p->pc_sel.pc_func, (p->pc_class << 16) |
2724			(p->pc_subclass << 8) | p->pc_progif,
2725			(p->pc_subdevice << 16) | p->pc_subvendor,
2726			(p->pc_device << 16) | p->pc_vendor,
2727			p->pc_revid, p->pc_hdr);
2728	}
2729}
2730#endif /* DDB */
2731
/*
 * Lazily allocate a resource for a BAR that has no entry in the
 * child's resource list yet.  The BAR is probed (write all-ones, read
 * back) to determine its true size and type; the probe clobbers the
 * BAR, so the saved or newly allocated base is written back at 'out'
 * on every path.  Returns NULL if the BAR is unimplemented, the type
 * doesn't match, or allocation fails.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Restore (or program) the BAR, including the high half for 64-bit. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2820
2821
/*
 * Bus alloc_resource method.  For direct children this performs lazy
 * allocation: interrupts may be routed on first request, I/O and
 * memory BARs are probed and sized via pci_alloc_map(), and previously
 * reserved entries are returned (activated if RF_ACTIVE is requested).
 * Everything else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return NULL;
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
2898
/*
 * Bus delete_resource method for direct children: release the cached
 * resource (refusing if the child still owns or has activated it),
 * remove the resource-list entry, clear the BAR, and propagate the
 * deletion to the parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
2934
2935struct resource_list *
2936pci_get_resource_list (device_t dev, device_t child)
2937{
2938	struct pci_devinfo *dinfo = device_get_ivars(child);
2939
2940	return (&dinfo->resources);
2941}
2942
2943uint32_t
2944pci_read_config_method(device_t dev, device_t child, int reg, int width)
2945{
2946	struct pci_devinfo *dinfo = device_get_ivars(child);
2947	pcicfgregs *cfg = &dinfo->cfg;
2948
2949	return (PCIB_READ_CONFIG(device_get_parent(dev),
2950	    cfg->bus, cfg->slot, cfg->func, reg, width));
2951}
2952
2953void
2954pci_write_config_method(device_t dev, device_t child, int reg,
2955    uint32_t val, int width)
2956{
2957	struct pci_devinfo *dinfo = device_get_ivars(child);
2958	pcicfgregs *cfg = &dinfo->cfg;
2959
2960	PCIB_WRITE_CONFIG(device_get_parent(dev),
2961	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
2962}
2963
2964int
2965pci_child_location_str_method(device_t dev, device_t child, char *buf,
2966    size_t buflen)
2967{
2968
2969	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
2970	    pci_get_function(child));
2971	return (0);
2972}
2973
2974int
2975pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
2976    size_t buflen)
2977{
2978	struct pci_devinfo *dinfo;
2979	pcicfgregs *cfg;
2980
2981	dinfo = device_get_ivars(child);
2982	cfg = &dinfo->cfg;
2983	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
2984	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
2985	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
2986	    cfg->progif);
2987	return (0);
2988}
2989
2990int
2991pci_assign_interrupt_method(device_t dev, device_t child)
2992{
2993	struct pci_devinfo *dinfo = device_get_ivars(child);
2994	pcicfgregs *cfg = &dinfo->cfg;
2995
2996	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
2997	    cfg->intpin));
2998}
2999
/*
 * Module event handler: on load, initialize the global device queue,
 * create the /dev/pci control device, and load the preloaded vendor
 * database; on unload, destroy the control device.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
3021
/*
 * Write the config-register snapshot held in 'dinfo' back to the
 * device, powering it up to D0 first if necessary.  Counterpart of
 * pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3068
3069void
3070pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3071{
3072	int i;
3073	uint32_t cls;
3074	int ps;
3075
3076	/*
3077	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3078	 * we know need special treatment.  Type 2 devices are cardbus bridges
3079	 * which also require special treatment.  Other types are unknown, and
3080	 * we err on the side of safety by ignoring them.  Powering down
3081	 * bridges should not be undertaken lightly.
3082	 */
3083	if (dinfo->cfg.hdrtype != 0)
3084		return;
3085	for (i = 0; i < dinfo->cfg.nummaps; i++)
3086		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3087	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3088
3089	/*
3090	 * Some drivers apparently write to these registers w/o updating our
3091	 * cached copy.  No harm happens if we update the copy, so do so here
3092	 * so we can restore them.  The COMMAND register is modified by the
3093	 * bus w/o updating the cache.  This should represent the normally
3094	 * writable portion of the 'defined' part of type 0 headers.  In
3095	 * theory we also need to save/restore the PCI capability structures
3096	 * we know about, but apart from power we don't know any that are
3097	 * writable.
3098	 */
3099	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3100	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3101	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3102	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3103	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3104	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3105	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3106	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3107	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3108	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3109	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3110	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3111	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3112	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3113	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3114
3115	/*
3116	 * don't set the state for display devices, base peripherals and
3117	 * memory devices since bad things happen when they are powered down.
3118	 * We should (a) have drivers that can easily detach and (b) use
3119	 * generic drivers for these devices so that some device actually
3120	 * attaches.  We need to make sure that when we implement (a) we don't
3121	 * power the device down on a reattach.
3122	 */
3123	cls = pci_get_class(dev);
3124	if (!setstate)
3125		return;
3126	switch (pci_do_power_nodriver)
3127	{
3128		case 0:		/* NO powerdown at all */
3129			return;
3130		case 1:		/* Conservative about what to power down */
3131			if (cls == PCIC_STORAGE)
3132				return;
3133			/*FALLTHROUGH*/
3134		case 2:		/* Agressive about what to power down */
3135			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3136			    cls == PCIC_BASEPERIPH)
3137				return;
3138			/*FALLTHROUGH*/
3139		case 3:		/* Power down everything */
3140			break;
3141	}
3142	/*
3143	 * PCI spec says we can only go into D3 state from D0 state.
3144	 * Transition from D[12] into D0 before going to D3 state.
3145	 */
3146	ps = pci_get_powerstate(dev);
3147	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3148		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3149	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3150		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3151}
3152