1// SPDX-License-Identifier: GPL-2.0
2/*
3 * PCI Bus Services, see include/linux/pci.h for further explanation.
4 *
5 * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
6 * David Mosberger-Tang
7 *
8 * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
9 */
10
11#include <linux/acpi.h>
12#include <linux/kernel.h>
13#include <linux/delay.h>
14#include <linux/dmi.h>
15#include <linux/init.h>
16#include <linux/msi.h>
17#include <linux/of.h>
18#include <linux/pci.h>
19#include <linux/pm.h>
20#include <linux/slab.h>
21#include <linux/module.h>
22#include <linux/spinlock.h>
23#include <linux/string.h>
24#include <linux/log2.h>
25#include <linux/logic_pio.h>
26#include <linux/pm_wakeup.h>
27#include <linux/device.h>
28#include <linux/pm_runtime.h>
29#include <linux/pci_hotplug.h>
30#include <linux/vmalloc.h>
31#include <asm/dma.h>
32#include <linux/aer.h>
33#include <linux/bitfield.h>
34#include "pci.h"
35
36DEFINE_MUTEX(pci_slot_mutex);
37
38const char *pci_power_names[] = {
39	"error", "D0", "D1", "D2", "D3hot", "D3cold", "unknown",
40};
41EXPORT_SYMBOL_GPL(pci_power_names);
42
43#ifdef CONFIG_X86_32
44int isa_dma_bridge_buggy;
45EXPORT_SYMBOL(isa_dma_bridge_buggy);
46#endif
47
48int pci_pci_problems;
49EXPORT_SYMBOL(pci_pci_problems);
50
51unsigned int pci_pm_d3hot_delay;
52
53static void pci_pme_list_scan(struct work_struct *work);
54
55static LIST_HEAD(pci_pme_list);
56static DEFINE_MUTEX(pci_pme_list_mutex);
57static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan);
58
59struct pci_pme_device {
60	struct list_head list;
61	struct pci_dev *dev;
62};
63
#define PME_TIMEOUT 1000 /* msec between PME checks */
65
66/*
67 * Following exit from Conventional Reset, devices must be ready within 1 sec
68 * (PCIe r6.0 sec 6.6.1).  A D3cold to D0 transition implies a Conventional
69 * Reset (PCIe r6.0 sec 5.8).
70 */
71#define PCI_RESET_WAIT 1000 /* msec */
72
73/*
74 * Devices may extend the 1 sec period through Request Retry Status
75 * completions (PCIe r6.0 sec 2.3.1).  The spec does not provide an upper
76 * limit, but 60 sec ought to be enough for any device to become
77 * responsive.
78 */
79#define PCIE_RESET_READY_POLL_MS 60000 /* msec */
80
81static void pci_dev_d3_sleep(struct pci_dev *dev)
82{
83	unsigned int delay_ms = max(dev->d3hot_delay, pci_pm_d3hot_delay);
84	unsigned int upper;
85
86	if (delay_ms) {
87		/* Use a 20% upper bound, 1ms minimum */
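		/* e.g. with the usual 10 ms D3hot delay this sleeps 10..12 ms */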
88		upper = max(DIV_ROUND_CLOSEST(delay_ms, 5), 1U);
89		usleep_range(delay_ms * USEC_PER_MSEC,
90			     (delay_ms + upper) * USEC_PER_MSEC);
91	}
92}
93
94bool pci_reset_supported(struct pci_dev *dev)
95{
96	return dev->reset_methods[0] != 0;
97}
98
99#ifdef CONFIG_PCI_DOMAINS
100int pci_domains_supported = 1;
101#endif
102
103#define DEFAULT_CARDBUS_IO_SIZE		(256)
104#define DEFAULT_CARDBUS_MEM_SIZE	(64*1024*1024)
105/* pci=cbmemsize=nnM,cbiosize=nn can override this */
106unsigned long pci_cardbus_io_size = DEFAULT_CARDBUS_IO_SIZE;
107unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
108
109#define DEFAULT_HOTPLUG_IO_SIZE		(256)
110#define DEFAULT_HOTPLUG_MMIO_SIZE	(2*1024*1024)
111#define DEFAULT_HOTPLUG_MMIO_PREF_SIZE	(2*1024*1024)
112/* hpiosize=nn can override this */
113unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
114/*
115 * pci=hpmmiosize=nnM overrides non-prefetchable MMIO size,
116 * pci=hpmmioprefsize=nnM overrides prefetchable MMIO size;
117 * pci=hpmemsize=nnM overrides both
118 */
119unsigned long pci_hotplug_mmio_size = DEFAULT_HOTPLUG_MMIO_SIZE;
120unsigned long pci_hotplug_mmio_pref_size = DEFAULT_HOTPLUG_MMIO_PREF_SIZE;
121
122#define DEFAULT_HOTPLUG_BUS_SIZE	1
123unsigned long pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE;
124
125
126/* PCIe MPS/MRRS strategy; can be overridden by kernel command-line param */
127#ifdef CONFIG_PCIE_BUS_TUNE_OFF
128enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
129#elif defined CONFIG_PCIE_BUS_SAFE
130enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
131#elif defined CONFIG_PCIE_BUS_PERFORMANCE
132enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE;
133#elif defined CONFIG_PCIE_BUS_PEER2PEER
134enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PEER2PEER;
135#else
136enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_DEFAULT;
137#endif
138
/*
 * The default CLS is used if the arch didn't set CLS explicitly and not
 * all PCI devices agree on the same value.  The arch can override either
 * the default or the actual value as it sees fit.  Note that this is
 * measured in 32-bit words, not bytes.
 */
145u8 pci_dfl_cache_line_size __ro_after_init = L1_CACHE_BYTES >> 2;
u8 pci_cache_line_size __ro_after_init;
147
148/*
149 * If we set up a device for bus mastering, we need to check the latency
150 * timer as certain BIOSes forget to set it properly.
151 */
152unsigned int pcibios_max_latency = 255;
153
154/* If set, the PCIe ARI capability will not be used. */
155static bool pcie_ari_disabled;
156
157/* If set, the PCIe ATS capability will not be used. */
158static bool pcie_ats_disabled;
159
160/* If set, the PCI config space of each device is printed during boot. */
161bool pci_early_dump;
162
163bool pci_ats_disabled(void)
164{
165	return pcie_ats_disabled;
166}
167EXPORT_SYMBOL_GPL(pci_ats_disabled);
168
169/* Disable bridge_d3 for all PCIe ports */
170static bool pci_bridge_d3_disable;
171/* Force bridge_d3 for all PCIe ports */
172static bool pci_bridge_d3_force;
173
174static int __init pcie_port_pm_setup(char *str)
175{
176	if (!strcmp(str, "off"))
177		pci_bridge_d3_disable = true;
178	else if (!strcmp(str, "force"))
179		pci_bridge_d3_force = true;
180	return 1;
181}
182__setup("pcie_port_pm=", pcie_port_pm_setup);
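
/*
 * e.g. booting with "pcie_port_pm=off" prevents the PCI core from putting
 * PCIe ports into D3.
 */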
183
184/**
185 * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
186 * @bus: pointer to PCI bus structure to search
187 *
188 * Given a PCI bus, returns the highest PCI bus number present in the set
189 * including the given PCI bus and its list of child PCI buses.
190 */
191unsigned char pci_bus_max_busnr(struct pci_bus *bus)
192{
193	struct pci_bus *tmp;
194	unsigned char max, n;
195
196	max = bus->busn_res.end;
197	list_for_each_entry(tmp, &bus->children, node) {
198		n = pci_bus_max_busnr(tmp);
199		if (n > max)
200			max = n;
201	}
202	return max;
203}
204EXPORT_SYMBOL_GPL(pci_bus_max_busnr);
205
206/**
207 * pci_status_get_and_clear_errors - return and clear error bits in PCI_STATUS
208 * @pdev: the PCI device
209 *
210 * Returns error bits set in PCI_STATUS and clears them.
211 */
212int pci_status_get_and_clear_errors(struct pci_dev *pdev)
213{
214	u16 status;
215	int ret;
216
217	ret = pci_read_config_word(pdev, PCI_STATUS, &status);
218	if (ret != PCIBIOS_SUCCESSFUL)
219		return -EIO;
220
221	status &= PCI_STATUS_ERROR_BITS;
222	if (status)
223		pci_write_config_word(pdev, PCI_STATUS, status);
224
225	return status;
226}
227EXPORT_SYMBOL_GPL(pci_status_get_and_clear_errors);
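
/*
 * Illustrative use (assuming a valid pdev), e.g. after an operation that may
 * have failed on the bus:
 *
 *	int status = pci_status_get_and_clear_errors(pdev);
 *
 *	if (status < 0)
 *		return status;
 *	if (status & PCI_STATUS_REC_MASTER_ABORT)
 *		... handle the master abort ...
 */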
228
229#ifdef CONFIG_HAS_IOMEM
230static void __iomem *__pci_ioremap_resource(struct pci_dev *pdev, int bar,
231					    bool write_combine)
232{
233	struct resource *res = &pdev->resource[bar];
234	resource_size_t start = res->start;
235	resource_size_t size = resource_size(res);
236
237	/*
238	 * Make sure the BAR is actually a memory resource, not an IO resource
239	 */
240	if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) {
241		pci_err(pdev, "can't ioremap BAR %d: %pR\n", bar, res);
242		return NULL;
243	}
244
245	if (write_combine)
246		return ioremap_wc(start, size);
247
248	return ioremap(start, size);
249}
250
251void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
252{
253	return __pci_ioremap_resource(pdev, bar, false);
254}
255EXPORT_SYMBOL_GPL(pci_ioremap_bar);
256
257void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar)
258{
259	return __pci_ioremap_resource(pdev, bar, true);
260}
261EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar);
262#endif
263
264/**
265 * pci_dev_str_match_path - test if a path string matches a device
266 * @dev: the PCI device to test
267 * @path: string to match the device against
268 * @endptr: pointer to the string after the match
269 *
270 * Test if a string (typically from a kernel parameter) formatted as a
271 * path of device/function addresses matches a PCI device. The string must
272 * be of the form:
273 *
274 *   [<domain>:]<bus>:<device>.<func>[/<device>.<func>]*
275 *
276 * A path for a device can be obtained using 'lspci -t'.  Using a path
277 * is more robust against bus renumbering than using only a single bus,
278 * device and function address.
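 *
 * For example, the (illustrative) path "0000:00:1c.0/00.0" matches function 0
 * of device 00 immediately below the bridge at domain 0000, bus 00, device
 * 0x1c, function 0.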
279 *
280 * Returns 1 if the string matches the device, 0 if it does not and
281 * a negative error code if it fails to parse the string.
282 */
283static int pci_dev_str_match_path(struct pci_dev *dev, const char *path,
284				  const char **endptr)
285{
286	int ret;
287	unsigned int seg, bus, slot, func;
288	char *wpath, *p;
289	char end;
290
291	*endptr = strchrnul(path, ';');
292
293	wpath = kmemdup_nul(path, *endptr - path, GFP_ATOMIC);
294	if (!wpath)
295		return -ENOMEM;
296
297	while (1) {
298		p = strrchr(wpath, '/');
299		if (!p)
300			break;
301		ret = sscanf(p, "/%x.%x%c", &slot, &func, &end);
302		if (ret != 2) {
303			ret = -EINVAL;
304			goto free_and_exit;
305		}
306
307		if (dev->devfn != PCI_DEVFN(slot, func)) {
308			ret = 0;
309			goto free_and_exit;
310		}
311
312		/*
313		 * Note: we don't need to get a reference to the upstream
314		 * bridge because we hold a reference to the top level
315		 * device which should hold a reference to the bridge,
316		 * and so on.
317		 */
318		dev = pci_upstream_bridge(dev);
319		if (!dev) {
320			ret = 0;
321			goto free_and_exit;
322		}
323
324		*p = 0;
325	}
326
327	ret = sscanf(wpath, "%x:%x:%x.%x%c", &seg, &bus, &slot,
328		     &func, &end);
329	if (ret != 4) {
330		seg = 0;
331		ret = sscanf(wpath, "%x:%x.%x%c", &bus, &slot, &func, &end);
332		if (ret != 3) {
333			ret = -EINVAL;
334			goto free_and_exit;
335		}
336	}
337
338	ret = (seg == pci_domain_nr(dev->bus) &&
339	       bus == dev->bus->number &&
340	       dev->devfn == PCI_DEVFN(slot, func));
341
342free_and_exit:
343	kfree(wpath);
344	return ret;
345}
346
347/**
348 * pci_dev_str_match - test if a string matches a device
349 * @dev: the PCI device to test
350 * @p: string to match the device against
351 * @endptr: pointer to the string after the match
352 *
353 * Test if a string (typically from a kernel parameter) matches a specified
354 * PCI device. The string may be of one of the following formats:
355 *
356 *   [<domain>:]<bus>:<device>.<func>[/<device>.<func>]*
357 *   pci:<vendor>:<device>[:<subvendor>:<subdevice>]
358 *
359 * The first format specifies a PCI bus/device/function address which
360 * may change if new hardware is inserted, if motherboard firmware changes,
361 * or due to changes caused in kernel parameters. If the domain is
362 * left unspecified, it is taken to be 0.  In order to be robust against
363 * bus renumbering issues, a path of PCI device/function numbers may be used
364 * to address the specific device.  The path for a device can be determined
365 * through the use of 'lspci -t'.
366 *
367 * The second format matches devices using IDs in the configuration
368 * space which may match multiple devices in the system. A value of 0
369 * for any field will match all devices. (Note: this differs from
370 * in-kernel code that uses PCI_ANY_ID which is ~0; this is for
371 * legacy reasons and convenience so users don't have to specify
372 * FFFFFFFFs on the command line.)
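 *
 * For example, the (illustrative) string "pci:8086:0:0:0" matches any device
 * with Vendor ID 0x8086, whatever its Device ID and Subsystem IDs, because
 * the zero fields act as wildcards.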
373 *
374 * Returns 1 if the string matches the device, 0 if it does not and
375 * a negative error code if the string cannot be parsed.
376 */
377static int pci_dev_str_match(struct pci_dev *dev, const char *p,
378			     const char **endptr)
379{
380	int ret;
381	int count;
382	unsigned short vendor, device, subsystem_vendor, subsystem_device;
383
384	if (strncmp(p, "pci:", 4) == 0) {
385		/* PCI vendor/device (subvendor/subdevice) IDs are specified */
386		p += 4;
387		ret = sscanf(p, "%hx:%hx:%hx:%hx%n", &vendor, &device,
388			     &subsystem_vendor, &subsystem_device, &count);
389		if (ret != 4) {
390			ret = sscanf(p, "%hx:%hx%n", &vendor, &device, &count);
391			if (ret != 2)
392				return -EINVAL;
393
394			subsystem_vendor = 0;
395			subsystem_device = 0;
396		}
397
398		p += count;
399
400		if ((!vendor || vendor == dev->vendor) &&
401		    (!device || device == dev->device) &&
402		    (!subsystem_vendor ||
403			    subsystem_vendor == dev->subsystem_vendor) &&
404		    (!subsystem_device ||
405			    subsystem_device == dev->subsystem_device))
406			goto found;
407	} else {
408		/*
409		 * PCI Bus, Device, Function IDs are specified
410		 * (optionally, may include a path of devfns following it)
411		 */
412		ret = pci_dev_str_match_path(dev, p, &p);
413		if (ret < 0)
414			return ret;
415		else if (ret)
416			goto found;
417	}
418
419	*endptr = p;
420	return 0;
421
422found:
423	*endptr = p;
424	return 1;
425}
426
427static u8 __pci_find_next_cap_ttl(struct pci_bus *bus, unsigned int devfn,
428				  u8 pos, int cap, int *ttl)
429{
430	u8 id;
431	u16 ent;
432
433	pci_bus_read_config_byte(bus, devfn, pos, &pos);
434
435	while ((*ttl)--) {
436		if (pos < 0x40)
437			break;
438		pos &= ~3;
439		pci_bus_read_config_word(bus, devfn, pos, &ent);
440
441		id = ent & 0xff;
442		if (id == 0xff)
443			break;
444		if (id == cap)
445			return pos;
446		pos = (ent >> 8);
447	}
448	return 0;
449}
450
451static u8 __pci_find_next_cap(struct pci_bus *bus, unsigned int devfn,
452			      u8 pos, int cap)
453{
454	int ttl = PCI_FIND_CAP_TTL;
455
456	return __pci_find_next_cap_ttl(bus, devfn, pos, cap, &ttl);
457}
458
459u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap)
460{
461	return __pci_find_next_cap(dev->bus, dev->devfn,
462				   pos + PCI_CAP_LIST_NEXT, cap);
463}
464EXPORT_SYMBOL_GPL(pci_find_next_capability);
465
466static u8 __pci_bus_find_cap_start(struct pci_bus *bus,
467				    unsigned int devfn, u8 hdr_type)
468{
469	u16 status;
470
471	pci_bus_read_config_word(bus, devfn, PCI_STATUS, &status);
472	if (!(status & PCI_STATUS_CAP_LIST))
473		return 0;
474
475	switch (hdr_type) {
476	case PCI_HEADER_TYPE_NORMAL:
477	case PCI_HEADER_TYPE_BRIDGE:
478		return PCI_CAPABILITY_LIST;
479	case PCI_HEADER_TYPE_CARDBUS:
480		return PCI_CB_CAPABILITY_LIST;
481	}
482
483	return 0;
484}
485
486/**
487 * pci_find_capability - query for devices' capabilities
488 * @dev: PCI device to query
489 * @cap: capability code
490 *
491 * Tell if a device supports a given PCI capability.
492 * Returns the address of the requested capability structure within the
493 * device's PCI configuration space or 0 in case the device does not
494 * support it.  Possible values for @cap include:
495 *
496 *  %PCI_CAP_ID_PM           Power Management
497 *  %PCI_CAP_ID_AGP          Accelerated Graphics Port
498 *  %PCI_CAP_ID_VPD          Vital Product Data
499 *  %PCI_CAP_ID_SLOTID       Slot Identification
500 *  %PCI_CAP_ID_MSI          Message Signalled Interrupts
501 *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap
502 *  %PCI_CAP_ID_PCIX         PCI-X
503 *  %PCI_CAP_ID_EXP          PCI Express
504 */
505u8 pci_find_capability(struct pci_dev *dev, int cap)
506{
507	u8 pos;
508
509	pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type);
510	if (pos)
511		pos = __pci_find_next_cap(dev->bus, dev->devfn, pos, cap);
512
513	return pos;
514}
515EXPORT_SYMBOL(pci_find_capability);
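
/*
 * Illustrative use (assuming a valid pdev): locate the Power Management
 * capability and read its control register:
 *
 *	u8 pm = pci_find_capability(pdev, PCI_CAP_ID_PM);
 *	u16 pmcsr;
 *
 *	if (pm)
 *		pci_read_config_word(pdev, pm + PCI_PM_CTRL, &pmcsr);
 */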
516
517/**
518 * pci_bus_find_capability - query for devices' capabilities
519 * @bus: the PCI bus to query
520 * @devfn: PCI device to query
521 * @cap: capability code
522 *
523 * Like pci_find_capability() but works for PCI devices that do not have a
524 * pci_dev structure set up yet.
525 *
526 * Returns the address of the requested capability structure within the
527 * device's PCI configuration space or 0 in case the device does not
528 * support it.
529 */
530u8 pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
531{
532	u8 hdr_type, pos;
533
534	pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type);
535
536	pos = __pci_bus_find_cap_start(bus, devfn, hdr_type & PCI_HEADER_TYPE_MASK);
537	if (pos)
538		pos = __pci_find_next_cap(bus, devfn, pos, cap);
539
540	return pos;
541}
542EXPORT_SYMBOL(pci_bus_find_capability);
543
544/**
545 * pci_find_next_ext_capability - Find an extended capability
546 * @dev: PCI device to query
547 * @start: address at which to start looking (0 to start at beginning of list)
548 * @cap: capability code
549 *
550 * Returns the address of the next matching extended capability structure
551 * within the device's PCI configuration space or 0 if the device does
552 * not support it.  Some capabilities can occur several times, e.g., the
553 * vendor-specific capability, and this provides a way to find them all.
554 */
555u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 start, int cap)
556{
557	u32 header;
558	int ttl;
559	u16 pos = PCI_CFG_SPACE_SIZE;
560
561	/* minimum 8 bytes per capability */
562	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
563
564	if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
565		return 0;
566
567	if (start)
568		pos = start;
569
570	if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
571		return 0;
572
573	/*
574	 * If we have no capabilities, this is indicated by cap ID,
575	 * cap version and next pointer all being 0.
576	 */
577	if (header == 0)
578		return 0;
579
580	while (ttl-- > 0) {
581		if (PCI_EXT_CAP_ID(header) == cap && pos != start)
582			return pos;
583
584		pos = PCI_EXT_CAP_NEXT(header);
585		if (pos < PCI_CFG_SPACE_SIZE)
586			break;
587
588		if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
589			break;
590	}
591
592	return 0;
593}
594EXPORT_SYMBOL_GPL(pci_find_next_ext_capability);
595
596/**
597 * pci_find_ext_capability - Find an extended capability
598 * @dev: PCI device to query
599 * @cap: capability code
600 *
601 * Returns the address of the requested extended capability structure
602 * within the device's PCI configuration space or 0 if the device does
603 * not support it.  Possible values for @cap include:
604 *
605 *  %PCI_EXT_CAP_ID_ERR		Advanced Error Reporting
606 *  %PCI_EXT_CAP_ID_VC		Virtual Channel
607 *  %PCI_EXT_CAP_ID_DSN		Device Serial Number
608 *  %PCI_EXT_CAP_ID_PWR		Power Budgeting
609 */
610u16 pci_find_ext_capability(struct pci_dev *dev, int cap)
611{
612	return pci_find_next_ext_capability(dev, 0, cap);
613}
614EXPORT_SYMBOL_GPL(pci_find_ext_capability);
615
616/**
617 * pci_get_dsn - Read and return the 8-byte Device Serial Number
618 * @dev: PCI device to query
619 *
620 * Looks up the PCI_EXT_CAP_ID_DSN and reads the 8 bytes of the Device Serial
621 * Number.
622 *
623 * Returns the DSN, or zero if the capability does not exist.
624 */
625u64 pci_get_dsn(struct pci_dev *dev)
626{
627	u32 dword;
628	u64 dsn;
629	int pos;
630
631	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DSN);
632	if (!pos)
633		return 0;
634
635	/*
636	 * The Device Serial Number is two dwords offset 4 bytes from the
637	 * capability position. The specification says that the first dword is
638	 * the lower half, and the second dword is the upper half.
639	 */
640	pos += 4;
641	pci_read_config_dword(dev, pos, &dword);
642	dsn = (u64)dword;
643	pci_read_config_dword(dev, pos + 4, &dword);
644	dsn |= ((u64)dword) << 32;
645
646	return dsn;
647}
648EXPORT_SYMBOL_GPL(pci_get_dsn);
649
650static u8 __pci_find_next_ht_cap(struct pci_dev *dev, u8 pos, int ht_cap)
651{
652	int rc, ttl = PCI_FIND_CAP_TTL;
653	u8 cap, mask;
654
655	if (ht_cap == HT_CAPTYPE_SLAVE || ht_cap == HT_CAPTYPE_HOST)
656		mask = HT_3BIT_CAP_MASK;
657	else
658		mask = HT_5BIT_CAP_MASK;
659
660	pos = __pci_find_next_cap_ttl(dev->bus, dev->devfn, pos,
661				      PCI_CAP_ID_HT, &ttl);
662	while (pos) {
663		rc = pci_read_config_byte(dev, pos + 3, &cap);
664		if (rc != PCIBIOS_SUCCESSFUL)
665			return 0;
666
667		if ((cap & mask) == ht_cap)
668			return pos;
669
670		pos = __pci_find_next_cap_ttl(dev->bus, dev->devfn,
671					      pos + PCI_CAP_LIST_NEXT,
672					      PCI_CAP_ID_HT, &ttl);
673	}
674
675	return 0;
676}
677
678/**
679 * pci_find_next_ht_capability - query a device's HyperTransport capabilities
680 * @dev: PCI device to query
681 * @pos: Position from which to continue searching
682 * @ht_cap: HyperTransport capability code
683 *
684 * To be used in conjunction with pci_find_ht_capability() to search for
685 * all capabilities matching @ht_cap. @pos should always be a value returned
686 * from pci_find_ht_capability().
687 *
688 * NB. To be 100% safe against broken PCI devices, the caller should take
689 * steps to avoid an infinite loop.
690 */
691u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap)
692{
693	return __pci_find_next_ht_cap(dev, pos + PCI_CAP_LIST_NEXT, ht_cap);
694}
695EXPORT_SYMBOL_GPL(pci_find_next_ht_capability);
696
697/**
698 * pci_find_ht_capability - query a device's HyperTransport capabilities
699 * @dev: PCI device to query
700 * @ht_cap: HyperTransport capability code
701 *
702 * Tell if a device supports a given HyperTransport capability.
703 * Returns an address within the device's PCI configuration space
 * or 0 in case the device does not support the requested capability.
705 * The address points to the PCI capability, of type PCI_CAP_ID_HT,
706 * which has a HyperTransport capability matching @ht_cap.
707 */
708u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap)
709{
710	u8 pos;
711
712	pos = __pci_bus_find_cap_start(dev->bus, dev->devfn, dev->hdr_type);
713	if (pos)
714		pos = __pci_find_next_ht_cap(dev, pos, ht_cap);
715
716	return pos;
717}
718EXPORT_SYMBOL_GPL(pci_find_ht_capability);
719
720/**
721 * pci_find_vsec_capability - Find a vendor-specific extended capability
722 * @dev: PCI device to query
723 * @vendor: Vendor ID for which capability is defined
724 * @cap: Vendor-specific capability ID
725 *
726 * If @dev has Vendor ID @vendor, search for a VSEC capability with
727 * VSEC ID @cap. If found, return the capability offset in
728 * config space; otherwise return 0.
729 */
730u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
731{
732	u16 vsec = 0;
733	u32 header;
734	int ret;
735
736	if (vendor != dev->vendor)
737		return 0;
738
739	while ((vsec = pci_find_next_ext_capability(dev, vsec,
740						     PCI_EXT_CAP_ID_VNDR))) {
741		ret = pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER, &header);
742		if (ret != PCIBIOS_SUCCESSFUL)
743			continue;
744
745		if (PCI_VNDR_HEADER_ID(header) == cap)
746			return vsec;
747	}
748
749	return 0;
750}
751EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
752
753/**
754 * pci_find_dvsec_capability - Find DVSEC for vendor
755 * @dev: PCI device to query
756 * @vendor: Vendor ID to match for the DVSEC
757 * @dvsec: Designated Vendor-specific capability ID
758 *
759 * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
760 * offset in config space; otherwise return 0.
761 */
762u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
763{
764	int pos;
765
766	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
767	if (!pos)
768		return 0;
769
770	while (pos) {
771		u16 v, id;
772
773		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
774		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
775		if (vendor == v && dvsec == id)
776			return pos;
777
778		pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
779	}
780
781	return 0;
782}
783EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
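
/*
 * Illustrative use (the vendor and DVSEC IDs below are placeholders, assuming
 * a valid pdev):
 *
 *	u16 pos = pci_find_dvsec_capability(pdev, 0x1234, 0x0001);
 *
 *	if (pos)
 *		... read vendor-defined registers relative to pos ...
 */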
784
785/**
786 * pci_find_parent_resource - return resource region of parent bus of given
787 *			      region
788 * @dev: PCI device structure contains resources to be searched
789 * @res: child resource record for which parent is sought
790 *
 * For the given resource region of the given device, return the resource
 * region of the parent bus that contains the given region.
793 */
794struct resource *pci_find_parent_resource(const struct pci_dev *dev,
795					  struct resource *res)
796{
797	const struct pci_bus *bus = dev->bus;
798	struct resource *r;
799
800	pci_bus_for_each_resource(bus, r) {
801		if (!r)
802			continue;
803		if (resource_contains(r, res)) {
804
805			/*
806			 * If the window is prefetchable but the BAR is
807			 * not, the allocator made a mistake.
808			 */
809			if (r->flags & IORESOURCE_PREFETCH &&
810			    !(res->flags & IORESOURCE_PREFETCH))
811				return NULL;
812
813			/*
814			 * If we're below a transparent bridge, there may
815			 * be both a positively-decoded aperture and a
816			 * subtractively-decoded region that contain the BAR.
817			 * We want the positively-decoded one, so this depends
818			 * on pci_bus_for_each_resource() giving us those
819			 * first.
820			 */
821			return r;
822		}
823	}
824	return NULL;
825}
826EXPORT_SYMBOL(pci_find_parent_resource);
827
828/**
829 * pci_find_resource - Return matching PCI device resource
830 * @dev: PCI device to query
831 * @res: Resource to look for
832 *
833 * Goes over standard PCI resources (BARs) and checks if the given resource
834 * is partially or fully contained in any of them. In that case the
835 * matching resource is returned, %NULL otherwise.
836 */
837struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res)
838{
839	int i;
840
841	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
842		struct resource *r = &dev->resource[i];
843
844		if (r->start && resource_contains(r, res))
845			return r;
846	}
847
848	return NULL;
849}
850EXPORT_SYMBOL(pci_find_resource);
851
852/**
853 * pci_resource_name - Return the name of the PCI resource
854 * @dev: PCI device to query
855 * @i: index of the resource
856 *
 * Return the standard PCI resource (BAR) name according to its index.
858 */
859const char *pci_resource_name(struct pci_dev *dev, unsigned int i)
860{
861	static const char * const bar_name[] = {
862		"BAR 0",
863		"BAR 1",
864		"BAR 2",
865		"BAR 3",
866		"BAR 4",
867		"BAR 5",
868		"ROM",
869#ifdef CONFIG_PCI_IOV
870		"VF BAR 0",
871		"VF BAR 1",
872		"VF BAR 2",
873		"VF BAR 3",
874		"VF BAR 4",
875		"VF BAR 5",
876#endif
877		"bridge window",	/* "io" included in %pR */
878		"bridge window",	/* "mem" included in %pR */
879		"bridge window",	/* "mem pref" included in %pR */
880	};
881	static const char * const cardbus_name[] = {
882		"BAR 1",
883		"unknown",
884		"unknown",
885		"unknown",
886		"unknown",
887		"unknown",
888#ifdef CONFIG_PCI_IOV
889		"unknown",
890		"unknown",
891		"unknown",
892		"unknown",
893		"unknown",
894		"unknown",
895#endif
896		"CardBus bridge window 0",	/* I/O */
897		"CardBus bridge window 1",	/* I/O */
898		"CardBus bridge window 0",	/* mem */
899		"CardBus bridge window 1",	/* mem */
900	};
901
902	if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS &&
903	    i < ARRAY_SIZE(cardbus_name))
904		return cardbus_name[i];
905
906	if (i < ARRAY_SIZE(bar_name))
907		return bar_name[i];
908
909	return "unknown";
910}
911
912/**
913 * pci_wait_for_pending - wait for @mask bit(s) to clear in status word @pos
914 * @dev: the PCI device to operate on
915 * @pos: config space offset of status word
916 * @mask: mask of bit(s) to care about in status word
917 *
918 * Return 1 when mask bit(s) in status word clear, 0 otherwise.
919 */
920int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask)
921{
922	int i;
923
	/* Wait for the Transaction Pending bit to clear */
925	for (i = 0; i < 4; i++) {
926		u16 status;
927		if (i)
928			msleep((1 << (i - 1)) * 100);
929
930		pci_read_config_word(dev, pos, &status);
931		if (!(status & mask))
932			return 1;
933	}
934
935	return 0;
936}
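
/*
 * Illustrative use (assuming a valid PCIe pdev): wait for the Transaction
 * Pending bit in the Device Status register to clear:
 *
 *	if (!pci_wait_for_pending(pdev, pci_pcie_cap(pdev) + PCI_EXP_DEVSTA,
 *				  PCI_EXP_DEVSTA_TRPND))
 *		pci_err(pdev, "transaction is not cleared\n");
 */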
937
938static int pci_acs_enable;
939
940/**
941 * pci_request_acs - ask for ACS to be enabled if supported
942 */
943void pci_request_acs(void)
944{
945	pci_acs_enable = 1;
946}
947
948static const char *disable_acs_redir_param;
949
950/**
951 * pci_disable_acs_redir - disable ACS redirect capabilities
952 * @dev: the PCI device
953 *
 * Applies only to devices specified in the disable_acs_redir kernel parameter.
955 */
956static void pci_disable_acs_redir(struct pci_dev *dev)
957{
958	int ret = 0;
959	const char *p;
960	int pos;
961	u16 ctrl;
962
963	if (!disable_acs_redir_param)
964		return;
965
966	p = disable_acs_redir_param;
967	while (*p) {
968		ret = pci_dev_str_match(dev, p, &p);
969		if (ret < 0) {
970			pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n",
971				     disable_acs_redir_param);
972
973			break;
974		} else if (ret == 1) {
975			/* Found a match */
976			break;
977		}
978
979		if (*p != ';' && *p != ',') {
980			/* End of param or invalid format */
981			break;
982		}
983		p++;
984	}
985
986	if (ret != 1)
987		return;
988
989	if (!pci_dev_specific_disable_acs_redir(dev))
990		return;
991
992	pos = dev->acs_cap;
993	if (!pos) {
994		pci_warn(dev, "cannot disable ACS redirect for this hardware as it does not have ACS capabilities\n");
995		return;
996	}
997
998	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
999
1000	/* P2P Request & Completion Redirect */
1001	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
1002
1003	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
1004
1005	pci_info(dev, "disabled ACS redirect\n");
1006}
1007
1008/**
1009 * pci_std_enable_acs - enable ACS on devices using standard ACS capabilities
1010 * @dev: the PCI device
1011 */
1012static void pci_std_enable_acs(struct pci_dev *dev)
1013{
1014	int pos;
1015	u16 cap;
1016	u16 ctrl;
1017
1018	pos = dev->acs_cap;
1019	if (!pos)
1020		return;
1021
1022	pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap);
1023	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
1024
1025	/* Source Validation */
1026	ctrl |= (cap & PCI_ACS_SV);
1027
1028	/* P2P Request Redirect */
1029	ctrl |= (cap & PCI_ACS_RR);
1030
1031	/* P2P Completion Redirect */
1032	ctrl |= (cap & PCI_ACS_CR);
1033
1034	/* Upstream Forwarding */
1035	ctrl |= (cap & PCI_ACS_UF);
1036
1037	/* Enable Translation Blocking for external devices and noats */
1038	if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
1039		ctrl |= (cap & PCI_ACS_TB);
1040
1041	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
1042}
1043
1044/**
 * pci_enable_acs - enable ACS if the hardware supports it
1046 * @dev: the PCI device
1047 */
1048static void pci_enable_acs(struct pci_dev *dev)
1049{
1050	if (!pci_acs_enable)
1051		goto disable_acs_redir;
1052
1053	if (!pci_dev_specific_enable_acs(dev))
1054		goto disable_acs_redir;
1055
1056	pci_std_enable_acs(dev);
1057
1058disable_acs_redir:
1059	/*
1060	 * Note: pci_disable_acs_redir() must be called even if ACS was not
1061	 * enabled by the kernel because it may have been enabled by
1062	 * platform firmware.  So if we are told to disable it, we should
1063	 * always disable it after setting the kernel's default
1064	 * preferences.
1065	 */
1066	pci_disable_acs_redir(dev);
1067}
1068
1069/**
1070 * pcie_read_tlp_log - read TLP Header Log
1071 * @dev: PCIe device
1072 * @where: PCI Config offset of TLP Header Log
1073 * @tlp_log: TLP Log structure to fill
1074 *
1075 * Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC.
1076 *
1077 * Return: 0 on success and filled TLP Log structure, <0 on error.
1078 */
1079int pcie_read_tlp_log(struct pci_dev *dev, int where,
1080		      struct pcie_tlp_log *tlp_log)
1081{
1082	int i, ret;
1083
1084	memset(tlp_log, 0, sizeof(*tlp_log));
1085
1086	for (i = 0; i < 4; i++) {
1087		ret = pci_read_config_dword(dev, where + i * 4,
1088					    &tlp_log->dw[i]);
1089		if (ret)
1090			return pcibios_err_to_errno(ret);
1091	}
1092
1093	return 0;
1094}
1095EXPORT_SYMBOL_GPL(pcie_read_tlp_log);
1096
1097/**
1098 * pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
1099 * @dev: PCI device to have its BARs restored
1100 *
1101 * Restore the BAR values for a given device, so as to make it
1102 * accessible by its driver.
1103 */
1104static void pci_restore_bars(struct pci_dev *dev)
1105{
1106	int i;
1107
1108	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++)
1109		pci_update_resource(dev, i);
1110}
1111
1112static inline bool platform_pci_power_manageable(struct pci_dev *dev)
1113{
1114	if (pci_use_mid_pm())
1115		return true;
1116
1117	return acpi_pci_power_manageable(dev);
1118}
1119
1120static inline int platform_pci_set_power_state(struct pci_dev *dev,
1121					       pci_power_t t)
1122{
1123	if (pci_use_mid_pm())
1124		return mid_pci_set_power_state(dev, t);
1125
1126	return acpi_pci_set_power_state(dev, t);
1127}
1128
1129static inline pci_power_t platform_pci_get_power_state(struct pci_dev *dev)
1130{
1131	if (pci_use_mid_pm())
1132		return mid_pci_get_power_state(dev);
1133
1134	return acpi_pci_get_power_state(dev);
1135}
1136
1137static inline void platform_pci_refresh_power_state(struct pci_dev *dev)
1138{
1139	if (!pci_use_mid_pm())
1140		acpi_pci_refresh_power_state(dev);
1141}
1142
1143static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
1144{
1145	if (pci_use_mid_pm())
1146		return PCI_POWER_ERROR;
1147
1148	return acpi_pci_choose_state(dev);
1149}
1150
1151static inline int platform_pci_set_wakeup(struct pci_dev *dev, bool enable)
1152{
1153	if (pci_use_mid_pm())
1154		return PCI_POWER_ERROR;
1155
1156	return acpi_pci_wakeup(dev, enable);
1157}
1158
1159static inline bool platform_pci_need_resume(struct pci_dev *dev)
1160{
1161	if (pci_use_mid_pm())
1162		return false;
1163
1164	return acpi_pci_need_resume(dev);
1165}
1166
1167static inline bool platform_pci_bridge_d3(struct pci_dev *dev)
1168{
1169	if (pci_use_mid_pm())
1170		return false;
1171
1172	return acpi_pci_bridge_d3(dev);
1173}
1174
1175/**
1176 * pci_update_current_state - Read power state of given device and cache it
1177 * @dev: PCI device to handle.
1178 * @state: State to cache in case the device doesn't have the PM capability
1179 *
1180 * The power state is read from the PMCSR register, which however is
1181 * inaccessible in D3cold.  The platform firmware is therefore queried first
1182 * to detect accessibility of the register.  In case the platform firmware
1183 * reports an incorrect state or the device isn't power manageable by the
1184 * platform at all, we try to detect D3cold by testing accessibility of the
1185 * vendor ID in config space.
1186 */
1187void pci_update_current_state(struct pci_dev *dev, pci_power_t state)
1188{
1189	if (platform_pci_get_power_state(dev) == PCI_D3cold) {
1190		dev->current_state = PCI_D3cold;
1191	} else if (dev->pm_cap) {
1192		u16 pmcsr;
1193
1194		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1195		if (PCI_POSSIBLE_ERROR(pmcsr)) {
1196			dev->current_state = PCI_D3cold;
1197			return;
1198		}
1199		dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
1200	} else {
1201		dev->current_state = state;
1202	}
1203}
1204
1205/**
1206 * pci_refresh_power_state - Refresh the given device's power state data
1207 * @dev: Target PCI device.
1208 *
 * Ask the platform to refresh the device's power state information and invoke
1210 * pci_update_current_state() to update its current PCI power state.
1211 */
1212void pci_refresh_power_state(struct pci_dev *dev)
1213{
1214	platform_pci_refresh_power_state(dev);
1215	pci_update_current_state(dev, dev->current_state);
1216}
1217
1218/**
1219 * pci_platform_power_transition - Use platform to change device power state
1220 * @dev: PCI device to handle.
1221 * @state: State to put the device into.
1222 */
1223int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
1224{
1225	int error;
1226
1227	error = platform_pci_set_power_state(dev, state);
1228	if (!error)
1229		pci_update_current_state(dev, state);
1230	else if (!dev->pm_cap) /* Fall back to PCI_D0 */
1231		dev->current_state = PCI_D0;
1232
1233	return error;
1234}
1235EXPORT_SYMBOL_GPL(pci_platform_power_transition);
1236
1237static int pci_resume_one(struct pci_dev *pci_dev, void *ign)
1238{
1239	pm_request_resume(&pci_dev->dev);
1240	return 0;
1241}
1242
1243/**
1244 * pci_resume_bus - Walk given bus and runtime resume devices on it
1245 * @bus: Top bus of the subtree to walk.
1246 */
1247void pci_resume_bus(struct pci_bus *bus)
1248{
1249	if (bus)
1250		pci_walk_bus(bus, pci_resume_one, NULL);
1251}
1252
1253static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
1254{
1255	int delay = 1;
1256	bool retrain = false;
1257	struct pci_dev *bridge;
1258
1259	if (pci_is_pcie(dev)) {
1260		bridge = pci_upstream_bridge(dev);
1261		if (bridge)
1262			retrain = true;
1263	}
1264
1265	/*
1266	 * After reset, the device should not silently discard config
1267	 * requests, but it may still indicate that it needs more time by
1268	 * responding to them with CRS completions.  The Root Port will
1269	 * generally synthesize ~0 (PCI_ERROR_RESPONSE) data to complete
1270	 * the read (except when CRS SV is enabled and the read was for the
1271	 * Vendor ID; in that case it synthesizes 0x0001 data).
1272	 *
1273	 * Wait for the device to return a non-CRS completion.  Read the
1274	 * Command register instead of Vendor ID so we don't have to
1275	 * contend with the CRS SV value.
1276	 */
1277	for (;;) {
1278		u32 id;
1279
1280		if (pci_dev_is_disconnected(dev)) {
1281			pci_dbg(dev, "disconnected; not waiting\n");
1282			return -ENOTTY;
1283		}
1284
1285		pci_read_config_dword(dev, PCI_COMMAND, &id);
1286		if (!PCI_POSSIBLE_ERROR(id))
1287			break;
1288
1289		if (delay > timeout) {
1290			pci_warn(dev, "not ready %dms after %s; giving up\n",
1291				 delay - 1, reset_type);
1292			return -ENOTTY;
1293		}
1294
1295		if (delay > PCI_RESET_WAIT) {
1296			if (retrain) {
1297				retrain = false;
1298				if (pcie_failed_link_retrain(bridge)) {
1299					delay = 1;
1300					continue;
1301				}
1302			}
1303			pci_info(dev, "not ready %dms after %s; waiting\n",
1304				 delay - 1, reset_type);
1305		}
1306
1307		msleep(delay);
1308		delay *= 2;
1309	}
1310
1311	if (delay > PCI_RESET_WAIT)
1312		pci_info(dev, "ready %dms after %s\n", delay - 1,
1313			 reset_type);
1314	else
1315		pci_dbg(dev, "ready %dms after %s\n", delay - 1,
1316			reset_type);
1317
1318	return 0;
1319}
1320
1321/**
1322 * pci_power_up - Put the given device into D0
1323 * @dev: PCI device to power up
1324 *
1325 * On success, return 0 or 1, depending on whether or not it is necessary to
1326 * restore the device's BARs subsequently (1 is returned in that case).
1327 *
1328 * On failure, return a negative error code.  Always return failure if @dev
1329 * lacks a Power Management Capability, even if the platform was able to
1330 * put the device in D0 via non-PCI means.
1331 */
1332int pci_power_up(struct pci_dev *dev)
1333{
1334	bool need_restore;
1335	pci_power_t state;
1336	u16 pmcsr;
1337
1338	platform_pci_set_power_state(dev, PCI_D0);
1339
1340	if (!dev->pm_cap) {
1341		state = platform_pci_get_power_state(dev);
1342		if (state == PCI_UNKNOWN)
1343			dev->current_state = PCI_D0;
1344		else
1345			dev->current_state = state;
1346
1347		return -EIO;
1348	}
1349
1350	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1351	if (PCI_POSSIBLE_ERROR(pmcsr)) {
1352		pci_err(dev, "Unable to change power state from %s to D0, device inaccessible\n",
1353			pci_power_name(dev->current_state));
1354		dev->current_state = PCI_D3cold;
1355		return -EIO;
1356	}
1357
1358	state = pmcsr & PCI_PM_CTRL_STATE_MASK;
1359
1360	need_restore = (state == PCI_D3hot || dev->current_state >= PCI_D3hot) &&
1361			!(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
1362
1363	if (state == PCI_D0)
1364		goto end;
1365
1366	/*
1367	 * Force the entire word to 0. This doesn't affect PME_Status, disables
1368	 * PME_En, and sets PowerState to 0.
1369	 */
1370	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, 0);
1371
1372	/* Mandatory transition delays; see PCI PM 1.2. */
1373	if (state == PCI_D3hot)
1374		pci_dev_d3_sleep(dev);
1375	else if (state == PCI_D2)
1376		udelay(PCI_PM_D2_DELAY);
1377
1378end:
1379	dev->current_state = PCI_D0;
1380	if (need_restore)
1381		return 1;
1382
1383	return 0;
1384}
1385
1386/**
1387 * pci_set_full_power_state - Put a PCI device into D0 and update its state
1388 * @dev: PCI device to power up
1389 * @locked: whether pci_bus_sem is held
1390 *
1391 * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
1392 * to confirm the state change, restore its BARs if they might be lost and
1393 * reconfigure ASPM in accordance with the new power state.
1394 *
1395 * If pci_restore_state() is going to be called right after a power state change
1396 * to D0, it is more efficient to use pci_power_up() directly instead of this
1397 * function.
1398 */
1399static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
1400{
1401	u16 pmcsr;
1402	int ret;
1403
1404	ret = pci_power_up(dev);
1405	if (ret < 0) {
1406		if (dev->current_state == PCI_D0)
1407			return 0;
1408
1409		return ret;
1410	}
1411
1412	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1413	dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
1414	if (dev->current_state != PCI_D0) {
1415		pci_info_ratelimited(dev, "Refused to change power state from %s to D0\n",
1416				     pci_power_name(dev->current_state));
1417	} else if (ret > 0) {
1418		/*
1419		 * According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
1420		 * INTERFACE SPECIFICATION, REV. 1.2", a device transitioning
1421		 * from D3hot to D0 _may_ perform an internal reset, thereby
1422		 * going to "D0 Uninitialized" rather than "D0 Initialized".
1423		 * For example, at least some versions of the 3c905B and the
1424		 * 3c556B exhibit this behaviour.
1425		 *
1426		 * At least some laptop BIOSen (e.g. the Thinkpad T21) leave
1427		 * devices in a D3hot state at boot.  Consequently, we need to
1428		 * restore at least the BARs so that the device will be
1429		 * accessible to its driver.
1430		 */
1431		pci_restore_bars(dev);
1432	}
1433
1434	if (dev->bus->self)
1435		pcie_aspm_pm_state_change(dev->bus->self, locked);
1436
1437	return 0;
1438}
1439
1440/**
1441 * __pci_dev_set_current_state - Set current state of a PCI device
1442 * @dev: Device to handle
1443 * @data: pointer to state to be set
1444 */
1445static int __pci_dev_set_current_state(struct pci_dev *dev, void *data)
1446{
1447	pci_power_t state = *(pci_power_t *)data;
1448
1449	dev->current_state = state;
1450	return 0;
1451}
1452
1453/**
1454 * pci_bus_set_current_state - Walk given bus and set current state of devices
1455 * @bus: Top bus of the subtree to walk.
1456 * @state: state to be set
1457 */
1458void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
1459{
1460	if (bus)
1461		pci_walk_bus(bus, __pci_dev_set_current_state, &state);
1462}
1463
1464static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
1465{
1466	if (!bus)
1467		return;
1468
1469	if (locked)
1470		pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
1471	else
1472		pci_walk_bus(bus, __pci_dev_set_current_state, &state);
1473}
1474
1475/**
1476 * pci_set_low_power_state - Put a PCI device into a low-power state.
1477 * @dev: PCI device to handle.
1478 * @state: PCI power state (D1, D2, D3hot) to put the device into.
1479 * @locked: whether pci_bus_sem is held
1480 *
1481 * Use the device's PCI_PM_CTRL register to put it into a low-power state.
1482 *
1483 * RETURN VALUE:
1484 * -EINVAL if the requested state is invalid.
1485 * -EIO if device does not support PCI PM or its PM capabilities register has a
1486 * wrong version, or device doesn't support the requested state.
1487 * 0 if device already is in the requested state.
1488 * 0 if device's power state has been successfully changed.
1489 */
1490static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
1491{
1492	u16 pmcsr;
1493
1494	if (!dev->pm_cap)
1495		return -EIO;
1496
1497	/*
1498	 * Validate transition: We can enter D0 from any state, but if
1499	 * we're already in a low-power state, we can only go deeper.  E.g.,
1500	 * we can go from D1 to D3, but we can't go directly from D3 to D1;
1501	 * we'd have to go from D3 to D0, then to D1.
1502	 */
1503	if (dev->current_state <= PCI_D3cold && dev->current_state > state) {
1504		pci_dbg(dev, "Invalid power transition (from %s to %s)\n",
1505			pci_power_name(dev->current_state),
1506			pci_power_name(state));
1507		return -EINVAL;
1508	}
1509
1510	/* Check if this device supports the desired state */
1511	if ((state == PCI_D1 && !dev->d1_support)
1512	   || (state == PCI_D2 && !dev->d2_support))
1513		return -EIO;
1514
1515	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1516	if (PCI_POSSIBLE_ERROR(pmcsr)) {
1517		pci_err(dev, "Unable to change power state from %s to %s, device inaccessible\n",
1518			pci_power_name(dev->current_state),
1519			pci_power_name(state));
1520		dev->current_state = PCI_D3cold;
1521		return -EIO;
1522	}
1523
1524	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
1525	pmcsr |= state;
1526
1527	/* Enter specified state */
1528	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
1529
1530	/* Mandatory power management transition delays; see PCI PM 1.2. */
1531	if (state == PCI_D3hot)
1532		pci_dev_d3_sleep(dev);
1533	else if (state == PCI_D2)
1534		udelay(PCI_PM_D2_DELAY);
1535
1536	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1537	dev->current_state = pmcsr & PCI_PM_CTRL_STATE_MASK;
1538	if (dev->current_state != state)
1539		pci_info_ratelimited(dev, "Refused to change power state from %s to %s\n",
1540				     pci_power_name(dev->current_state),
1541				     pci_power_name(state));
1542
1543	if (dev->bus->self)
1544		pcie_aspm_pm_state_change(dev->bus->self, locked);
1545
1546	return 0;
1547}
1548
1549static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
1550{
1551	int error;
1552
1553	/* Bound the state we're entering */
1554	if (state > PCI_D3cold)
1555		state = PCI_D3cold;
1556	else if (state < PCI_D0)
1557		state = PCI_D0;
1558	else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
1559
1560		/*
1561		 * If the device or the parent bridge do not support PCI
1562		 * PM, ignore the request if we're doing anything other
1563		 * than putting it into D0 (which would only happen on
1564		 * boot).
1565		 */
1566		return 0;
1567
1568	/* Check if we're already there */
1569	if (dev->current_state == state)
1570		return 0;
1571
1572	if (state == PCI_D0)
1573		return pci_set_full_power_state(dev, locked);
1574
1575	/*
	 * This device is quirked not to be put into D3, so don't put it in
	 * D3.
1578	 */
1579	if (state >= PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
1580		return 0;
1581
1582	if (state == PCI_D3cold) {
1583		/*
1584		 * To put the device in D3cold, put it into D3hot in the native
1585		 * way, then put it into D3cold using platform ops.
1586		 */
1587		error = pci_set_low_power_state(dev, PCI_D3hot, locked);
1588
1589		if (pci_platform_power_transition(dev, PCI_D3cold))
1590			return error;
1591
1592		/* Powering off a bridge may power off the whole hierarchy */
1593		if (dev->current_state == PCI_D3cold)
1594			__pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
1595	} else {
1596		error = pci_set_low_power_state(dev, state, locked);
1597
1598		if (pci_platform_power_transition(dev, state))
1599			return error;
1600	}
1601
1602	return 0;
1603}
1604
1605/**
1606 * pci_set_power_state - Set the power state of a PCI device
1607 * @dev: PCI device to handle.
1608 * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1609 *
1610 * Transition a device to a new power state, using the platform firmware and/or
1611 * the device's PCI PM registers.
1612 *
1613 * RETURN VALUE:
1614 * -EINVAL if the requested state is invalid.
1615 * -EIO if device does not support PCI PM or its PM capabilities register has a
1616 * wrong version, or device doesn't support the requested state.
1617 * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1618 * 0 if device already is in the requested state.
1619 * 0 if the transition is to D3 but D3 is not supported.
1620 * 0 if device's power state has been successfully changed.
1621 */
1622int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1623{
1624	return __pci_set_power_state(dev, state, false);
1625}
1626EXPORT_SYMBOL(pci_set_power_state);
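
/*
 * Illustrative use (assuming a valid pdev): park an idle device in D3hot and
 * bring it back to full power before touching it again:
 *
 *	pci_set_power_state(pdev, PCI_D3hot);
 *	...
 *	pci_set_power_state(pdev, PCI_D0);
 */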
1627
1628int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1629{
1630	lockdep_assert_held(&pci_bus_sem);
1631
1632	return __pci_set_power_state(dev, state, true);
1633}
1634EXPORT_SYMBOL(pci_set_power_state_locked);
1635
1636#define PCI_EXP_SAVE_REGS	7
1637
1638static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
1639						       u16 cap, bool extended)
1640{
1641	struct pci_cap_saved_state *tmp;
1642
1643	hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) {
1644		if (tmp->cap.cap_extended == extended && tmp->cap.cap_nr == cap)
1645			return tmp;
1646	}
1647	return NULL;
1648}
1649
1650struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap)
1651{
1652	return _pci_find_saved_cap(dev, cap, false);
1653}
1654
1655struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, u16 cap)
1656{
1657	return _pci_find_saved_cap(dev, cap, true);
1658}
1659
1660static int pci_save_pcie_state(struct pci_dev *dev)
1661{
1662	int i = 0;
1663	struct pci_cap_saved_state *save_state;
1664	u16 *cap;
1665
1666	if (!pci_is_pcie(dev))
1667		return 0;
1668
1669	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
1670	if (!save_state) {
1671		pci_err(dev, "buffer not found in %s\n", __func__);
1672		return -ENOMEM;
1673	}
1674
1675	cap = (u16 *)&save_state->cap.data[0];
1676	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &cap[i++]);
1677	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
1678	pcie_capability_read_word(dev, PCI_EXP_SLTCTL, &cap[i++]);
1679	pcie_capability_read_word(dev, PCI_EXP_RTCTL,  &cap[i++]);
1680	pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &cap[i++]);
1681	pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &cap[i++]);
1682	pcie_capability_read_word(dev, PCI_EXP_SLTCTL2, &cap[i++]);
1683
1684	pci_save_aspm_l1ss_state(dev);
1685	pci_save_ltr_state(dev);
1686
1687	return 0;
1688}
1689
1690static void pci_restore_pcie_state(struct pci_dev *dev)
1691{
1692	int i = 0;
1693	struct pci_cap_saved_state *save_state;
1694	u16 *cap;
1695
1696	/*
1697	 * Restore max latencies (in the LTR capability) before enabling
1698	 * LTR itself in PCI_EXP_DEVCTL2.
1699	 */
1700	pci_restore_ltr_state(dev);
1701	pci_restore_aspm_l1ss_state(dev);
1702
1703	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
1704	if (!save_state)
1705		return;
1706
1707	/*
1708	 * Downstream ports reset the LTR enable bit when link goes down.
1709	 * Check and re-configure the bit here before restoring device.
1710	 * PCIe r5.0, sec 7.5.3.16.
1711	 */
1712	pci_bridge_reconfigure_ltr(dev);
1713
1714	cap = (u16 *)&save_state->cap.data[0];
1715	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, cap[i++]);
1716	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
1717	pcie_capability_write_word(dev, PCI_EXP_SLTCTL, cap[i++]);
1718	pcie_capability_write_word(dev, PCI_EXP_RTCTL, cap[i++]);
1719	pcie_capability_write_word(dev, PCI_EXP_DEVCTL2, cap[i++]);
1720	pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, cap[i++]);
1721	pcie_capability_write_word(dev, PCI_EXP_SLTCTL2, cap[i++]);
1722}
1723
1724static int pci_save_pcix_state(struct pci_dev *dev)
1725{
1726	int pos;
1727	struct pci_cap_saved_state *save_state;
1728
1729	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
1730	if (!pos)
1731		return 0;
1732
1733	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
1734	if (!save_state) {
1735		pci_err(dev, "buffer not found in %s\n", __func__);
1736		return -ENOMEM;
1737	}
1738
1739	pci_read_config_word(dev, pos + PCI_X_CMD,
1740			     (u16 *)save_state->cap.data);
1741
1742	return 0;
1743}
1744
1745static void pci_restore_pcix_state(struct pci_dev *dev)
1746{
1747	int i = 0, pos;
1748	struct pci_cap_saved_state *save_state;
1749	u16 *cap;
1750
1751	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
1752	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
1753	if (!save_state || !pos)
1754		return;
1755	cap = (u16 *)&save_state->cap.data[0];
1756
1757	pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
1758}
1759
1760/**
1761 * pci_save_state - save the PCI configuration space of a device before
1762 *		    suspending
1763 * @dev: PCI device that we're dealing with
1764 */
1765int pci_save_state(struct pci_dev *dev)
1766{
1767	int i;
1768	/* XXX: 100% dword access ok here? */
1769	for (i = 0; i < 16; i++) {
1770		pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]);
1771		pci_dbg(dev, "save config %#04x: %#010x\n",
1772			i * 4, dev->saved_config_space[i]);
1773	}
1774	dev->state_saved = true;
1775
1776	i = pci_save_pcie_state(dev);
1777	if (i != 0)
1778		return i;
1779
1780	i = pci_save_pcix_state(dev);
1781	if (i != 0)
1782		return i;
1783
1784	pci_save_dpc_state(dev);
1785	pci_save_aer_state(dev);
1786	pci_save_ptm_state(dev);
1787	return pci_save_vc_state(dev);
1788}
1789EXPORT_SYMBOL(pci_save_state);
1790
1791static void pci_restore_config_dword(struct pci_dev *pdev, int offset,
1792				     u32 saved_val, int retry, bool force)
1793{
1794	u32 val;
1795
1796	pci_read_config_dword(pdev, offset, &val);
1797	if (!force && val == saved_val)
1798		return;
1799
1800	for (;;) {
1801		pci_dbg(pdev, "restore config %#04x: %#010x -> %#010x\n",
1802			offset, val, saved_val);
1803		pci_write_config_dword(pdev, offset, saved_val);
1804		if (retry-- <= 0)
1805			return;
1806
1807		pci_read_config_dword(pdev, offset, &val);
1808		if (val == saved_val)
1809			return;
1810
1811		mdelay(1);
1812	}
1813}
1814
1815static void pci_restore_config_space_range(struct pci_dev *pdev,
1816					   int start, int end, int retry,
1817					   bool force)
1818{
1819	int index;
1820
1821	for (index = end; index >= start; index--)
1822		pci_restore_config_dword(pdev, 4 * index,
1823					 pdev->saved_config_space[index],
1824					 retry, force);
1825}
1826
1827static void pci_restore_config_space(struct pci_dev *pdev)
1828{
1829	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
1830		pci_restore_config_space_range(pdev, 10, 15, 0, false);
1831		/* Restore BARs before the command register. */
1832		pci_restore_config_space_range(pdev, 4, 9, 10, false);
1833		pci_restore_config_space_range(pdev, 0, 3, 0, false);
1834	} else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1835		pci_restore_config_space_range(pdev, 12, 15, 0, false);
1836
1837		/*
1838		 * Force rewriting of prefetch registers to avoid S3 resume
1839		 * issues on Intel PCI bridges that occur when these
1840		 * registers are not explicitly written.
1841		 */
1842		pci_restore_config_space_range(pdev, 9, 11, 0, true);
1843		pci_restore_config_space_range(pdev, 0, 8, 0, false);
1844	} else {
1845		pci_restore_config_space_range(pdev, 0, 15, 0, false);
1846	}
1847}
1848
1849static void pci_restore_rebar_state(struct pci_dev *pdev)
1850{
1851	unsigned int pos, nbars, i;
1852	u32 ctrl;
1853
1854	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
1855	if (!pos)
1856		return;
1857
1858	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
1859	nbars = FIELD_GET(PCI_REBAR_CTRL_NBAR_MASK, ctrl);
1860
1861	for (i = 0; i < nbars; i++, pos += 8) {
1862		struct resource *res;
1863		int bar_idx, size;
1864
1865		pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
1866		bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX;
1867		res = pdev->resource + bar_idx;
1868		size = pci_rebar_bytes_to_size(resource_size(res));
1869		ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
1870		ctrl |= FIELD_PREP(PCI_REBAR_CTRL_BAR_SIZE, size);
1871		pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
1872	}
1873}
1874
1875/**
1876 * pci_restore_state - Restore the saved state of a PCI device
1877 * @dev: PCI device that we're dealing with
1878 */
1879void pci_restore_state(struct pci_dev *dev)
1880{
1881	if (!dev->state_saved)
1882		return;
1883
1884	pci_restore_pcie_state(dev);
1885	pci_restore_pasid_state(dev);
1886	pci_restore_pri_state(dev);
1887	pci_restore_ats_state(dev);
1888	pci_restore_vc_state(dev);
1889	pci_restore_rebar_state(dev);
1890	pci_restore_dpc_state(dev);
1891	pci_restore_ptm_state(dev);
1892
1893	pci_aer_clear_status(dev);
1894	pci_restore_aer_state(dev);
1895
1896	pci_restore_config_space(dev);
1897
1898	pci_restore_pcix_state(dev);
1899	pci_restore_msi_state(dev);
1900
1901	/* Restore ACS and IOV configuration state */
1902	pci_enable_acs(dev);
1903	pci_restore_iov_state(dev);
1904
1905	dev->state_saved = false;
1906}
1907EXPORT_SYMBOL(pci_restore_state);
1908
1909struct pci_saved_state {
1910	u32 config_space[16];
1911	struct pci_cap_saved_data cap[];
1912};
1913
1914/**
 * pci_store_saved_state - Allocate and return an opaque struct containing
 *			   the device's saved state.
1917 * @dev: PCI device that we're dealing with
1918 *
1919 * Return NULL if no state or error.
1920 */
1921struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
1922{
1923	struct pci_saved_state *state;
1924	struct pci_cap_saved_state *tmp;
1925	struct pci_cap_saved_data *cap;
1926	size_t size;
1927
1928	if (!dev->state_saved)
1929		return NULL;
1930
1931	size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
1932
1933	hlist_for_each_entry(tmp, &dev->saved_cap_space, next)
1934		size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
1935
1936	state = kzalloc(size, GFP_KERNEL);
1937	if (!state)
1938		return NULL;
1939
1940	memcpy(state->config_space, dev->saved_config_space,
1941	       sizeof(state->config_space));
1942
1943	cap = state->cap;
1944	hlist_for_each_entry(tmp, &dev->saved_cap_space, next) {
1945		size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
1946		memcpy(cap, &tmp->cap, len);
1947		cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
1948	}
1949	/* Empty cap_save terminates list */
1950
1951	return state;
1952}
1953EXPORT_SYMBOL_GPL(pci_store_saved_state);
1954
1955/**
1956 * pci_load_saved_state - Reload the provided save state into struct pci_dev.
1957 * @dev: PCI device that we're dealing with
1958 * @state: Saved state returned from pci_store_saved_state()
1959 */
1960int pci_load_saved_state(struct pci_dev *dev,
1961			 struct pci_saved_state *state)
1962{
1963	struct pci_cap_saved_data *cap;
1964
1965	dev->state_saved = false;
1966
1967	if (!state)
1968		return 0;
1969
1970	memcpy(dev->saved_config_space, state->config_space,
1971	       sizeof(state->config_space));
1972
1973	cap = state->cap;
1974	while (cap->size) {
1975		struct pci_cap_saved_state *tmp;
1976
1977		tmp = _pci_find_saved_cap(dev, cap->cap_nr, cap->cap_extended);
1978		if (!tmp || tmp->cap.size != cap->size)
1979			return -EINVAL;
1980
1981		memcpy(tmp->cap.data, cap->data, tmp->cap.size);
1982		cap = (struct pci_cap_saved_data *)((u8 *)cap +
1983		       sizeof(struct pci_cap_saved_data) + cap->size);
1984	}
1985
1986	dev->state_saved = true;
1987	return 0;
1988}
1989EXPORT_SYMBOL_GPL(pci_load_saved_state);
1990
1991/**
1992 * pci_load_and_free_saved_state - Reload the save state pointed to by state,
1993 *				   and free the memory allocated for it.
1994 * @dev: PCI device that we're dealing with
1995 * @state: Pointer to saved state returned from pci_store_saved_state()
1996 */
1997int pci_load_and_free_saved_state(struct pci_dev *dev,
1998				  struct pci_saved_state **state)
1999{
2000	int ret = pci_load_saved_state(dev, *state);
2001	kfree(*state);
2002	*state = NULL;
2003	return ret;
2004}
2005EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state);
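
/*
 * Illustrative sketch (not part of this file's logic): the store/load pair
 * lets a caller capture a known-good snapshot once and reapply it later,
 * for instance after lending the device to something that may scribble on
 * its config space.  The variable names are hypothetical.
 *
 *	struct pci_saved_state *snapshot;
 *
 *	pci_save_state(pdev);
 *	snapshot = pci_store_saved_state(pdev);
 *
 *	(device is used; its config space may be modified in the meantime)
 *
 *	if (snapshot) {
 *		pci_load_and_free_saved_state(pdev, &snapshot);
 *		pci_restore_state(pdev);
 *	}
 */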
2006
2007int __weak pcibios_enable_device(struct pci_dev *dev, int bars)
2008{
2009	return pci_enable_resources(dev, bars);
2010}
2011
2012static int do_pci_enable_device(struct pci_dev *dev, int bars)
2013{
2014	int err;
2015	struct pci_dev *bridge;
2016	u16 cmd;
2017	u8 pin;
2018
2019	err = pci_set_power_state(dev, PCI_D0);
2020	if (err < 0 && err != -EIO)
2021		return err;
2022
2023	bridge = pci_upstream_bridge(dev);
2024	if (bridge)
2025		pcie_aspm_powersave_config_link(bridge);
2026
2027	err = pcibios_enable_device(dev, bars);
2028	if (err < 0)
2029		return err;
2030	pci_fixup_device(pci_fixup_enable, dev);
2031
2032	if (dev->msi_enabled || dev->msix_enabled)
2033		return 0;
2034
2035	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
2036	if (pin) {
2037		pci_read_config_word(dev, PCI_COMMAND, &cmd);
2038		if (cmd & PCI_COMMAND_INTX_DISABLE)
2039			pci_write_config_word(dev, PCI_COMMAND,
2040					      cmd & ~PCI_COMMAND_INTX_DISABLE);
2041	}
2042
2043	return 0;
2044}
2045
2046/**
2047 * pci_reenable_device - Resume abandoned device
2048 * @dev: PCI device to be resumed
2049 *
 * NOTE: This function is a backend of pci_default_resume() and is not
 * supposed to be called by normal code; write a proper resume handler and
 * use it instead.
2052 */
2053int pci_reenable_device(struct pci_dev *dev)
2054{
2055	if (pci_is_enabled(dev))
2056		return do_pci_enable_device(dev, (1 << PCI_NUM_RESOURCES) - 1);
2057	return 0;
2058}
2059EXPORT_SYMBOL(pci_reenable_device);
2060
2061static void pci_enable_bridge(struct pci_dev *dev)
2062{
2063	struct pci_dev *bridge;
2064	int retval;
2065
2066	bridge = pci_upstream_bridge(dev);
2067	if (bridge)
2068		pci_enable_bridge(bridge);
2069
2070	if (pci_is_enabled(dev)) {
2071		if (!dev->is_busmaster)
2072			pci_set_master(dev);
2073		return;
2074	}
2075
2076	retval = pci_enable_device(dev);
2077	if (retval)
2078		pci_err(dev, "Error enabling bridge (%d), continuing\n",
2079			retval);
2080	pci_set_master(dev);
2081}
2082
2083static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
2084{
2085	struct pci_dev *bridge;
2086	int err;
2087	int i, bars = 0;
2088
2089	/*
2090	 * Power state could be unknown at this point, either due to a fresh
2091	 * boot or a device removal call.  So get the current power state
2092	 * so that things like MSI message writing will behave as expected
2093	 * (e.g. if the device really is in D0 at enable time).
2094	 */
2095	pci_update_current_state(dev, dev->current_state);
2096
2097	if (atomic_inc_return(&dev->enable_cnt) > 1)
2098		return 0;		/* already enabled */
2099
2100	bridge = pci_upstream_bridge(dev);
2101	if (bridge)
2102		pci_enable_bridge(bridge);
2103
	/* Collect all resources of the requested types, skipping SR-IOV ones */
2105	for (i = 0; i <= PCI_ROM_RESOURCE; i++)
2106		if (dev->resource[i].flags & flags)
2107			bars |= (1 << i);
2108	for (i = PCI_BRIDGE_RESOURCES; i < DEVICE_COUNT_RESOURCE; i++)
2109		if (dev->resource[i].flags & flags)
2110			bars |= (1 << i);
2111
2112	err = do_pci_enable_device(dev, bars);
2113	if (err < 0)
2114		atomic_dec(&dev->enable_cnt);
2115	return err;
2116}
2117
2118/**
2119 * pci_enable_device_mem - Initialize a device for use with Memory space
2120 * @dev: PCI device to be initialized
2121 *
2122 * Initialize device before it's used by a driver. Ask low-level code
2123 * to enable Memory resources. Wake up the device if it was suspended.
2124 * Beware, this function can fail.
2125 */
2126int pci_enable_device_mem(struct pci_dev *dev)
2127{
2128	return pci_enable_device_flags(dev, IORESOURCE_MEM);
2129}
2130EXPORT_SYMBOL(pci_enable_device_mem);
2131
2132/**
2133 * pci_enable_device - Initialize device before it's used by a driver.
2134 * @dev: PCI device to be initialized
2135 *
2136 * Initialize device before it's used by a driver. Ask low-level code
2137 * to enable I/O and memory. Wake up the device if it was suspended.
2138 * Beware, this function can fail.
2139 *
2140 * Note we don't actually enable the device many times if we call
2141 * this function repeatedly (we just increment the count).
2142 */
2143int pci_enable_device(struct pci_dev *dev)
2144{
2145	return pci_enable_device_flags(dev, IORESOURCE_MEM | IORESOURCE_IO);
2146}
2147EXPORT_SYMBOL(pci_enable_device);
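
/*
 * Illustrative sketch (not part of this file's logic): the usual
 * probe/remove pairing around pci_enable_device().  The driver name and
 * callbacks are hypothetical.
 *
 *	static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 *	{
 *		int err;
 *
 *		err = pci_enable_device(pdev);
 *		if (err)
 *			return err;
 *
 *		err = pci_request_regions(pdev, "foo");
 *		if (err)
 *			goto err_disable;
 *
 *		pci_set_master(pdev);
 *		return 0;
 *
 *	err_disable:
 *		pci_disable_device(pdev);
 *		return err;
 *	}
 *
 *	static void foo_remove(struct pci_dev *pdev)
 *	{
 *		pci_release_regions(pdev);
 *		pci_disable_device(pdev);
 *	}
 *
 * Drivers that only touch MMIO BARs may prefer pci_enable_device_mem() so
 * that an unassigned I/O BAR does not fail the probe.
 */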
2148
/**
2150 * pcibios_device_add - provide arch specific hooks when adding device dev
2151 * @dev: the PCI device being added
2152 *
2153 * Permits the platform to provide architecture specific functionality when
2154 * devices are added. This is the default implementation. Architecture
2155 * implementations can override this.
2156 */
2157int __weak pcibios_device_add(struct pci_dev *dev)
2158{
2159	return 0;
2160}
2161
2162/**
2163 * pcibios_release_device - provide arch specific hooks when releasing
2164 *			    device dev
2165 * @dev: the PCI device being released
2166 *
2167 * Permits the platform to provide architecture specific functionality when
2168 * devices are released. This is the default implementation. Architecture
2169 * implementations can override this.
2170 */
2171void __weak pcibios_release_device(struct pci_dev *dev) {}
2172
2173/**
2174 * pcibios_disable_device - disable arch specific PCI resources for device dev
2175 * @dev: the PCI device to disable
2176 *
2177 * Disables architecture specific PCI resources for the device. This
2178 * is the default implementation. Architecture implementations can
2179 * override this.
2180 */
2181void __weak pcibios_disable_device(struct pci_dev *dev) {}
2182
2183static void do_pci_disable_device(struct pci_dev *dev)
2184{
2185	u16 pci_command;
2186
2187	pci_read_config_word(dev, PCI_COMMAND, &pci_command);
2188	if (pci_command & PCI_COMMAND_MASTER) {
2189		pci_command &= ~PCI_COMMAND_MASTER;
2190		pci_write_config_word(dev, PCI_COMMAND, pci_command);
2191	}
2192
2193	pcibios_disable_device(dev);
2194}
2195
2196/**
2197 * pci_disable_enabled_device - Disable device without updating enable_cnt
2198 * @dev: PCI device to disable
2199 *
 * NOTE: This function is a backend of PCI power management routines and is
 * not supposed to be called by drivers.
2202 */
2203void pci_disable_enabled_device(struct pci_dev *dev)
2204{
2205	if (pci_is_enabled(dev))
2206		do_pci_disable_device(dev);
2207}
2208
2209/**
2210 * pci_disable_device - Disable PCI device after use
2211 * @dev: PCI device to be disabled
2212 *
2213 * Signal to the system that the PCI device is not in use by the system
2214 * anymore.  This only involves disabling PCI bus-mastering, if active.
2215 *
2216 * Note we don't actually disable the device until all callers of
2217 * pci_enable_device() have called pci_disable_device().
2218 */
2219void pci_disable_device(struct pci_dev *dev)
2220{
2221	struct pci_devres *dr;
2222
2223	dr = find_pci_dr(dev);
2224	if (dr)
2225		dr->enabled = 0;
2226
2227	dev_WARN_ONCE(&dev->dev, atomic_read(&dev->enable_cnt) <= 0,
2228		      "disabling already-disabled device");
2229
2230	if (atomic_dec_return(&dev->enable_cnt) != 0)
2231		return;
2232
2233	do_pci_disable_device(dev);
2234
2235	dev->is_busmaster = 0;
2236}
2237EXPORT_SYMBOL(pci_disable_device);
2238
2239/**
2240 * pcibios_set_pcie_reset_state - set reset state for device dev
 * @dev: the PCIe device to set the reset state for
2242 * @state: Reset state to enter into
2243 *
2244 * Set the PCIe reset state for the device. This is the default
2245 * implementation. Architecture implementations can override this.
2246 */
2247int __weak pcibios_set_pcie_reset_state(struct pci_dev *dev,
2248					enum pcie_reset_state state)
2249{
2250	return -EINVAL;
2251}
2252
2253/**
2254 * pci_set_pcie_reset_state - set reset state for device dev
 * @dev: the PCIe device to set the reset state for
2256 * @state: Reset state to enter into
2257 *
2258 * Sets the PCI reset state for the device.
2259 */
2260int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
2261{
2262	return pcibios_set_pcie_reset_state(dev, state);
2263}
2264EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
2265
2266#ifdef CONFIG_PCIEAER
2267void pcie_clear_device_status(struct pci_dev *dev)
2268{
2269	u16 sta;
2270
2271	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
2272	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
2273}
2274#endif
2275
2276/**
2277 * pcie_clear_root_pme_status - Clear root port PME interrupt status.
2278 * @dev: PCIe root port or event collector.
2279 */
2280void pcie_clear_root_pme_status(struct pci_dev *dev)
2281{
2282	pcie_capability_set_dword(dev, PCI_EXP_RTSTA, PCI_EXP_RTSTA_PME);
2283}
2284
2285/**
2286 * pci_check_pme_status - Check if given device has generated PME.
2287 * @dev: Device to check.
2288 *
2289 * Check the PME status of the device and if set, clear it and clear PME enable
2290 * (if set).  Return 'true' if PME status and PME enable were both set or
2291 * 'false' otherwise.
2292 */
2293bool pci_check_pme_status(struct pci_dev *dev)
2294{
2295	int pmcsr_pos;
2296	u16 pmcsr;
2297	bool ret = false;
2298
2299	if (!dev->pm_cap)
2300		return false;
2301
2302	pmcsr_pos = dev->pm_cap + PCI_PM_CTRL;
2303	pci_read_config_word(dev, pmcsr_pos, &pmcsr);
2304	if (!(pmcsr & PCI_PM_CTRL_PME_STATUS))
2305		return false;
2306
2307	/* Clear PME status. */
2308	pmcsr |= PCI_PM_CTRL_PME_STATUS;
2309	if (pmcsr & PCI_PM_CTRL_PME_ENABLE) {
2310		/* Disable PME to avoid interrupt flood. */
2311		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
2312		ret = true;
2313	}
2314
2315	pci_write_config_word(dev, pmcsr_pos, pmcsr);
2316
2317	return ret;
2318}
2319
2320/**
2321 * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
2322 * @dev: Device to handle.
2323 * @pme_poll_reset: Whether or not to reset the device's pme_poll flag.
2324 *
2325 * Check if @dev has generated PME and queue a resume request for it in that
2326 * case.
2327 */
2328static int pci_pme_wakeup(struct pci_dev *dev, void *pme_poll_reset)
2329{
2330	if (pme_poll_reset && dev->pme_poll)
2331		dev->pme_poll = false;
2332
2333	if (pci_check_pme_status(dev)) {
2334		pci_wakeup_event(dev);
2335		pm_request_resume(&dev->dev);
2336	}
2337	return 0;
2338}
2339
2340/**
2341 * pci_pme_wakeup_bus - Walk given bus and wake up devices on it, if necessary.
2342 * @bus: Top bus of the subtree to walk.
2343 */
2344void pci_pme_wakeup_bus(struct pci_bus *bus)
2345{
2346	if (bus)
2347		pci_walk_bus(bus, pci_pme_wakeup, (void *)true);
2348}
2349
2350
2351/**
2352 * pci_pme_capable - check the capability of PCI device to generate PME#
2353 * @dev: PCI device to handle.
2354 * @state: PCI state from which device will issue PME#.
2355 */
2356bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
2357{
2358	if (!dev->pm_cap)
2359		return false;
2360
2361	return !!(dev->pme_support & (1 << state));
2362}
2363EXPORT_SYMBOL(pci_pme_capable);
2364
2365static void pci_pme_list_scan(struct work_struct *work)
2366{
2367	struct pci_pme_device *pme_dev, *n;
2368
2369	mutex_lock(&pci_pme_list_mutex);
2370	list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) {
2371		struct pci_dev *pdev = pme_dev->dev;
2372
2373		if (pdev->pme_poll) {
2374			struct pci_dev *bridge = pdev->bus->self;
2375			struct device *dev = &pdev->dev;
2376			struct device *bdev = bridge ? &bridge->dev : NULL;
2377			int bref = 0;
2378
2379			/*
2380			 * If we have a bridge, it should be in an active/D0
2381			 * state or the configuration space of subordinate
2382			 * devices may not be accessible or stable over the
2383			 * course of the call.
2384			 */
2385			if (bdev) {
2386				bref = pm_runtime_get_if_active(bdev);
2387				if (!bref)
2388					continue;
2389
2390				if (bridge->current_state != PCI_D0)
2391					goto put_bridge;
2392			}
2393
2394			/*
2395			 * The device itself should be suspended but config
2396			 * space must be accessible, therefore it cannot be in
2397			 * D3cold.
2398			 */
2399			if (pm_runtime_suspended(dev) &&
2400			    pdev->current_state != PCI_D3cold)
2401				pci_pme_wakeup(pdev, NULL);
2402
2403put_bridge:
2404			if (bref > 0)
2405				pm_runtime_put(bdev);
2406		} else {
2407			list_del(&pme_dev->list);
2408			kfree(pme_dev);
2409		}
2410	}
2411	if (!list_empty(&pci_pme_list))
2412		queue_delayed_work(system_freezable_wq, &pci_pme_work,
2413				   msecs_to_jiffies(PME_TIMEOUT));
2414	mutex_unlock(&pci_pme_list_mutex);
2415}
2416
2417static void __pci_pme_active(struct pci_dev *dev, bool enable)
2418{
2419	u16 pmcsr;
2420
2421	if (!dev->pme_support)
2422		return;
2423
2424	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
2425	/* Clear PME_Status by writing 1 to it and enable PME# */
2426	pmcsr |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
2427	if (!enable)
2428		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
2429
2430	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
2431}
2432
2433/**
2434 * pci_pme_restore - Restore PME configuration after config space restore.
2435 * @dev: PCI device to update.
2436 */
2437void pci_pme_restore(struct pci_dev *dev)
2438{
2439	u16 pmcsr;
2440
2441	if (!dev->pme_support)
2442		return;
2443
2444	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
2445	if (dev->wakeup_prepared) {
2446		pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2447		pmcsr &= ~PCI_PM_CTRL_PME_STATUS;
2448	} else {
2449		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
2450		pmcsr |= PCI_PM_CTRL_PME_STATUS;
2451	}
2452	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
2453}
2454
2455/**
2456 * pci_pme_active - enable or disable PCI device's PME# function
2457 * @dev: PCI device to handle.
2458 * @enable: 'true' to enable PME# generation; 'false' to disable it.
2459 *
2460 * The caller must verify that the device is capable of generating PME# before
2461 * calling this function with @enable equal to 'true'.
2462 */
2463void pci_pme_active(struct pci_dev *dev, bool enable)
2464{
2465	__pci_pme_active(dev, enable);
2466
2467	/*
2468	 * PCI (as opposed to PCIe) PME requires that the device have
2469	 * its PME# line hooked up correctly. Not all hardware vendors
2470	 * do this, so the PME never gets delivered and the device
2471	 * remains asleep. The easiest way around this is to
2472	 * periodically walk the list of suspended devices and check
2473	 * whether any have their PME flag set. The assumption is that
2474	 * we'll wake up often enough anyway that this won't be a huge
2475	 * hit, and the power savings from the devices will still be a
2476	 * win.
2477	 *
2478	 * Although PCIe uses in-band PME message instead of PME# line
2479	 * to report PME, PME does not work for some PCIe devices in
2480	 * reality.  For example, there are devices that set their PME
2481	 * status bits, but don't really bother to send a PME message;
2482	 * there are PCI Express Root Ports that don't bother to
2483	 * trigger interrupts when they receive PME messages from the
2484	 * devices below.  So PME poll is used for PCIe devices too.
2485	 */
2486
2487	if (dev->pme_poll) {
2488		struct pci_pme_device *pme_dev;
2489		if (enable) {
2490			pme_dev = kmalloc(sizeof(struct pci_pme_device),
2491					  GFP_KERNEL);
2492			if (!pme_dev) {
2493				pci_warn(dev, "can't enable PME#\n");
2494				return;
2495			}
2496			pme_dev->dev = dev;
2497			mutex_lock(&pci_pme_list_mutex);
2498			list_add(&pme_dev->list, &pci_pme_list);
2499			if (list_is_singular(&pci_pme_list))
2500				queue_delayed_work(system_freezable_wq,
2501						   &pci_pme_work,
2502						   msecs_to_jiffies(PME_TIMEOUT));
2503			mutex_unlock(&pci_pme_list_mutex);
2504		} else {
2505			mutex_lock(&pci_pme_list_mutex);
2506			list_for_each_entry(pme_dev, &pci_pme_list, list) {
2507				if (pme_dev->dev == dev) {
2508					list_del(&pme_dev->list);
2509					kfree(pme_dev);
2510					break;
2511				}
2512			}
2513			mutex_unlock(&pci_pme_list_mutex);
2514		}
2515	}
2516
2517	pci_dbg(dev, "PME# %s\n", enable ? "enabled" : "disabled");
2518}
2519EXPORT_SYMBOL(pci_pme_active);
2520
2521/**
2522 * __pci_enable_wake - enable PCI device as wakeup event source
2523 * @dev: PCI device affected
2524 * @state: PCI state from which device will issue wakeup events
2525 * @enable: True to enable event generation; false to disable
2526 *
2527 * This enables the device as a wakeup event source, or disables it.
 * When such events involve platform-specific hooks, those hooks are
2529 * called automatically by this routine.
2530 *
2531 * Devices with legacy power management (no standard PCI PM capabilities)
2532 * always require such platform hooks.
2533 *
2534 * RETURN VALUE:
2535 * 0 is returned on success
2536 * -EINVAL is returned if device is not supposed to wake up the system
2537 * Error code depending on the platform is returned if both the platform and
2538 * the native mechanism fail to enable the generation of wake-up events
2539 */
2540static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
2541{
2542	int ret = 0;
2543
2544	/*
2545	 * Bridges that are not power-manageable directly only signal
2546	 * wakeup on behalf of subordinate devices which is set up
2547	 * elsewhere, so skip them. However, bridges that are
2548	 * power-manageable may signal wakeup for themselves (for example,
2549	 * on a hotplug event) and they need to be covered here.
2550	 */
2551	if (!pci_power_manageable(dev))
2552		return 0;
2553
2554	/* Don't do the same thing twice in a row for one device. */
2555	if (!!enable == !!dev->wakeup_prepared)
2556		return 0;
2557
2558	/*
2559	 * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don
2560	 * Anderson we should be doing PME# wake enable followed by ACPI wake
2561	 * enable.  To disable wake-up we call the platform first, for symmetry.
2562	 */
2563
2564	if (enable) {
2565		int error;
2566
2567		/*
2568		 * Enable PME signaling if the device can signal PME from
2569		 * D3cold regardless of whether or not it can signal PME from
2570		 * the current target state, because that will allow it to
2571		 * signal PME when the hierarchy above it goes into D3cold and
2572		 * the device itself ends up in D3cold as a result of that.
2573		 */
2574		if (pci_pme_capable(dev, state) || pci_pme_capable(dev, PCI_D3cold))
2575			pci_pme_active(dev, true);
2576		else
2577			ret = 1;
2578		error = platform_pci_set_wakeup(dev, true);
2579		if (ret)
2580			ret = error;
2581		if (!ret)
2582			dev->wakeup_prepared = true;
2583	} else {
2584		platform_pci_set_wakeup(dev, false);
2585		pci_pme_active(dev, false);
2586		dev->wakeup_prepared = false;
2587	}
2588
2589	return ret;
2590}
2591
2592/**
2593 * pci_enable_wake - change wakeup settings for a PCI device
2594 * @pci_dev: Target device
2595 * @state: PCI state from which device will issue wakeup events
2596 * @enable: Whether or not to enable event generation
2597 *
2598 * If @enable is set, check device_may_wakeup() for the device before calling
2599 * __pci_enable_wake() for it.
2600 */
2601int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable)
2602{
2603	if (enable && !device_may_wakeup(&pci_dev->dev))
2604		return -EINVAL;
2605
2606	return __pci_enable_wake(pci_dev, state, enable);
2607}
2608EXPORT_SYMBOL(pci_enable_wake);
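
/*
 * Illustrative sketch (not part of this file's logic): arming wakeup in a
 * hand-rolled system-suspend callback.  Most drivers get an equivalent
 * sequence from the PCI core; this only shows the intended call order.
 *
 *	static int foo_suspend(struct device *dev)
 *	{
 *		struct pci_dev *pdev = to_pci_dev(dev);
 *		bool wake = device_may_wakeup(dev);
 *
 *		pci_save_state(pdev);
 *		pci_enable_wake(pdev, PCI_D3hot, wake);
 *		return pci_set_power_state(pdev, PCI_D3hot);
 *	}
 */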
2609
2610/**
2611 * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
2612 * @dev: PCI device to prepare
2613 * @enable: True to enable wake-up event generation; false to disable
2614 *
2615 * Many drivers want the device to wake up the system from D3_hot or D3_cold
2616 * and this function allows them to set that up cleanly - pci_enable_wake()
2617 * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
2618 * ordering constraints.
2619 *
2620 * This function only returns error code if the device is not allowed to wake
2621 * up the system from sleep or it is not capable of generating PME# from both
2622 * D3_hot and D3_cold and the platform is unable to enable wake-up power for it.
2623 */
2624int pci_wake_from_d3(struct pci_dev *dev, bool enable)
2625{
2626	return pci_pme_capable(dev, PCI_D3cold) ?
2627			pci_enable_wake(dev, PCI_D3cold, enable) :
2628			pci_enable_wake(dev, PCI_D3hot, enable);
2629}
2630EXPORT_SYMBOL(pci_wake_from_d3);
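
/*
 * Illustrative sketch (not part of this file's logic): a NIC driver wiring
 * Wake-on-LAN to PCI wakeup.  struct foo_priv and foo_hw_set_wol() are
 * hypothetical.
 *
 *	static void foo_set_wol(struct foo_priv *priv, bool wol)
 *	{
 *		foo_hw_set_wol(priv, wol);
 *		device_set_wakeup_enable(&priv->pdev->dev, wol);
 *		pci_wake_from_d3(priv->pdev, wol);
 *	}
 */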
2631
2632/**
2633 * pci_target_state - find an appropriate low power state for a given PCI dev
2634 * @dev: PCI device
2635 * @wakeup: Whether or not wakeup functionality will be enabled for the device.
2636 *
2637 * Use underlying platform code to find a supported low power state for @dev.
2638 * If the platform can't manage @dev, return the deepest state from which it
2639 * can generate wake events, based on any available PME info.
2640 */
2641static pci_power_t pci_target_state(struct pci_dev *dev, bool wakeup)
2642{
2643	if (platform_pci_power_manageable(dev)) {
2644		/*
2645		 * Call the platform to find the target state for the device.
2646		 */
2647		pci_power_t state = platform_pci_choose_state(dev);
2648
2649		switch (state) {
2650		case PCI_POWER_ERROR:
2651		case PCI_UNKNOWN:
2652			return PCI_D3hot;
2653
2654		case PCI_D1:
2655		case PCI_D2:
2656			if (pci_no_d1d2(dev))
2657				return PCI_D3hot;
2658		}
2659
2660		return state;
2661	}
2662
2663	/*
2664	 * If the device is in D3cold even though it's not power-manageable by
2665	 * the platform, it may have been powered down by non-standard means.
2666	 * Best to let it slumber.
2667	 */
2668	if (dev->current_state == PCI_D3cold)
2669		return PCI_D3cold;
2670	else if (!dev->pm_cap)
2671		return PCI_D0;
2672
2673	if (wakeup && dev->pme_support) {
2674		pci_power_t state = PCI_D3hot;
2675
2676		/*
2677		 * Find the deepest state from which the device can generate
2678		 * PME#.
2679		 */
2680		while (state && !(dev->pme_support & (1 << state)))
2681			state--;
2682
2683		if (state)
2684			return state;
2685		else if (dev->pme_support & 1)
2686			return PCI_D0;
2687	}
2688
2689	return PCI_D3hot;
2690}
2691
2692/**
2693 * pci_prepare_to_sleep - prepare PCI device for system-wide transition
2694 *			  into a sleep state
2695 * @dev: Device to handle.
2696 *
2697 * Choose the power state appropriate for the device depending on whether
2698 * it can wake up the system and/or is power manageable by the platform
2699 * (PCI_D3hot is the default) and put the device into that state.
2700 */
2701int pci_prepare_to_sleep(struct pci_dev *dev)
2702{
2703	bool wakeup = device_may_wakeup(&dev->dev);
2704	pci_power_t target_state = pci_target_state(dev, wakeup);
2705	int error;
2706
2707	if (target_state == PCI_POWER_ERROR)
2708		return -EIO;
2709
2710	pci_enable_wake(dev, target_state, wakeup);
2711
2712	error = pci_set_power_state(dev, target_state);
2713
2714	if (error)
2715		pci_enable_wake(dev, target_state, false);
2716
2717	return error;
2718}
2719EXPORT_SYMBOL(pci_prepare_to_sleep);
2720
2721/**
2722 * pci_back_from_sleep - turn PCI device on during system-wide transition
2723 *			 into working state
2724 * @dev: Device to handle.
2725 *
2726 * Disable device's system wake-up capability and put it into D0.
2727 */
2728int pci_back_from_sleep(struct pci_dev *dev)
2729{
2730	int ret = pci_set_power_state(dev, PCI_D0);
2731
2732	if (ret)
2733		return ret;
2734
2735	pci_enable_wake(dev, PCI_D0, false);
2736	return 0;
2737}
2738EXPORT_SYMBOL(pci_back_from_sleep);
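
/*
 * Illustrative sketch (not part of this file's logic):
 * pci_prepare_to_sleep() and pci_back_from_sleep() form a matched pair for
 * callers that want the PCI core to pick the target state.  The callback
 * names are hypothetical.
 *
 *	static int foo_suspend_late(struct device *dev)
 *	{
 *		return pci_prepare_to_sleep(to_pci_dev(dev));
 *	}
 *
 *	static int foo_resume_early(struct device *dev)
 *	{
 *		return pci_back_from_sleep(to_pci_dev(dev));
 *	}
 */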
2739
2740/**
2741 * pci_finish_runtime_suspend - Carry out PCI-specific part of runtime suspend.
2742 * @dev: PCI device being suspended.
2743 *
2744 * Prepare @dev to generate wake-up events at run time and put it into a low
2745 * power state.
2746 */
2747int pci_finish_runtime_suspend(struct pci_dev *dev)
2748{
2749	pci_power_t target_state;
2750	int error;
2751
2752	target_state = pci_target_state(dev, device_can_wakeup(&dev->dev));
2753	if (target_state == PCI_POWER_ERROR)
2754		return -EIO;
2755
2756	__pci_enable_wake(dev, target_state, pci_dev_run_wake(dev));
2757
2758	error = pci_set_power_state(dev, target_state);
2759
2760	if (error)
2761		pci_enable_wake(dev, target_state, false);
2762
2763	return error;
2764}
2765
2766/**
2767 * pci_dev_run_wake - Check if device can generate run-time wake-up events.
2768 * @dev: Device to check.
2769 *
2770 * Return true if the device itself is capable of generating wake-up events
2771 * (through the platform or using the native PCIe PME) or if the device supports
2772 * PME and one of its upstream bridges can generate wake-up events.
2773 */
2774bool pci_dev_run_wake(struct pci_dev *dev)
2775{
2776	struct pci_bus *bus = dev->bus;
2777
2778	if (!dev->pme_support)
2779		return false;
2780
2781	/* PME-capable in principle, but not from the target power state */
2782	if (!pci_pme_capable(dev, pci_target_state(dev, true)))
2783		return false;
2784
2785	if (device_can_wakeup(&dev->dev))
2786		return true;
2787
2788	while (bus->parent) {
2789		struct pci_dev *bridge = bus->self;
2790
2791		if (device_can_wakeup(&bridge->dev))
2792			return true;
2793
2794		bus = bus->parent;
2795	}
2796
2797	/* We have reached the root bus. */
2798	if (bus->bridge)
2799		return device_can_wakeup(bus->bridge);
2800
2801	return false;
2802}
2803EXPORT_SYMBOL_GPL(pci_dev_run_wake);
2804
2805/**
2806 * pci_dev_need_resume - Check if it is necessary to resume the device.
2807 * @pci_dev: Device to check.
2808 *
 * Return 'true' if the device is not runtime-suspended, if it has to be
 * reconfigured due to a wakeup settings difference between system and runtime
 * suspend, or if its current power state is not suitable for the upcoming
 * (system-wide) transition.
2813 */
2814bool pci_dev_need_resume(struct pci_dev *pci_dev)
2815{
2816	struct device *dev = &pci_dev->dev;
2817	pci_power_t target_state;
2818
2819	if (!pm_runtime_suspended(dev) || platform_pci_need_resume(pci_dev))
2820		return true;
2821
2822	target_state = pci_target_state(pci_dev, device_may_wakeup(dev));
2823
2824	/*
2825	 * If the earlier platform check has not triggered, D3cold is just power
2826	 * removal on top of D3hot, so no need to resume the device in that
2827	 * case.
2828	 */
2829	return target_state != pci_dev->current_state &&
2830		target_state != PCI_D3cold &&
2831		pci_dev->current_state != PCI_D3hot;
2832}
2833
2834/**
2835 * pci_dev_adjust_pme - Adjust PME setting for a suspended device.
2836 * @pci_dev: Device to check.
2837 *
2838 * If the device is suspended and it is not configured for system wakeup,
2839 * disable PME for it to prevent it from waking up the system unnecessarily.
2840 *
2841 * Note that if the device's power state is D3cold and the platform check in
2842 * pci_dev_need_resume() has not triggered, the device's configuration need not
2843 * be changed.
2844 */
2845void pci_dev_adjust_pme(struct pci_dev *pci_dev)
2846{
2847	struct device *dev = &pci_dev->dev;
2848
2849	spin_lock_irq(&dev->power.lock);
2850
2851	if (pm_runtime_suspended(dev) && !device_may_wakeup(dev) &&
2852	    pci_dev->current_state < PCI_D3cold)
2853		__pci_pme_active(pci_dev, false);
2854
2855	spin_unlock_irq(&dev->power.lock);
2856}
2857
2858/**
2859 * pci_dev_complete_resume - Finalize resume from system sleep for a device.
2860 * @pci_dev: Device to handle.
2861 *
2862 * If the device is runtime suspended and wakeup-capable, enable PME for it as
2863 * it might have been disabled during the prepare phase of system suspend if
2864 * the device was not configured for system wakeup.
2865 */
2866void pci_dev_complete_resume(struct pci_dev *pci_dev)
2867{
2868	struct device *dev = &pci_dev->dev;
2869
2870	if (!pci_dev_run_wake(pci_dev))
2871		return;
2872
2873	spin_lock_irq(&dev->power.lock);
2874
2875	if (pm_runtime_suspended(dev) && pci_dev->current_state < PCI_D3cold)
2876		__pci_pme_active(pci_dev, true);
2877
2878	spin_unlock_irq(&dev->power.lock);
2879}
2880
2881/**
2882 * pci_choose_state - Choose the power state of a PCI device.
2883 * @dev: Target PCI device.
2884 * @state: Target state for the whole system.
2885 *
2886 * Returns PCI power state suitable for @dev and @state.
2887 */
2888pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
2889{
2890	if (state.event == PM_EVENT_ON)
2891		return PCI_D0;
2892
2893	return pci_target_state(dev, false);
2894}
2895EXPORT_SYMBOL(pci_choose_state);
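
/*
 * Illustrative sketch (not part of this file's logic): a legacy-PM driver
 * can use pci_choose_state() instead of hard-coding PCI_D3hot in its
 * .suspend callback.  The callback name is hypothetical.
 *
 *	static int foo_suspend(struct pci_dev *pdev, pm_message_t msg)
 *	{
 *		pci_save_state(pdev);
 *		return pci_set_power_state(pdev, pci_choose_state(pdev, msg));
 *	}
 */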
2896
2897void pci_config_pm_runtime_get(struct pci_dev *pdev)
2898{
2899	struct device *dev = &pdev->dev;
2900	struct device *parent = dev->parent;
2901
2902	if (parent)
2903		pm_runtime_get_sync(parent);
2904	pm_runtime_get_noresume(dev);
2905	/*
2906	 * pdev->current_state is set to PCI_D3cold during suspending,
2907	 * so wait until suspending completes
2908	 */
2909	pm_runtime_barrier(dev);
2910	/*
2911	 * Only need to resume devices in D3cold, because config
2912	 * registers are still accessible for devices suspended but
2913	 * not in D3cold.
2914	 */
2915	if (pdev->current_state == PCI_D3cold)
2916		pm_runtime_resume(dev);
2917}
2918
2919void pci_config_pm_runtime_put(struct pci_dev *pdev)
2920{
2921	struct device *dev = &pdev->dev;
2922	struct device *parent = dev->parent;
2923
2924	pm_runtime_put(dev);
2925	if (parent)
2926		pm_runtime_put_sync(parent);
2927}
2928
2929static const struct dmi_system_id bridge_d3_blacklist[] = {
2930#ifdef CONFIG_X86
2931	{
2932		/*
2933		 * Gigabyte X299 root port is not marked as hotplug capable
2934		 * which allows Linux to power manage it.  However, this
2935		 * confuses the BIOS SMI handler so don't power manage root
2936		 * ports on that system.
2937		 */
2938		.ident = "X299 DESIGNARE EX-CF",
2939		.matches = {
2940			DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
2941			DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
2942		},
2943	},
2944	{
2945		/*
2946		 * Downstream device is not accessible after putting a root port
2947		 * into D3cold and back into D0 on Elo Continental Z2 board
2948		 */
2949		.ident = "Elo Continental Z2",
2950		.matches = {
2951			DMI_MATCH(DMI_BOARD_VENDOR, "Elo Touch Solutions"),
2952			DMI_MATCH(DMI_BOARD_NAME, "Geminilake"),
2953			DMI_MATCH(DMI_BOARD_VERSION, "Continental Z2"),
2954		},
2955	},
2956	{
2957		/*
		 * Changing the power state of the root port the dGPU is
		 * connected to fails:
2959		 * https://gitlab.freedesktop.org/drm/amd/-/issues/3229
2960		 */
2961		.ident = "Hewlett-Packard HP Pavilion 17 Notebook PC/1972",
2962		.matches = {
2963			DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
2964			DMI_MATCH(DMI_BOARD_NAME, "1972"),
2965			DMI_MATCH(DMI_BOARD_VERSION, "95.33"),
2966		},
2967	},
2968#endif
2969	{ }
2970};
2971
2972/**
2973 * pci_bridge_d3_possible - Is it possible to put the bridge into D3
2974 * @bridge: Bridge to check
2975 *
2976 * This function checks if it is possible to move the bridge to D3.
2977 * Currently we only allow D3 for recent enough PCIe ports and Thunderbolt.
2978 */
2979bool pci_bridge_d3_possible(struct pci_dev *bridge)
2980{
2981	if (!pci_is_pcie(bridge))
2982		return false;
2983
2984	switch (pci_pcie_type(bridge)) {
2985	case PCI_EXP_TYPE_ROOT_PORT:
2986	case PCI_EXP_TYPE_UPSTREAM:
2987	case PCI_EXP_TYPE_DOWNSTREAM:
2988		if (pci_bridge_d3_disable)
2989			return false;
2990
2991		/*
2992		 * Hotplug ports handled by firmware in System Management Mode
2993		 * may not be put into D3 by the OS (Thunderbolt on non-Macs).
2994		 */
2995		if (bridge->is_hotplug_bridge && !pciehp_is_native(bridge))
2996			return false;
2997
2998		if (pci_bridge_d3_force)
2999			return true;
3000
3001		/* Even the oldest 2010 Thunderbolt controller supports D3. */
3002		if (bridge->is_thunderbolt)
3003			return true;
3004
3005		/* Platform might know better if the bridge supports D3 */
3006		if (platform_pci_bridge_d3(bridge))
3007			return true;
3008
3009		/*
3010		 * Hotplug ports handled natively by the OS were not validated
3011		 * by vendors for runtime D3 at least until 2018 because there
3012		 * was no OS support.
3013		 */
3014		if (bridge->is_hotplug_bridge)
3015			return false;
3016
3017		if (dmi_check_system(bridge_d3_blacklist))
3018			return false;
3019
3020		/*
3021		 * It should be safe to put PCIe ports from 2015 or newer
3022		 * to D3.
3023		 */
3024		if (dmi_get_bios_year() >= 2015)
3025			return true;
3026		break;
3027	}
3028
3029	return false;
3030}
3031
3032static int pci_dev_check_d3cold(struct pci_dev *dev, void *data)
3033{
3034	bool *d3cold_ok = data;
3035
3036	if (/* The device needs to be allowed to go D3cold ... */
3037	    dev->no_d3cold || !dev->d3cold_allowed ||
3038
3039	    /* ... and if it is wakeup capable to do so from D3cold. */
3040	    (device_may_wakeup(&dev->dev) &&
3041	     !pci_pme_capable(dev, PCI_D3cold)) ||
3042
3043	    /* If it is a bridge it must be allowed to go to D3. */
3044	    !pci_power_manageable(dev))
3045
3046		*d3cold_ok = false;
3047
3048	return !*d3cold_ok;
3049}
3050
/**
 * pci_bridge_d3_update - Update bridge D3 capabilities
 * @dev: PCI device which is changed
 *
 * Update upstream bridge PM capabilities depending on whether the device's
 * PM configuration was changed or the device is being removed.  The change
 * is also propagated upstream.
 */
3059void pci_bridge_d3_update(struct pci_dev *dev)
3060{
3061	bool remove = !device_is_registered(&dev->dev);
3062	struct pci_dev *bridge;
3063	bool d3cold_ok = true;
3064
3065	bridge = pci_upstream_bridge(dev);
3066	if (!bridge || !pci_bridge_d3_possible(bridge))
3067		return;
3068
3069	/*
3070	 * If D3 is currently allowed for the bridge, removing one of its
3071	 * children won't change that.
3072	 */
3073	if (remove && bridge->bridge_d3)
3074		return;
3075
3076	/*
3077	 * If D3 is currently allowed for the bridge and a child is added or
3078	 * changed, disallowance of D3 can only be caused by that child, so
3079	 * we only need to check that single device, not any of its siblings.
3080	 *
3081	 * If D3 is currently not allowed for the bridge, checking the device
3082	 * first may allow us to skip checking its siblings.
3083	 */
3084	if (!remove)
3085		pci_dev_check_d3cold(dev, &d3cold_ok);
3086
3087	/*
3088	 * If D3 is currently not allowed for the bridge, this may be caused
3089	 * either by the device being changed/removed or any of its siblings,
3090	 * so we need to go through all children to find out if one of them
3091	 * continues to block D3.
3092	 */
3093	if (d3cold_ok && !bridge->bridge_d3)
3094		pci_walk_bus(bridge->subordinate, pci_dev_check_d3cold,
3095			     &d3cold_ok);
3096
3097	if (bridge->bridge_d3 != d3cold_ok) {
3098		bridge->bridge_d3 = d3cold_ok;
3099		/* Propagate change to upstream bridges */
3100		pci_bridge_d3_update(bridge);
3101	}
3102}
3103
3104/**
3105 * pci_d3cold_enable - Enable D3cold for device
3106 * @dev: PCI device to handle
3107 *
 * This function can be used in drivers to enable D3cold for the device
3109 * they handle.  It also updates upstream PCI bridge PM capabilities
3110 * accordingly.
3111 */
3112void pci_d3cold_enable(struct pci_dev *dev)
3113{
3114	if (dev->no_d3cold) {
3115		dev->no_d3cold = false;
3116		pci_bridge_d3_update(dev);
3117	}
3118}
3119EXPORT_SYMBOL_GPL(pci_d3cold_enable);
3120
3121/**
3122 * pci_d3cold_disable - Disable D3cold for device
3123 * @dev: PCI device to handle
3124 *
 * This function can be used in drivers to disable D3cold for the device
3126 * they handle.  It also updates upstream PCI bridge PM capabilities
3127 * accordingly.
3128 */
3129void pci_d3cold_disable(struct pci_dev *dev)
3130{
3131	if (!dev->no_d3cold) {
3132		dev->no_d3cold = true;
3133		pci_bridge_d3_update(dev);
3134	}
3135}
3136EXPORT_SYMBOL_GPL(pci_d3cold_disable);
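
/*
 * Illustrative sketch (not part of this file's logic): a driver that knows
 * its device does not survive D3cold can veto it at probe time.  The quirk
 * flag and foo_init_hw() are hypothetical.
 *
 *	static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 *	{
 *		if (id->driver_data & FOO_QUIRK_NO_D3COLD)
 *			pci_d3cold_disable(pdev);
 *
 *		return foo_init_hw(pdev);
 *	}
 */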
3137
3138/**
3139 * pci_pm_init - Initialize PM functions of given PCI device
3140 * @dev: PCI device to handle.
3141 */
3142void pci_pm_init(struct pci_dev *dev)
3143{
3144	int pm;
3145	u16 status;
3146	u16 pmc;
3147
3148	pm_runtime_forbid(&dev->dev);
3149	pm_runtime_set_active(&dev->dev);
3150	pm_runtime_enable(&dev->dev);
3151	device_enable_async_suspend(&dev->dev);
3152	dev->wakeup_prepared = false;
3153
3154	dev->pm_cap = 0;
3155	dev->pme_support = 0;
3156
3157	/* find PCI PM capability in list */
3158	pm = pci_find_capability(dev, PCI_CAP_ID_PM);
3159	if (!pm)
3160		return;
3161	/* Check device's ability to generate PME# */
3162	pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc);
3163
3164	if ((pmc & PCI_PM_CAP_VER_MASK) > 3) {
3165		pci_err(dev, "unsupported PM cap regs version (%u)\n",
3166			pmc & PCI_PM_CAP_VER_MASK);
3167		return;
3168	}
3169
3170	dev->pm_cap = pm;
3171	dev->d3hot_delay = PCI_PM_D3HOT_WAIT;
3172	dev->d3cold_delay = PCI_PM_D3COLD_WAIT;
3173	dev->bridge_d3 = pci_bridge_d3_possible(dev);
3174	dev->d3cold_allowed = true;
3175
3176	dev->d1_support = false;
3177	dev->d2_support = false;
3178	if (!pci_no_d1d2(dev)) {
3179		if (pmc & PCI_PM_CAP_D1)
3180			dev->d1_support = true;
3181		if (pmc & PCI_PM_CAP_D2)
3182			dev->d2_support = true;
3183
3184		if (dev->d1_support || dev->d2_support)
3185			pci_info(dev, "supports%s%s\n",
3186				   dev->d1_support ? " D1" : "",
3187				   dev->d2_support ? " D2" : "");
3188	}
3189
3190	pmc &= PCI_PM_CAP_PME_MASK;
3191	if (pmc) {
3192		pci_info(dev, "PME# supported from%s%s%s%s%s\n",
3193			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
3194			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
3195			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
3196			 (pmc & PCI_PM_CAP_PME_D3hot) ? " D3hot" : "",
3197			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
3198		dev->pme_support = FIELD_GET(PCI_PM_CAP_PME_MASK, pmc);
3199		dev->pme_poll = true;
3200		/*
3201		 * Make device's PM flags reflect the wake-up capability, but
3202		 * let the user space enable it to wake up the system as needed.
3203		 */
3204		device_set_wakeup_capable(&dev->dev, true);
3205		/* Disable the PME# generation functionality */
3206		pci_pme_active(dev, false);
3207	}
3208
3209	pci_read_config_word(dev, PCI_STATUS, &status);
3210	if (status & PCI_STATUS_IMM_READY)
3211		dev->imm_ready = 1;
3212}
3213
3214static unsigned long pci_ea_flags(struct pci_dev *dev, u8 prop)
3215{
3216	unsigned long flags = IORESOURCE_PCI_FIXED | IORESOURCE_PCI_EA_BEI;
3217
3218	switch (prop) {
3219	case PCI_EA_P_MEM:
3220	case PCI_EA_P_VF_MEM:
3221		flags |= IORESOURCE_MEM;
3222		break;
3223	case PCI_EA_P_MEM_PREFETCH:
3224	case PCI_EA_P_VF_MEM_PREFETCH:
3225		flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
3226		break;
3227	case PCI_EA_P_IO:
3228		flags |= IORESOURCE_IO;
3229		break;
3230	default:
3231		return 0;
3232	}
3233
3234	return flags;
3235}
3236
3237static struct resource *pci_ea_get_resource(struct pci_dev *dev, u8 bei,
3238					    u8 prop)
3239{
3240	if (bei <= PCI_EA_BEI_BAR5 && prop <= PCI_EA_P_IO)
3241		return &dev->resource[bei];
3242#ifdef CONFIG_PCI_IOV
3243	else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5 &&
3244		 (prop == PCI_EA_P_VF_MEM || prop == PCI_EA_P_VF_MEM_PREFETCH))
3245		return &dev->resource[PCI_IOV_RESOURCES +
3246				      bei - PCI_EA_BEI_VF_BAR0];
3247#endif
3248	else if (bei == PCI_EA_BEI_ROM)
3249		return &dev->resource[PCI_ROM_RESOURCE];
3250	else
3251		return NULL;
3252}
3253
3254/* Read an Enhanced Allocation (EA) entry */
3255static int pci_ea_read(struct pci_dev *dev, int offset)
3256{
3257	struct resource *res;
3258	const char *res_name;
3259	int ent_size, ent_offset = offset;
3260	resource_size_t start, end;
3261	unsigned long flags;
3262	u32 dw0, bei, base, max_offset;
3263	u8 prop;
3264	bool support_64 = (sizeof(resource_size_t) >= 8);
3265
3266	pci_read_config_dword(dev, ent_offset, &dw0);
3267	ent_offset += 4;
3268
3269	/* Entry size field indicates DWORDs after 1st */
3270	ent_size = (FIELD_GET(PCI_EA_ES, dw0) + 1) << 2;
3271
3272	if (!(dw0 & PCI_EA_ENABLE)) /* Entry not enabled */
3273		goto out;
3274
3275	bei = FIELD_GET(PCI_EA_BEI, dw0);
3276	prop = FIELD_GET(PCI_EA_PP, dw0);
3277
3278	/*
3279	 * If the Property is in the reserved range, try the Secondary
3280	 * Property instead.
3281	 */
3282	if (prop > PCI_EA_P_BRIDGE_IO && prop < PCI_EA_P_MEM_RESERVED)
3283		prop = FIELD_GET(PCI_EA_SP, dw0);
3284	if (prop > PCI_EA_P_BRIDGE_IO)
3285		goto out;
3286
3287	res = pci_ea_get_resource(dev, bei, prop);
3288	res_name = pci_resource_name(dev, bei);
3289	if (!res) {
3290		pci_err(dev, "Unsupported EA entry BEI: %u\n", bei);
3291		goto out;
3292	}
3293
3294	flags = pci_ea_flags(dev, prop);
3295	if (!flags) {
3296		pci_err(dev, "Unsupported EA properties: %#x\n", prop);
3297		goto out;
3298	}
3299
3300	/* Read Base */
3301	pci_read_config_dword(dev, ent_offset, &base);
3302	start = (base & PCI_EA_FIELD_MASK);
3303	ent_offset += 4;
3304
3305	/* Read MaxOffset */
3306	pci_read_config_dword(dev, ent_offset, &max_offset);
3307	ent_offset += 4;
3308
3309	/* Read Base MSBs (if 64-bit entry) */
3310	if (base & PCI_EA_IS_64) {
3311		u32 base_upper;
3312
3313		pci_read_config_dword(dev, ent_offset, &base_upper);
3314		ent_offset += 4;
3315
3316		flags |= IORESOURCE_MEM_64;
3317
3318		/* entry starts above 32-bit boundary, can't use */
3319		if (!support_64 && base_upper)
3320			goto out;
3321
3322		if (support_64)
3323			start |= ((u64)base_upper << 32);
3324	}
3325
3326	end = start + (max_offset | 0x03);
3327
3328	/* Read MaxOffset MSBs (if 64-bit entry) */
3329	if (max_offset & PCI_EA_IS_64) {
3330		u32 max_offset_upper;
3331
3332		pci_read_config_dword(dev, ent_offset, &max_offset_upper);
3333		ent_offset += 4;
3334
3335		flags |= IORESOURCE_MEM_64;
3336
3337		/* entry too big, can't use */
3338		if (!support_64 && max_offset_upper)
3339			goto out;
3340
3341		if (support_64)
3342			end += ((u64)max_offset_upper << 32);
3343	}
3344
3345	if (end < start) {
3346		pci_err(dev, "EA Entry crosses address boundary\n");
3347		goto out;
3348	}
3349
3350	if (ent_size != ent_offset - offset) {
3351		pci_err(dev, "EA Entry Size (%d) does not match length read (%d)\n",
3352			ent_size, ent_offset - offset);
3353		goto out;
3354	}
3355
3356	res->name = pci_name(dev);
3357	res->start = start;
3358	res->end = end;
3359	res->flags = flags;
3360
3361	if (bei <= PCI_EA_BEI_BAR5)
3362		pci_info(dev, "%s %pR: from Enhanced Allocation, properties %#02x\n",
3363			 res_name, res, prop);
3364	else if (bei == PCI_EA_BEI_ROM)
3365		pci_info(dev, "%s %pR: from Enhanced Allocation, properties %#02x\n",
3366			 res_name, res, prop);
3367	else if (bei >= PCI_EA_BEI_VF_BAR0 && bei <= PCI_EA_BEI_VF_BAR5)
3368		pci_info(dev, "%s %pR: from Enhanced Allocation, properties %#02x\n",
3369			 res_name, res, prop);
3370	else
3371		pci_info(dev, "BEI %d %pR: from Enhanced Allocation, properties %#02x\n",
3372			   bei, res, prop);
3373
3374out:
3375	return offset + ent_size;
3376}
3377
3378/* Enhanced Allocation Initialization */
3379void pci_ea_init(struct pci_dev *dev)
3380{
3381	int ea;
3382	u8 num_ent;
3383	int offset;
3384	int i;
3385
3386	/* find PCI EA capability in list */
3387	ea = pci_find_capability(dev, PCI_CAP_ID_EA);
3388	if (!ea)
3389		return;
3390
3391	/* determine the number of entries */
3392	pci_bus_read_config_byte(dev->bus, dev->devfn, ea + PCI_EA_NUM_ENT,
3393					&num_ent);
3394	num_ent &= PCI_EA_NUM_ENT_MASK;
3395
3396	offset = ea + PCI_EA_FIRST_ENT;
3397
3398	/* Skip DWORD 2 for type 1 functions */
3399	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
3400		offset += 4;
3401
3402	/* parse each EA entry */
3403	for (i = 0; i < num_ent; ++i)
3404		offset = pci_ea_read(dev, offset);
3405}
3406
3407static void pci_add_saved_cap(struct pci_dev *pci_dev,
3408	struct pci_cap_saved_state *new_cap)
3409{
3410	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
3411}
3412
3413/**
3414 * _pci_add_cap_save_buffer - allocate buffer for saving given
3415 *			      capability registers
3416 * @dev: the PCI device
3417 * @cap: the capability to allocate the buffer for
3418 * @extended: Standard or Extended capability ID
3419 * @size: requested size of the buffer
3420 */
3421static int _pci_add_cap_save_buffer(struct pci_dev *dev, u16 cap,
3422				    bool extended, unsigned int size)
3423{
3424	int pos;
3425	struct pci_cap_saved_state *save_state;
3426
3427	if (extended)
3428		pos = pci_find_ext_capability(dev, cap);
3429	else
3430		pos = pci_find_capability(dev, cap);
3431
3432	if (!pos)
3433		return 0;
3434
3435	save_state = kzalloc(sizeof(*save_state) + size, GFP_KERNEL);
3436	if (!save_state)
3437		return -ENOMEM;
3438
3439	save_state->cap.cap_nr = cap;
3440	save_state->cap.cap_extended = extended;
3441	save_state->cap.size = size;
3442	pci_add_saved_cap(dev, save_state);
3443
3444	return 0;
3445}
3446
3447int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size)
3448{
3449	return _pci_add_cap_save_buffer(dev, cap, false, size);
3450}
3451
3452int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size)
3453{
3454	return _pci_add_cap_save_buffer(dev, cap, true, size);
3455}
3456
3457/**
3458 * pci_allocate_cap_save_buffers - allocate buffers for saving capabilities
3459 * @dev: the PCI device
3460 */
3461void pci_allocate_cap_save_buffers(struct pci_dev *dev)
3462{
3463	int error;
3464
3465	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
3466					PCI_EXP_SAVE_REGS * sizeof(u16));
3467	if (error)
3468		pci_err(dev, "unable to preallocate PCI Express save buffer\n");
3469
3470	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_PCIX, sizeof(u16));
3471	if (error)
3472		pci_err(dev, "unable to preallocate PCI-X save buffer\n");
3473
3474	error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_LTR,
3475					    2 * sizeof(u16));
3476	if (error)
3477		pci_err(dev, "unable to allocate suspend buffer for LTR\n");
3478
3479	pci_allocate_vc_save_buffers(dev);
3480}
3481
3482void pci_free_cap_save_buffers(struct pci_dev *dev)
3483{
3484	struct pci_cap_saved_state *tmp;
3485	struct hlist_node *n;
3486
3487	hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next)
3488		kfree(tmp);
3489}
3490
3491/**
3492 * pci_configure_ari - enable or disable ARI forwarding
3493 * @dev: the PCI device
3494 *
3495 * If @dev and its upstream bridge both support ARI, enable ARI in the
3496 * bridge.  Otherwise, disable ARI in the bridge.
3497 */
3498void pci_configure_ari(struct pci_dev *dev)
3499{
3500	u32 cap;
3501	struct pci_dev *bridge;
3502
3503	if (pcie_ari_disabled || !pci_is_pcie(dev) || dev->devfn)
3504		return;
3505
3506	bridge = dev->bus->self;
3507	if (!bridge)
3508		return;
3509
3510	pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap);
3511	if (!(cap & PCI_EXP_DEVCAP2_ARI))
3512		return;
3513
3514	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI)) {
3515		pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2,
3516					 PCI_EXP_DEVCTL2_ARI);
3517		bridge->ari_enabled = 1;
3518	} else {
3519		pcie_capability_clear_word(bridge, PCI_EXP_DEVCTL2,
3520					   PCI_EXP_DEVCTL2_ARI);
3521		bridge->ari_enabled = 0;
3522	}
3523}
3524
3525static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
3526{
3527	int pos;
3528	u16 cap, ctrl;
3529
3530	pos = pdev->acs_cap;
3531	if (!pos)
3532		return false;
3533
3534	/*
3535	 * Except for egress control, capabilities are either required
3536	 * or only required if controllable.  Features missing from the
3537	 * capability field can therefore be assumed as hard-wired enabled.
3538	 */
3539	pci_read_config_word(pdev, pos + PCI_ACS_CAP, &cap);
3540	acs_flags &= (cap | PCI_ACS_EC);
3541
3542	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
3543	return (ctrl & acs_flags) == acs_flags;
3544}
3545
3546/**
3547 * pci_acs_enabled - test ACS against required flags for a given device
3548 * @pdev: device to test
3549 * @acs_flags: required PCI ACS flags
3550 *
3551 * Return true if the device supports the provided flags.  Automatically
3552 * filters out flags that are not implemented on multifunction devices.
3553 *
3554 * Note that this interface checks the effective ACS capabilities of the
3555 * device rather than the actual capabilities.  For instance, most single
3556 * function endpoints are not required to support ACS because they have no
3557 * opportunity for peer-to-peer access.  We therefore return 'true'
3558 * regardless of whether the device exposes an ACS capability.  This makes
3559 * it much easier for callers of this function to ignore the actual type
3560 * or topology of the device when testing ACS support.
3561 */
3562bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
3563{
3564	int ret;
3565
3566	ret = pci_dev_specific_acs_enabled(pdev, acs_flags);
3567	if (ret >= 0)
3568		return ret > 0;
3569
3570	/*
3571	 * Conventional PCI and PCI-X devices never support ACS, either
3572	 * effectively or actually.  The shared bus topology implies that
3573	 * any device on the bus can receive or snoop DMA.
3574	 */
3575	if (!pci_is_pcie(pdev))
3576		return false;
3577
3578	switch (pci_pcie_type(pdev)) {
3579	/*
3580	 * PCI/X-to-PCIe bridges are not specifically mentioned by the spec,
3581	 * but since their primary interface is PCI/X, we conservatively
3582	 * handle them as we would a non-PCIe device.
3583	 */
3584	case PCI_EXP_TYPE_PCIE_BRIDGE:
3585	/*
3586	 * PCIe 3.0, 6.12.1 excludes ACS on these devices.  "ACS is never
3587	 * applicable... must never implement an ACS Extended Capability...".
3588	 * This seems arbitrary, but we take a conservative interpretation
3589	 * of this statement.
3590	 */
3591	case PCI_EXP_TYPE_PCI_BRIDGE:
3592	case PCI_EXP_TYPE_RC_EC:
3593		return false;
3594	/*
3595	 * PCIe 3.0, 6.12.1.1 specifies that downstream and root ports should
3596	 * implement ACS in order to indicate their peer-to-peer capabilities,
3597	 * regardless of whether they are single- or multi-function devices.
3598	 */
3599	case PCI_EXP_TYPE_DOWNSTREAM:
3600	case PCI_EXP_TYPE_ROOT_PORT:
3601		return pci_acs_flags_enabled(pdev, acs_flags);
3602	/*
3603	 * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be
3604	 * implemented by the remaining PCIe types to indicate peer-to-peer
3605	 * capabilities, but only when they are part of a multifunction
3606	 * device.  The footnote for section 6.12 indicates the specific
3607	 * PCIe types included here.
3608	 */
3609	case PCI_EXP_TYPE_ENDPOINT:
3610	case PCI_EXP_TYPE_UPSTREAM:
3611	case PCI_EXP_TYPE_LEG_END:
3612	case PCI_EXP_TYPE_RC_END:
3613		if (!pdev->multifunction)
3614			break;
3615
3616		return pci_acs_flags_enabled(pdev, acs_flags);
3617	}
3618
3619	/*
3620	 * PCIe 3.0, 6.12.1.3 specifies no ACS capabilities are applicable
3621	 * to single function devices with the exception of downstream ports.
3622	 */
3623	return true;
3624}
3625
3626/**
3627 * pci_acs_path_enabled - test ACS flags from start to end in a hierarchy
3628 * @start: starting downstream device
3629 * @end: ending upstream device or NULL to search to the root bus
3630 * @acs_flags: required flags
3631 *
3632 * Walk up a device tree from start to end testing PCI ACS support.  If
3633 * any step along the way does not support the required flags, return false.
3634 */
3635bool pci_acs_path_enabled(struct pci_dev *start,
3636			  struct pci_dev *end, u16 acs_flags)
3637{
3638	struct pci_dev *pdev, *parent = start;
3639
3640	do {
3641		pdev = parent;
3642
3643		if (!pci_acs_enabled(pdev, acs_flags))
3644			return false;
3645
3646		if (pci_is_root_bus(pdev->bus))
3647			return (end == NULL);
3648
3649		parent = pdev->bus->self;
3650	} while (pdev != end);
3651
3652	return true;
3653}
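
/*
 * Illustrative sketch (not part of this file's logic): in-kernel users such
 * as device-assignment code can ask whether peer-to-peer TLPs from a device
 * are forced up to the IOMMU along the whole path to the root bus.  The
 * flag set below is only an example.
 *
 *	u16 acs = PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF;
 *
 *	if (!pci_acs_path_enabled(pdev, NULL, acs))
 *		pci_warn(pdev, "peer-to-peer DMA is not fully isolated\n");
 */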
3654
3655/**
3656 * pci_acs_init - Initialize ACS if hardware supports it
3657 * @dev: the PCI device
3658 */
3659void pci_acs_init(struct pci_dev *dev)
3660{
3661	dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
3662
3663	/*
3664	 * Attempt to enable ACS regardless of capability because some Root
3665	 * Ports (e.g. those quirked with *_intel_pch_acs_*) do not have
3666	 * the standard ACS capability but still support ACS via those
3667	 * quirks.
3668	 */
3669	pci_enable_acs(dev);
3670}
3671
3672/**
3673 * pci_rebar_find_pos - find position of resize ctrl reg for BAR
3674 * @pdev: PCI device
3675 * @bar: BAR to find
3676 *
3677 * Helper to find the position of the ctrl register for a BAR.
3678 * Returns -ENOTSUPP if resizable BARs are not supported at all.
3679 * Returns -ENOENT if no ctrl register for the BAR could be found.
3680 */
3681static int pci_rebar_find_pos(struct pci_dev *pdev, int bar)
3682{
3683	unsigned int pos, nbars, i;
3684	u32 ctrl;
3685
3686	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR);
3687	if (!pos)
3688		return -ENOTSUPP;
3689
3690	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
3691	nbars = FIELD_GET(PCI_REBAR_CTRL_NBAR_MASK, ctrl);
3692
3693	for (i = 0; i < nbars; i++, pos += 8) {
3694		int bar_idx;
3695
3696		pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
3697		bar_idx = FIELD_GET(PCI_REBAR_CTRL_BAR_IDX, ctrl);
3698		if (bar_idx == bar)
3699			return pos;
3700	}
3701
3702	return -ENOENT;
3703}
3704
3705/**
3706 * pci_rebar_get_possible_sizes - get possible sizes for BAR
3707 * @pdev: PCI device
3708 * @bar: BAR to query
3709 *
3710 * Get the possible sizes of a resizable BAR as bitmask defined in the spec
3711 * (bit 0=1MB, bit 19=512GB). Returns 0 if BAR isn't resizable.
3712 */
3713u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar)
3714{
3715	int pos;
3716	u32 cap;
3717
3718	pos = pci_rebar_find_pos(pdev, bar);
3719	if (pos < 0)
3720		return 0;
3721
3722	pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap);
3723	cap = FIELD_GET(PCI_REBAR_CAP_SIZES, cap);
3724
3725	/* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */
3726	if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f &&
3727	    bar == 0 && cap == 0x700)
3728		return 0x3f00;
3729
3730	return cap;
3731}
3732EXPORT_SYMBOL(pci_rebar_get_possible_sizes);
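
/*
 * Illustrative sketch (not part of this file's logic): a GPU-style driver
 * growing a BAR.  The usual driver entry point is pci_resize_resource()
 * (which ends up in pci_rebar_set_size() below); picking the largest
 * advertised size is just one possible policy.
 *
 *	int bar = 0;
 *	u32 sizes = pci_rebar_get_possible_sizes(pdev, bar);
 *
 *	if (sizes) {
 *		int size = __fls(sizes);
 *
 *		pci_release_resource(pdev, bar);
 *		if (pci_resize_resource(pdev, bar, size))
 *			pci_info(pdev, "BAR %d resize failed\n", bar);
 *	}
 */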
3733
3734/**
3735 * pci_rebar_get_current_size - get the current size of a BAR
3736 * @pdev: PCI device
 * @bar: BAR to query
3738 *
3739 * Read the size of a BAR from the resizable BAR config.
3740 * Returns size if found or negative error code.
3741 */
3742int pci_rebar_get_current_size(struct pci_dev *pdev, int bar)
3743{
3744	int pos;
3745	u32 ctrl;
3746
3747	pos = pci_rebar_find_pos(pdev, bar);
3748	if (pos < 0)
3749		return pos;
3750
3751	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
3752	return FIELD_GET(PCI_REBAR_CTRL_BAR_SIZE, ctrl);
3753}
3754
3755/**
3756 * pci_rebar_set_size - set a new size for a BAR
3757 * @pdev: PCI device
3758 * @bar: BAR to set size to
3759 * @size: new size as defined in the spec (0=1MB, 19=512GB)
3760 *
3761 * Set the new size of a BAR as defined in the spec.
3762 * Returns zero if resizing was successful, error code otherwise.
3763 */
3764int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size)
3765{
3766	int pos;
3767	u32 ctrl;
3768
3769	pos = pci_rebar_find_pos(pdev, bar);
3770	if (pos < 0)
3771		return pos;
3772
3773	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
3774	ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
3775	ctrl |= FIELD_PREP(PCI_REBAR_CTRL_BAR_SIZE, size);
3776	pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
3777	return 0;
3778}
3779
3780/**
3781 * pci_enable_atomic_ops_to_root - enable AtomicOp requests to root port
3782 * @dev: the PCI device
3783 * @cap_mask: mask of desired AtomicOp sizes, including one or more of:
3784 *	PCI_EXP_DEVCAP2_ATOMIC_COMP32
3785 *	PCI_EXP_DEVCAP2_ATOMIC_COMP64
3786 *	PCI_EXP_DEVCAP2_ATOMIC_COMP128
3787 *
3788 * Return 0 if all upstream bridges support AtomicOp routing, egress
3789 * blocking is disabled on all upstream ports, and the root port supports
3790 * the requested completion capabilities (32-bit, 64-bit and/or 128-bit
3791 * AtomicOp completion), or negative otherwise.
3792 */
3793int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
3794{
3795	struct pci_bus *bus = dev->bus;
3796	struct pci_dev *bridge;
3797	u32 cap, ctl2;
3798
3799	/*
3800	 * Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit
3801	 * in Device Control 2 is reserved in VFs and the PF value applies
3802	 * to all associated VFs.
3803	 */
3804	if (dev->is_virtfn)
3805		return -EINVAL;
3806
3807	if (!pci_is_pcie(dev))
3808		return -EINVAL;
3809
3810	/*
3811	 * Per PCIe r4.0, sec 6.15, endpoints and root ports may be
3812	 * AtomicOp requesters.  For now, we only support endpoints as
3813	 * requesters and root ports as completers.  No endpoints as
3814	 * completers, and no peer-to-peer.
3815	 */
3816
3817	switch (pci_pcie_type(dev)) {
3818	case PCI_EXP_TYPE_ENDPOINT:
3819	case PCI_EXP_TYPE_LEG_END:
3820	case PCI_EXP_TYPE_RC_END:
3821		break;
3822	default:
3823		return -EINVAL;
3824	}
3825
3826	while (bus->parent) {
3827		bridge = bus->self;
3828
3829		pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap);
3830
3831		switch (pci_pcie_type(bridge)) {
3832		/* Ensure switch ports support AtomicOp routing */
3833		case PCI_EXP_TYPE_UPSTREAM:
3834		case PCI_EXP_TYPE_DOWNSTREAM:
3835			if (!(cap & PCI_EXP_DEVCAP2_ATOMIC_ROUTE))
3836				return -EINVAL;
3837			break;
3838
3839		/* Ensure root port supports all the sizes we care about */
3840		case PCI_EXP_TYPE_ROOT_PORT:
3841			if ((cap & cap_mask) != cap_mask)
3842				return -EINVAL;
3843			break;
3844		}
3845
3846		/* Ensure upstream ports don't block AtomicOps on egress */
3847		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
3848			pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
3849						   &ctl2);
3850			if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)
3851				return -EINVAL;
3852		}
3853
3854		bus = bus->parent;
3855	}
3856
3857	pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
3858				 PCI_EXP_DEVCTL2_ATOMIC_REQ);
3859	return 0;
3860}
3861EXPORT_SYMBOL(pci_enable_atomic_ops_to_root);
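
/*
 * Illustrative sketch (not part of this file): an endpoint driver that issues
 * 32-bit and 64-bit AtomicOps might enable them toward the Root Port roughly
 * like this, treating failure as "AtomicOps unavailable" rather than fatal:
 *
 *	if (pci_enable_atomic_ops_to_root(pdev,
 *			PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
 *			PCI_EXP_DEVCAP2_ATOMIC_COMP64))
 *		dev_info(&pdev->dev, "PCIe AtomicOps not supported\n");
 */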
3862
3863/**
3864 * pci_release_region - Release a PCI BAR
3865 * @pdev: PCI device whose resources were previously reserved by
3866 *	  pci_request_region()
3867 * @bar: BAR to release
3868 *
3869 * Releases the PCI I/O and memory resources previously reserved by a
3870 * successful call to pci_request_region().  Call this function only
3871 * after all use of the PCI regions has ceased.
3872 */
3873void pci_release_region(struct pci_dev *pdev, int bar)
3874{
3875	struct pci_devres *dr;
3876
3877	if (pci_resource_len(pdev, bar) == 0)
3878		return;
3879	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
3880		release_region(pci_resource_start(pdev, bar),
3881				pci_resource_len(pdev, bar));
3882	else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
3883		release_mem_region(pci_resource_start(pdev, bar),
3884				pci_resource_len(pdev, bar));
3885
3886	dr = find_pci_dr(pdev);
3887	if (dr)
3888		dr->region_mask &= ~(1 << bar);
3889}
3890EXPORT_SYMBOL(pci_release_region);
3891
3892/**
3893 * __pci_request_region - Reserve PCI I/O and memory resource
3894 * @pdev: PCI device whose resources are to be reserved
3895 * @bar: BAR to be reserved
3896 * @res_name: Name to be associated with resource.
3897 * @exclusive: whether the region access is exclusive or not
3898 *
3899 * Mark the PCI region associated with PCI device @pdev BAR @bar as
3900 * being reserved by owner @res_name.  Do not access any
3901 * address inside the PCI regions unless this call returns
3902 * successfully.
3903 *
3904 * If @exclusive is set, then the region is marked so that userspace
3905 * is explicitly not allowed to map the resource via /dev/mem or
3906 * sysfs MMIO access.
3907 *
3908 * Returns 0 on success, or %EBUSY on error.  A warning
3909 * message is also printed on failure.
3910 */
3911static int __pci_request_region(struct pci_dev *pdev, int bar,
3912				const char *res_name, int exclusive)
3913{
3914	struct pci_devres *dr;
3915
3916	if (pci_resource_len(pdev, bar) == 0)
3917		return 0;
3918
3919	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
3920		if (!request_region(pci_resource_start(pdev, bar),
3921			    pci_resource_len(pdev, bar), res_name))
3922			goto err_out;
3923	} else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
3924		if (!__request_mem_region(pci_resource_start(pdev, bar),
3925					pci_resource_len(pdev, bar), res_name,
3926					exclusive))
3927			goto err_out;
3928	}
3929
3930	dr = find_pci_dr(pdev);
3931	if (dr)
3932		dr->region_mask |= 1 << bar;
3933
3934	return 0;
3935
3936err_out:
3937	pci_warn(pdev, "BAR %d: can't reserve %pR\n", bar,
3938		 &pdev->resource[bar]);
3939	return -EBUSY;
3940}
3941
3942/**
3943 * pci_request_region - Reserve PCI I/O and memory resource
3944 * @pdev: PCI device whose resources are to be reserved
3945 * @bar: BAR to be reserved
3946 * @res_name: Name to be associated with resource
3947 *
3948 * Mark the PCI region associated with PCI device @pdev BAR @bar as
3949 * being reserved by owner @res_name.  Do not access any
3950 * address inside the PCI regions unless this call returns
3951 * successfully.
3952 *
3953 * Returns 0 on success, or %EBUSY on error.  A warning
3954 * message is also printed on failure.
3955 */
3956int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
3957{
3958	return __pci_request_region(pdev, bar, res_name, 0);
3959}
3960EXPORT_SYMBOL(pci_request_region);
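
/*
 * Illustrative sketch (not part of this file): requesting a single BAR before
 * mapping it.  The "my_driver" name and the use of BAR 0 are placeholders for
 * the example only.
 *
 *	err = pci_request_region(pdev, 0, "my_driver");
 *	if (err)
 *		return err;
 *	regs = pci_iomap(pdev, 0, 0);
 *	if (!regs) {
 *		pci_release_region(pdev, 0);
 *		return -ENOMEM;
 *	}
 */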
3961
3962/**
3963 * pci_release_selected_regions - Release selected PCI I/O and memory resources
3964 * @pdev: PCI device whose resources were previously reserved
3965 * @bars: Bitmask of BARs to be released
3966 *
3967 * Release selected PCI I/O and memory resources previously reserved.
3968 * Call this function only after all use of the PCI regions has ceased.
3969 */
3970void pci_release_selected_regions(struct pci_dev *pdev, int bars)
3971{
3972	int i;
3973
3974	for (i = 0; i < PCI_STD_NUM_BARS; i++)
3975		if (bars & (1 << i))
3976			pci_release_region(pdev, i);
3977}
3978EXPORT_SYMBOL(pci_release_selected_regions);
3979
3980static int __pci_request_selected_regions(struct pci_dev *pdev, int bars,
3981					  const char *res_name, int excl)
3982{
3983	int i;
3984
3985	for (i = 0; i < PCI_STD_NUM_BARS; i++)
3986		if (bars & (1 << i))
3987			if (__pci_request_region(pdev, i, res_name, excl))
3988				goto err_out;
3989	return 0;
3990
3991err_out:
3992	while (--i >= 0)
3993		if (bars & (1 << i))
3994			pci_release_region(pdev, i);
3995
3996	return -EBUSY;
3997}
3998
4000/**
4001 * pci_request_selected_regions - Reserve selected PCI I/O and memory resources
4002 * @pdev: PCI device whose resources are to be reserved
4003 * @bars: Bitmask of BARs to be requested
4004 * @res_name: Name to be associated with resource
4005 */
4006int pci_request_selected_regions(struct pci_dev *pdev, int bars,
4007				 const char *res_name)
4008{
4009	return __pci_request_selected_regions(pdev, bars, res_name, 0);
4010}
4011EXPORT_SYMBOL(pci_request_selected_regions);
4012
4013int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars,
4014					   const char *res_name)
4015{
4016	return __pci_request_selected_regions(pdev, bars, res_name,
4017			IORESOURCE_EXCLUSIVE);
4018}
4019EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
4020
4021/**
4022 * pci_release_regions - Release reserved PCI I/O and memory resources
4023 * @pdev: PCI device whose resources were previously reserved by
4024 *	  pci_request_regions()
4025 *
4026 * Releases all PCI I/O and memory resources previously reserved by a
4027 * successful call to pci_request_regions().  Call this function only
4028 * after all use of the PCI regions has ceased.
4029 */
4031void pci_release_regions(struct pci_dev *pdev)
4032{
4033	pci_release_selected_regions(pdev, (1 << PCI_STD_NUM_BARS) - 1);
4034}
4035EXPORT_SYMBOL(pci_release_regions);
4036
4037/**
4038 * pci_request_regions - Reserve PCI I/O and memory resources
4039 * @pdev: PCI device whose resources are to be reserved
4040 * @res_name: Name to be associated with resource.
4041 *
4042 * Mark all PCI regions associated with PCI device @pdev as
4043 * being reserved by owner @res_name.  Do not access any
4044 * address inside the PCI regions unless this call returns
4045 * successfully.
4046 *
4047 * Returns 0 on success, or %EBUSY on error.  A warning
4048 * message is also printed on failure.
4049 */
4050int pci_request_regions(struct pci_dev *pdev, const char *res_name)
4051{
4052	return pci_request_selected_regions(pdev,
4053			((1 << PCI_STD_NUM_BARS) - 1), res_name);
4054}
4055EXPORT_SYMBOL(pci_request_regions);
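
/*
 * Illustrative sketch (not part of this file): a typical probe() sequence
 * that claims all BARs at once.  "my_driver" is a placeholder name.
 *
 *	err = pci_enable_device(pdev);
 *	if (err)
 *		return err;
 *	err = pci_request_regions(pdev, "my_driver");
 *	if (err)
 *		goto err_disable;
 *	pci_set_master(pdev);
 */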
4056
4057/**
4058 * pci_request_regions_exclusive - Reserve PCI I/O and memory resources
4059 * @pdev: PCI device whose resources are to be reserved
4060 * @res_name: Name to be associated with resource.
4061 *
4062 * Mark all PCI regions associated with PCI device @pdev as being reserved
4063 * by owner @res_name.  Do not access any address inside the PCI regions
4064 * unless this call returns successfully.
4065 *
4066 * pci_request_regions_exclusive() will mark the region so that /dev/mem
4067 * and the sysfs MMIO access will not be allowed.
4068 *
4069 * Returns 0 on success, or %EBUSY on error.  A warning message is also
4070 * printed on failure.
4071 */
4072int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
4073{
4074	return pci_request_selected_regions_exclusive(pdev,
4075				((1 << PCI_STD_NUM_BARS) - 1), res_name);
4076}
4077EXPORT_SYMBOL(pci_request_regions_exclusive);
4078
4079/*
4080 * Record the PCI IO range (expressed as CPU physical address + size).
4081 * Return a negative value if an error has occurred, zero otherwise
4082 */
4083int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
4084			resource_size_t	size)
4085{
4086	int ret = 0;
4087#ifdef PCI_IOBASE
4088	struct logic_pio_hwaddr *range;
4089
4090	if (!size || addr + size < addr)
4091		return -EINVAL;
4092
4093	range = kzalloc(sizeof(*range), GFP_ATOMIC);
4094	if (!range)
4095		return -ENOMEM;
4096
4097	range->fwnode = fwnode;
4098	range->size = size;
4099	range->hw_start = addr;
4100	range->flags = LOGIC_PIO_CPU_MMIO;
4101
4102	ret = logic_pio_register_range(range);
4103	if (ret)
4104		kfree(range);
4105
4106	/* Ignore duplicates due to deferred probing */
4107	if (ret == -EEXIST)
4108		ret = 0;
4109#endif
4110
4111	return ret;
4112}
4113
4114phys_addr_t pci_pio_to_address(unsigned long pio)
4115{
4116#ifdef PCI_IOBASE
4117	if (pio < MMIO_UPPER_LIMIT)
4118		return logic_pio_to_hwaddr(pio);
4119#endif
4120
4121	return (phys_addr_t) OF_BAD_ADDR;
4122}
4123EXPORT_SYMBOL_GPL(pci_pio_to_address);
4124
4125unsigned long __weak pci_address_to_pio(phys_addr_t address)
4126{
4127#ifdef PCI_IOBASE
4128	return logic_pio_trans_cpuaddr(address);
4129#else
4130	if (address > IO_SPACE_LIMIT)
4131		return (unsigned long)-1;
4132
4133	return (unsigned long) address;
4134#endif
4135}
4136
4137/**
4138 * pci_remap_iospace - Remap the memory mapped I/O space
4139 * @res: Resource describing the I/O space
4140 * @phys_addr: physical address of range to be mapped
4141 *
4142 * Remap the memory mapped I/O space described by the @res and the CPU
4143 * physical address @phys_addr into virtual address space.  Only
4144 * architectures that have memory mapped IO functions defined (and the
4145 * PCI_IOBASE value defined) should call this function.
4146 */
4147#ifndef pci_remap_iospace
4148int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
4149{
4150#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
4151	unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
4152
4153	if (!(res->flags & IORESOURCE_IO))
4154		return -EINVAL;
4155
4156	if (res->end > IO_SPACE_LIMIT)
4157		return -EINVAL;
4158
4159	return vmap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
4160			       pgprot_device(PAGE_KERNEL));
4161#else
4162	/*
4163	 * This architecture does not have memory mapped I/O space,
4164	 * so this function should never be called
4165	 */
4166	WARN_ONCE(1, "This architecture does not support memory mapped I/O\n");
4167	return -ENODEV;
4168#endif
4169}
4170EXPORT_SYMBOL(pci_remap_iospace);
4171#endif
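
/*
 * Illustrative sketch (not part of this file): PCI host bridge drivers map
 * their I/O window roughly like this after registering it with
 * pci_register_io_range(); @res below is assumed to describe a logical PIO
 * range owned by the bridge.
 *
 *	err = pci_remap_iospace(res, pci_pio_to_address(res->start));
 *	if (err)
 *		dev_warn(dev, "error %d: failed to map resource %pR\n",
 *			 err, res);
 */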
4172
4173/**
4174 * pci_unmap_iospace - Unmap the memory mapped I/O space
4175 * @res: resource to be unmapped
4176 *
4177 * Unmap the CPU virtual address @res from virtual address space.  Only
4178 * architectures that have memory mapped IO functions defined (and the
4179 * PCI_IOBASE value defined) should call this function.
4180 */
4181void pci_unmap_iospace(struct resource *res)
4182{
4183#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
4184	unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
4185
4186	vunmap_range(vaddr, vaddr + resource_size(res));
4187#endif
4188}
4189EXPORT_SYMBOL(pci_unmap_iospace);
4190
4191static void __pci_set_master(struct pci_dev *dev, bool enable)
4192{
4193	u16 old_cmd, cmd;
4194
4195	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
4196	if (enable)
4197		cmd = old_cmd | PCI_COMMAND_MASTER;
4198	else
4199		cmd = old_cmd & ~PCI_COMMAND_MASTER;
4200	if (cmd != old_cmd) {
4201		pci_dbg(dev, "%s bus mastering\n",
4202			enable ? "enabling" : "disabling");
4203		pci_write_config_word(dev, PCI_COMMAND, cmd);
4204	}
4205	dev->is_busmaster = enable;
4206}
4207
4208/**
4209 * pcibios_setup - process "pci=" kernel boot arguments
4210 * @str: string used to pass in "pci=" kernel boot arguments
4211 *
4212 * Process kernel boot arguments.  This is the default implementation.
4213 * Architecture specific implementations can override this as necessary.
4214 */
4215char * __weak __init pcibios_setup(char *str)
4216{
4217	return str;
4218}
4219
4220/**
4221 * pcibios_set_master - enable PCI bus-mastering for device dev
4222 * @dev: the PCI device to enable
4223 *
4224 * Enables PCI bus-mastering for the device.  This is the default
4225 * implementation.  Architecture specific implementations can override
4226 * this if necessary.
4227 */
4228void __weak pcibios_set_master(struct pci_dev *dev)
4229{
4230	u8 lat;
4231
4232	/* The latency timer doesn't apply to PCIe (either Type 0 or Type 1) */
4233	if (pci_is_pcie(dev))
4234		return;
4235
4236	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
4237	if (lat < 16)
4238		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
4239	else if (lat > pcibios_max_latency)
4240		lat = pcibios_max_latency;
4241	else
4242		return;
4243
4244	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
4245}
4246
4247/**
4248 * pci_set_master - enables bus-mastering for device dev
4249 * @dev: the PCI device to enable
4250 *
4251 * Enables bus-mastering on the device and calls pcibios_set_master()
4252 * to do the needed arch specific settings.
4253 */
4254void pci_set_master(struct pci_dev *dev)
4255{
4256	__pci_set_master(dev, true);
4257	pcibios_set_master(dev);
4258}
4259EXPORT_SYMBOL(pci_set_master);
4260
4261/**
4262 * pci_clear_master - disables bus-mastering for device dev
4263 * @dev: the PCI device to disable
4264 */
4265void pci_clear_master(struct pci_dev *dev)
4266{
4267	__pci_set_master(dev, false);
4268}
4269EXPORT_SYMBOL(pci_clear_master);
4270
4271/**
4272 * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed
4273 * @dev: the PCI device for which MWI is to be enabled
4274 *
4275 * Helper function for pci_set_mwi.
4276 * Originally copied from drivers/net/acenic.c.
4277 * Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>.
4278 *
4279 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
4280 */
4281int pci_set_cacheline_size(struct pci_dev *dev)
4282{
4283	u8 cacheline_size;
4284
4285	if (!pci_cache_line_size)
4286		return -EINVAL;
4287
4288	/* Validate current setting: the PCI_CACHE_LINE_SIZE must be
4289	   equal to or a multiple of the right value. */
4290	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
4291	if (cacheline_size >= pci_cache_line_size &&
4292	    (cacheline_size % pci_cache_line_size) == 0)
4293		return 0;
4294
4295	/* Write the correct value. */
4296	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size);
4297	/* Read it back. */
4298	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
4299	if (cacheline_size == pci_cache_line_size)
4300		return 0;
4301
4302	pci_dbg(dev, "cache line size of %d is not supported\n",
4303		   pci_cache_line_size << 2);
4304
4305	return -EINVAL;
4306}
4307EXPORT_SYMBOL_GPL(pci_set_cacheline_size);
4308
4309/**
4310 * pci_set_mwi - enables memory-write-invalidate PCI transaction
4311 * @dev: the PCI device for which MWI is enabled
4312 *
4313 * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND.
4314 *
4315 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
4316 */
4317int pci_set_mwi(struct pci_dev *dev)
4318{
4319#ifdef PCI_DISABLE_MWI
4320	return 0;
4321#else
4322	int rc;
4323	u16 cmd;
4324
4325	rc = pci_set_cacheline_size(dev);
4326	if (rc)
4327		return rc;
4328
4329	pci_read_config_word(dev, PCI_COMMAND, &cmd);
4330	if (!(cmd & PCI_COMMAND_INVALIDATE)) {
4331		pci_dbg(dev, "enabling Mem-Wr-Inval\n");
4332		cmd |= PCI_COMMAND_INVALIDATE;
4333		pci_write_config_word(dev, PCI_COMMAND, cmd);
4334	}
4335	return 0;
4336#endif
4337}
4338EXPORT_SYMBOL(pci_set_mwi);
4339
4340/**
4341 * pci_try_set_mwi - enables memory-write-invalidate PCI transaction
4342 * @dev: the PCI device for which MWI is enabled
4343 *
4344 * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND.
4345 * Callers are not required to check the return value.
4346 *
4347 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
4348 */
4349int pci_try_set_mwi(struct pci_dev *dev)
4350{
4351#ifdef PCI_DISABLE_MWI
4352	return 0;
4353#else
4354	return pci_set_mwi(dev);
4355#endif
4356}
4357EXPORT_SYMBOL(pci_try_set_mwi);
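
/*
 * Illustrative sketch (not part of this file): MWI is only a performance
 * hint on conventional PCI, so most drivers use the "try" variant and ignore
 * the result:
 *
 *	pci_try_set_mwi(pdev);
 */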
4358
4359/**
4360 * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
4361 * @dev: the PCI device to disable
4362 *
4363 * Disables PCI Memory-Write-Invalidate transaction on the device
4364 */
4365void pci_clear_mwi(struct pci_dev *dev)
4366{
4367#ifndef PCI_DISABLE_MWI
4368	u16 cmd;
4369
4370	pci_read_config_word(dev, PCI_COMMAND, &cmd);
4371	if (cmd & PCI_COMMAND_INVALIDATE) {
4372		cmd &= ~PCI_COMMAND_INVALIDATE;
4373		pci_write_config_word(dev, PCI_COMMAND, cmd);
4374	}
4375#endif
4376}
4377EXPORT_SYMBOL(pci_clear_mwi);
4378
4379/**
4380 * pci_disable_parity - disable parity checking for device
4381 * @dev: the PCI device to operate on
4382 *
4383 * Disable parity checking for device @dev
4384 */
4385void pci_disable_parity(struct pci_dev *dev)
4386{
4387	u16 cmd;
4388
4389	pci_read_config_word(dev, PCI_COMMAND, &cmd);
4390	if (cmd & PCI_COMMAND_PARITY) {
4391		cmd &= ~PCI_COMMAND_PARITY;
4392		pci_write_config_word(dev, PCI_COMMAND, cmd);
4393	}
4394}
4395
4396/**
4397 * pci_intx - enables/disables PCI INTx for device dev
4398 * @pdev: the PCI device to operate on
4399 * @enable: boolean: whether to enable or disable PCI INTx
4400 *
4401 * Enables/disables PCI INTx for device @pdev
4402 */
4403void pci_intx(struct pci_dev *pdev, int enable)
4404{
4405	u16 pci_command, new;
4406
4407	pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
4408
4409	if (enable)
4410		new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
4411	else
4412		new = pci_command | PCI_COMMAND_INTX_DISABLE;
4413
4414	if (new != pci_command) {
4415		struct pci_devres *dr;
4416
4417		pci_write_config_word(pdev, PCI_COMMAND, new);
4418
4419		dr = find_pci_dr(pdev);
4420		if (dr && !dr->restore_intx) {
4421			dr->restore_intx = 1;
4422			dr->orig_intx = !enable;
4423		}
4424	}
4425}
4426EXPORT_SYMBOL_GPL(pci_intx);
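
/*
 * Illustrative sketch (not part of this file): a caller that needs to mask
 * and later unmask the legacy interrupt line would do:
 *
 *	pci_intx(pdev, 0);	(mask INTx)
 *	pci_intx(pdev, 1);	(unmask INTx)
 */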
4427
4428/**
4429 * pci_wait_for_pending_transaction - wait for pending transaction
4430 * @dev: the PCI device to operate on
4431 *
4432 * Return 0 if the transaction is still pending, 1 otherwise.
4433 */
4434int pci_wait_for_pending_transaction(struct pci_dev *dev)
4435{
4436	if (!pci_is_pcie(dev))
4437		return 1;
4438
4439	return pci_wait_for_pending(dev, pci_pcie_cap(dev) + PCI_EXP_DEVSTA,
4440				    PCI_EXP_DEVSTA_TRPND);
4441}
4442EXPORT_SYMBOL(pci_wait_for_pending_transaction);
4443
4444/**
4445 * pcie_flr - initiate a PCIe function level reset
4446 * @dev: device to reset
4447 *
4448 * Initiate a function level reset unconditionally on @dev without
4449 * checking any flags or DEVCAP
4450 */
4451int pcie_flr(struct pci_dev *dev)
4452{
4453	if (!pci_wait_for_pending_transaction(dev))
4454		pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
4455
4456	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
4457
4458	if (dev->imm_ready)
4459		return 0;
4460
4461	/*
4462	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
4463	 * 100ms, but may silently discard requests while the FLR is in
4464	 * progress.  Wait 100ms before trying to access the device.
4465	 */
4466	msleep(100);
4467
4468	return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
4469}
4470EXPORT_SYMBOL_GPL(pcie_flr);
4471
4472/**
4473 * pcie_reset_flr - initiate a PCIe function level reset
4474 * @dev: device to reset
4475 * @probe: if true, return 0 if device can be reset this way
4476 *
4477 * Initiate a function level reset on @dev.
4478 */
4479int pcie_reset_flr(struct pci_dev *dev, bool probe)
4480{
4481	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
4482		return -ENOTTY;
4483
4484	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
4485		return -ENOTTY;
4486
4487	if (probe)
4488		return 0;
4489
4490	return pcie_flr(dev);
4491}
4492EXPORT_SYMBOL_GPL(pcie_reset_flr);
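
/*
 * Illustrative sketch (not part of this file): a caller that wants to use FLR
 * directly first probes for support, then performs the reset:
 *
 *	if (pcie_reset_flr(pdev, PCI_RESET_PROBE) == 0)
 *		err = pcie_reset_flr(pdev, PCI_RESET_DO_RESET);
 */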
4493
4494static int pci_af_flr(struct pci_dev *dev, bool probe)
4495{
4496	int pos;
4497	u8 cap;
4498
4499	pos = pci_find_capability(dev, PCI_CAP_ID_AF);
4500	if (!pos)
4501		return -ENOTTY;
4502
4503	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
4504		return -ENOTTY;
4505
4506	pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap);
4507	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
4508		return -ENOTTY;
4509
4510	if (probe)
4511		return 0;
4512
4513	/*
4514	 * Wait for Transaction Pending bit to clear.  A word-aligned test
4515	 * is used, so we use the control offset rather than status and shift
4516	 * the test bit to match.
4517	 */
4518	if (!pci_wait_for_pending(dev, pos + PCI_AF_CTRL,
4519				 PCI_AF_STATUS_TP << 8))
4520		pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
4521
4522	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
4523
4524	if (dev->imm_ready)
4525		return 0;
4526
4527	/*
4528	 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
4529	 * updated 27 July 2006; a device must complete an FLR within
4530	 * 100ms, but may silently discard requests while the FLR is in
4531	 * progress.  Wait 100ms before trying to access the device.
4532	 */
4533	msleep(100);
4534
4535	return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
4536}
4537
4538/**
4539 * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0.
4540 * @dev: Device to reset.
4541 * @probe: if true, return 0 if the device can be reset this way.
4542 *
4543 * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is
4544 * unset, it will be reinitialized internally when going from PCI_D3hot to
4545 * PCI_D0.  If that's the case and the device is not in a low-power state
4546 * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset.
4547 *
4548 * NOTE: This causes the caller to sleep for twice the device power transition
4549 * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
4550 * by default (i.e. unless the @dev's d3hot_delay field has a different value).
4551 * Moreover, only devices in D0 can be reset by this function.
4552 */
4553static int pci_pm_reset(struct pci_dev *dev, bool probe)
4554{
4555	u16 csr;
4556
4557	if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
4558		return -ENOTTY;
4559
4560	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &csr);
4561	if (csr & PCI_PM_CTRL_NO_SOFT_RESET)
4562		return -ENOTTY;
4563
4564	if (probe)
4565		return 0;
4566
4567	if (dev->current_state != PCI_D0)
4568		return -EINVAL;
4569
4570	csr &= ~PCI_PM_CTRL_STATE_MASK;
4571	csr |= PCI_D3hot;
4572	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
4573	pci_dev_d3_sleep(dev);
4574
4575	csr &= ~PCI_PM_CTRL_STATE_MASK;
4576	csr |= PCI_D0;
4577	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
4578	pci_dev_d3_sleep(dev);
4579
4580	return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
4581}
4582
4583/**
4584 * pcie_wait_for_link_status - Wait for link status change
4585 * @pdev: Device whose link to wait for.
4586 * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE.
4587 * @active: Waiting for active or inactive?
4588 *
4589 * Return 0 if successful, or -ETIMEDOUT if status has not changed within
4590 * PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
4591 */
4592static int pcie_wait_for_link_status(struct pci_dev *pdev,
4593				     bool use_lt, bool active)
4594{
4595	u16 lnksta_mask, lnksta_match;
4596	unsigned long end_jiffies;
4597	u16 lnksta;
4598
4599	lnksta_mask = use_lt ? PCI_EXP_LNKSTA_LT : PCI_EXP_LNKSTA_DLLLA;
4600	lnksta_match = active ? lnksta_mask : 0;
4601
4602	end_jiffies = jiffies + msecs_to_jiffies(PCIE_LINK_RETRAIN_TIMEOUT_MS);
4603	do {
4604		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
4605		if ((lnksta & lnksta_mask) == lnksta_match)
4606			return 0;
4607		msleep(1);
4608	} while (time_before(jiffies, end_jiffies));
4609
4610	return -ETIMEDOUT;
4611}
4612
4613/**
4614 * pcie_retrain_link - Request a link retrain and wait for it to complete
4615 * @pdev: Device whose link to retrain.
4616 * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status.
4617 *
4618 * Retrain completion status is retrieved from the Link Status Register
4619 * according to @use_lt.  It is not verified whether the use of the DLLLA
4620 * bit is valid.
4621 *
4622 * Return 0 if successful, or -ETIMEDOUT if training has not completed
4623 * within PCIE_LINK_RETRAIN_TIMEOUT_MS milliseconds.
4624 */
4625int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
4626{
4627	int rc;
4628
4629	/*
4630	 * Ensure the updated LNKCTL parameters are used during link
4631	 * training by checking that there is no ongoing link training that
4632	 * may have started before link parameters were changed, so as to
4633	 * avoid LTSSM race as recommended in Implementation Note at the end
4634	 * of PCIe r6.1 sec 7.5.3.7.
4635	 */
4636	rc = pcie_wait_for_link_status(pdev, true, false);
4637	if (rc)
4638		return rc;
4639
4640	pcie_capability_set_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL);
4641	if (pdev->clear_retrain_link) {
4642		/*
4643		 * Due to an erratum in some devices the Retrain Link bit
4644		 * needs to be cleared again manually to allow the link
4645		 * training to succeed.
4646		 */
4647		pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_RL);
4648	}
4649
4650	return pcie_wait_for_link_status(pdev, use_lt, !use_lt);
4651}
4652
4653/**
4654 * pcie_wait_for_link_delay - Wait until link is active or inactive
4655 * @pdev: Bridge device
4656 * @active: waiting for active or inactive?
4657 * @delay: Delay to wait after link has become active (in ms)
4658 *
4659 * Use this to wait until the link becomes active or inactive.
4660 */
4661static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
4662				     int delay)
4663{
4664	int rc;
4665
4666	/*
4667	 * Some controllers might not implement link active reporting. In this
4668	 * case, we wait for 1000 ms + any delay requested by the caller.
4669	 */
4670	if (!pdev->link_active_reporting) {
4671		msleep(PCIE_LINK_RETRAIN_TIMEOUT_MS + delay);
4672		return true;
4673	}
4674
4675	/*
4676	 * Per PCIe r4.0 sec 6.6.1, a component must enter LTSSM Detect within
4677	 * 20 ms, after which we should expect the link to become active if the
4678	 * reset was successful. If so, software must wait a minimum of 100 ms
4679	 * before sending configuration requests to devices downstream of this port.
4680	 *
4681	 * If the link fails to activate, either the device was physically
4682	 * removed or the link is permanently failed.
4683	 */
4684	if (active)
4685		msleep(20);
4686	rc = pcie_wait_for_link_status(pdev, false, active);
4687	if (active) {
4688		if (rc)
4689			rc = pcie_failed_link_retrain(pdev);
4690		if (rc)
4691			return false;
4692
4693		msleep(delay);
4694		return true;
4695	}
4696
4697	if (rc)
4698		return false;
4699
4700	return true;
4701}
4702
4703/**
4704 * pcie_wait_for_link - Wait until link is active or inactive
4705 * @pdev: Bridge device
4706 * @active: waiting for active or inactive?
4707 *
4708 * Use this to wait until the link becomes active or inactive.
4709 */
4710bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
4711{
4712	return pcie_wait_for_link_delay(pdev, active, 100);
4713}
4714
4715/*
4716 * Find maximum D3cold delay required by all the devices on the bus.  The
4717 * spec says 100 ms, but firmware can lower it and we allow drivers to
4718 * increase it as well.
4719 *
4720 * Called with @pci_bus_sem locked for reading.
4721 */
4722static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
4723{
4724	const struct pci_dev *pdev;
4725	int min_delay = 100;
4726	int max_delay = 0;
4727
4728	list_for_each_entry(pdev, &bus->devices, bus_list) {
4729		if (pdev->d3cold_delay < min_delay)
4730			min_delay = pdev->d3cold_delay;
4731		if (pdev->d3cold_delay > max_delay)
4732			max_delay = pdev->d3cold_delay;
4733	}
4734
4735	return max(min_delay, max_delay);
4736}
4737
4738/**
4739 * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
4740 * @dev: PCI bridge
4741 * @reset_type: reset type in human-readable form
4742 *
4743 * Handle necessary delays before access to the devices on the secondary
4744 * side of the bridge are permitted after D3cold to D0 transition
4745 * or Conventional Reset.
4746 *
4747 * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
4748 * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
4749 * 4.3.2.
4750 *
4751 * Return 0 on success or -ENOTTY if the first device on the secondary bus
4752 * failed to become accessible.
4753 */
4754int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
4755{
4756	struct pci_dev *child;
4757	int delay;
4758
4759	if (pci_dev_is_disconnected(dev))
4760		return 0;
4761
4762	if (!pci_is_bridge(dev))
4763		return 0;
4764
4765	down_read(&pci_bus_sem);
4766
4767	/*
4768	 * We only deal with devices that are present currently on the bus.
4769	 * For any hot-added devices the access delay is handled in pciehp
4770	 * board_added(). In case of ACPI hotplug the firmware is expected
4771	 * to configure the devices before OS is notified.
4772	 */
4773	if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
4774		up_read(&pci_bus_sem);
4775		return 0;
4776	}
4777
4778	/* Take d3cold_delay requirements into account */
4779	delay = pci_bus_max_d3cold_delay(dev->subordinate);
4780	if (!delay) {
4781		up_read(&pci_bus_sem);
4782		return 0;
4783	}
4784
4785	child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
4786				 bus_list);
4787	up_read(&pci_bus_sem);
4788
4789	/*
4790	 * For conventional PCI and PCI-X, we need to wait Tpvrh + Trhfa before
4791	 * accessing the device after reset (that is 1000 ms + 100 ms).
4792	 */
4793	if (!pci_is_pcie(dev)) {
4794		pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
4795		msleep(1000 + delay);
4796		return 0;
4797	}
4798
4799	/*
4800	 * For PCIe downstream and root ports that do not support link speeds
4801	 * greater than 5 GT/s, we need to wait a minimum of 100 ms. For
4802	 * higher speeds (Gen3 and above) we first need to wait for the data
4803	 * link layer to become active.
4804	 *
4805	 * However, 100 ms is the minimum, and the PCIe spec says that
4806	 * software must allow at least 1 s before it can determine that a
4807	 * device that did not respond is broken. A device can also take
4808	 * longer than that to respond if it indicates so through Request
4809	 * Retry Status completions.
4810	 *
4811	 * Therefore we wait for 100 ms and check for device presence
4812	 * until the timeout expires.
4813	 */
4814	if (!pcie_downstream_port(dev))
4815		return 0;
4816
4817	if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
4818		u16 status;
4819
4820		pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
4821		msleep(delay);
4822
4823		if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
4824			return 0;
4825
4826		/*
4827		 * If the port supports active link reporting, check whether
4828		 * the link is active; if it is not, bail out early with the
4829		 * assumption that the device is not present anymore.
4830		 */
4831		if (!dev->link_active_reporting)
4832			return -ENOTTY;
4833
4834		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &status);
4835		if (!(status & PCI_EXP_LNKSTA_DLLLA))
4836			return -ENOTTY;
4837
4838		return pci_dev_wait(child, reset_type,
4839				    PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
4840	}
4841
4842	pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
4843		delay);
4844	if (!pcie_wait_for_link_delay(dev, true, delay)) {
4845		/* Did not train, no need to wait any further */
4846		pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
4847		return -ENOTTY;
4848	}
4849
4850	return pci_dev_wait(child, reset_type,
4851			    PCIE_RESET_READY_POLL_MS - delay);
4852}
4853
4854void pci_reset_secondary_bus(struct pci_dev *dev)
4855{
4856	u16 ctrl;
4857
4858	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
4859	ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
4860	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
4861
4862	/*
4863	 * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms.  Double
4864	 * this to 2ms to ensure that we meet the minimum requirement.
4865	 */
4866	msleep(2);
4867
4868	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
4869	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
4870}
4871
4872void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
4873{
4874	pci_reset_secondary_bus(dev);
4875}
4876
4877/**
4878 * pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge.
4879 * @dev: Bridge device
4880 *
4881 * Use the bridge control register to assert reset on the secondary bus.
4882 * Devices on the secondary bus are left in power-on state.
4883 */
4884int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
4885{
4886	lock_map_assert_held(&dev->cfg_access_lock);
4887	pcibios_reset_secondary_bus(dev);
4888
4889	return pci_bridge_wait_for_secondary_bus(dev, "bus reset");
4890}
4891EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
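
/*
 * Illustrative sketch (not part of this file): error recovery paths reset
 * everything below a bridge roughly like this; pci_dev_lock() provides the
 * cfg_access_lock that pci_bridge_secondary_bus_reset() asserts.  In-tree
 * callers normally go through pci_bus_error_reset() below instead.
 *
 *	pci_dev_lock(bridge);
 *	rc = pci_bridge_secondary_bus_reset(bridge);
 *	pci_dev_unlock(bridge);
 */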
4892
4893static int pci_parent_bus_reset(struct pci_dev *dev, bool probe)
4894{
4895	struct pci_dev *pdev;
4896
4897	if (pci_is_root_bus(dev->bus) || dev->subordinate ||
4898	    !dev->bus->self || dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
4899		return -ENOTTY;
4900
4901	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
4902		if (pdev != dev)
4903			return -ENOTTY;
4904
4905	if (probe)
4906		return 0;
4907
4908	return pci_bridge_secondary_bus_reset(dev->bus->self);
4909}
4910
4911static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, bool probe)
4912{
4913	int rc = -ENOTTY;
4914
4915	if (!hotplug || !try_module_get(hotplug->owner))
4916		return rc;
4917
4918	if (hotplug->ops->reset_slot)
4919		rc = hotplug->ops->reset_slot(hotplug, probe);
4920
4921	module_put(hotplug->owner);
4922
4923	return rc;
4924}
4925
4926static int pci_dev_reset_slot_function(struct pci_dev *dev, bool probe)
4927{
4928	if (dev->multifunction || dev->subordinate || !dev->slot ||
4929	    dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
4930		return -ENOTTY;
4931
4932	return pci_reset_hotplug_slot(dev->slot->hotplug, probe);
4933}
4934
4935static u16 cxl_port_dvsec(struct pci_dev *dev)
4936{
4937	return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
4938					 PCI_DVSEC_CXL_PORT);
4939}
4940
4941static bool cxl_sbr_masked(struct pci_dev *dev)
4942{
4943	u16 dvsec, reg;
4944	int rc;
4945
4946	dvsec = cxl_port_dvsec(dev);
4947	if (!dvsec)
4948		return false;
4949
4950	rc = pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_PORT_CTL, &reg);
4951	if (rc || PCI_POSSIBLE_ERROR(reg))
4952		return false;
4953
4954	/*
4955	 * Per CXL spec r3.1, sec 8.1.5.2, when "Unmask SBR" is 0, the SBR
4956	 * bit in Bridge Control has no effect.  When 1, the Port generates
4957	 * hot reset when the SBR bit is set to 1.
4958	 */
4959	if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR)
4960		return false;
4961
4962	return true;
4963}
4964
4965static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
4966{
4967	struct pci_dev *bridge = pci_upstream_bridge(dev);
4968	int rc;
4969
4970	/*
4971	 * If "dev" is below a CXL port that has SBR control masked, SBR
4972	 * won't do anything, so return error.
4973	 */
4974	if (bridge && cxl_sbr_masked(bridge)) {
4975		if (probe)
4976			return 0;
4977
4978		return -ENOTTY;
4979	}
4980
4981	rc = pci_dev_reset_slot_function(dev, probe);
4982	if (rc != -ENOTTY)
4983		return rc;
4984	return pci_parent_bus_reset(dev, probe);
4985}
4986
4987static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
4988{
4989	struct pci_dev *bridge;
4990	u16 dvsec, reg, val;
4991	int rc;
4992
4993	bridge = pci_upstream_bridge(dev);
4994	if (!bridge)
4995		return -ENOTTY;
4996
4997	dvsec = cxl_port_dvsec(bridge);
4998	if (!dvsec)
4999		return -ENOTTY;
5000
5001	if (probe)
5002		return 0;
5003
5004	rc = pci_read_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL, &reg);
5005	if (rc)
5006		return -ENOTTY;
5007
5008	if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
5009		val = reg;
5010	} else {
5011		val = reg | PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR;
5012		pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
5013				      val);
5014	}
5015
5016	rc = pci_reset_bus_function(dev, probe);
5017
5018	if (reg != val)
5019		pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
5020				      reg);
5021
5022	return rc;
5023}
5024
5025void pci_dev_lock(struct pci_dev *dev)
5026{
5027	/* block PM suspend, driver probe, etc. */
5028	device_lock(&dev->dev);
5029	pci_cfg_access_lock(dev);
5030}
5031EXPORT_SYMBOL_GPL(pci_dev_lock);
5032
5033/* Return 1 on successful lock, 0 on contention */
5034int pci_dev_trylock(struct pci_dev *dev)
5035{
5036	if (device_trylock(&dev->dev)) {
5037		if (pci_cfg_access_trylock(dev))
5038			return 1;
5039		device_unlock(&dev->dev);
5040	}
5041
5042	return 0;
5043}
5044EXPORT_SYMBOL_GPL(pci_dev_trylock);
5045
5046void pci_dev_unlock(struct pci_dev *dev)
5047{
5048	pci_cfg_access_unlock(dev);
5049	device_unlock(&dev->dev);
5050}
5051EXPORT_SYMBOL_GPL(pci_dev_unlock);
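
/*
 * Illustrative sketch (not part of this file): the trylock variant lets a
 * caller back off instead of blocking, e.g. when it cannot wait for a
 * concurrent probe or suspend to finish:
 *
 *	if (!pci_dev_trylock(pdev))
 *		return -EAGAIN;
 *	... operate on the device and its config space ...
 *	pci_dev_unlock(pdev);
 */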
5052
5053static void pci_dev_save_and_disable(struct pci_dev *dev)
5054{
5055	const struct pci_error_handlers *err_handler =
5056			dev->driver ? dev->driver->err_handler : NULL;
5057
5058	/*
5059	 * dev->driver->err_handler->reset_prepare() is protected against
5060	 * races with ->remove() by the device lock, which must be held by
5061	 * the caller.
5062	 */
5063	if (err_handler && err_handler->reset_prepare)
5064		err_handler->reset_prepare(dev);
5065
5066	/*
5067	 * Wake-up device prior to save.  PM registers default to D0 after
5068	 * reset and a simple register restore doesn't reliably return
5069	 * to a non-D0 state anyway.
5070	 */
5071	pci_set_power_state(dev, PCI_D0);
5072
5073	pci_save_state(dev);
5074	/*
5075	 * Disable the device by clearing the Command register, except for
5076	 * INTx-disable which is set.  This not only disables MMIO and I/O port
5077	 * BARs, but also prevents the device from being Bus Master, preventing
5078	 * DMA from the device including MSI/MSI-X interrupts.  For PCI 2.3
5079	 * compliant devices, INTx-disable prevents legacy interrupts.
5080	 */
5081	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
5082}
5083
5084static void pci_dev_restore(struct pci_dev *dev)
5085{
5086	const struct pci_error_handlers *err_handler =
5087			dev->driver ? dev->driver->err_handler : NULL;
5088
5089	pci_restore_state(dev);
5090
5091	/*
5092	 * dev->driver->err_handler->reset_done() is protected against
5093	 * races with ->remove() by the device lock, which must be held by
5094	 * the caller.
5095	 */
5096	if (err_handler && err_handler->reset_done)
5097		err_handler->reset_done(dev);
5098}
5099
5100/* dev->reset_methods[] is a 0-terminated list of indices into this array */
5101static const struct pci_reset_fn_method pci_reset_fn_methods[] = {
5102	{ },
5103	{ pci_dev_specific_reset, .name = "device_specific" },
5104	{ pci_dev_acpi_reset, .name = "acpi" },
5105	{ pcie_reset_flr, .name = "flr" },
5106	{ pci_af_flr, .name = "af_flr" },
5107	{ pci_pm_reset, .name = "pm" },
5108	{ pci_reset_bus_function, .name = "bus" },
5109	{ cxl_reset_bus_function, .name = "cxl_bus" },
5110};
5111
5112static ssize_t reset_method_show(struct device *dev,
5113				 struct device_attribute *attr, char *buf)
5114{
5115	struct pci_dev *pdev = to_pci_dev(dev);
5116	ssize_t len = 0;
5117	int i, m;
5118
5119	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
5120		m = pdev->reset_methods[i];
5121		if (!m)
5122			break;
5123
5124		len += sysfs_emit_at(buf, len, "%s%s", len ? " " : "",
5125				     pci_reset_fn_methods[m].name);
5126	}
5127
5128	if (len)
5129		len += sysfs_emit_at(buf, len, "\n");
5130
5131	return len;
5132}
5133
5134static int reset_method_lookup(const char *name)
5135{
5136	int m;
5137
5138	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
5139		if (sysfs_streq(name, pci_reset_fn_methods[m].name))
5140			return m;
5141	}
5142
5143	return 0;	/* not found */
5144}
5145
5146static ssize_t reset_method_store(struct device *dev,
5147				  struct device_attribute *attr,
5148				  const char *buf, size_t count)
5149{
5150	struct pci_dev *pdev = to_pci_dev(dev);
5151	char *options, *name;
5152	int m, n;
5153	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };
5154
5155	if (sysfs_streq(buf, "")) {
5156		pdev->reset_methods[0] = 0;
5157		pci_warn(pdev, "All device reset methods disabled by user");
5158		return count;
5159	}
5160
5161	if (sysfs_streq(buf, "default")) {
5162		pci_init_reset_methods(pdev);
5163		return count;
5164	}
5165
5166	options = kstrndup(buf, count, GFP_KERNEL);
5167	if (!options)
5168		return -ENOMEM;
5169
5170	n = 0;
5171	while ((name = strsep(&options, " ")) != NULL) {
5172		if (sysfs_streq(name, ""))
5173			continue;
5174
5175		name = strim(name);
5176
5177		m = reset_method_lookup(name);
5178		if (!m) {
5179			pci_err(pdev, "Invalid reset method '%s'", name);
5180			goto error;
5181		}
5182
5183		if (pci_reset_fn_methods[m].reset_fn(pdev, PCI_RESET_PROBE)) {
5184			pci_err(pdev, "Unsupported reset method '%s'", name);
5185			goto error;
5186		}
5187
5188		if (n == PCI_NUM_RESET_METHODS - 1) {
5189			pci_err(pdev, "Too many reset methods\n");
5190			goto error;
5191		}
5192
5193		reset_methods[n++] = m;
5194	}
5195
5196	reset_methods[n] = 0;
5197
5198	/* Warn if dev-specific supported but not highest priority */
5199	if (pci_reset_fn_methods[1].reset_fn(pdev, PCI_RESET_PROBE) == 0 &&
5200	    reset_methods[0] != 1)
5201		pci_warn(pdev, "Device-specific reset disabled/de-prioritized by user");
5202	memcpy(pdev->reset_methods, reset_methods, sizeof(pdev->reset_methods));
5203	kfree(options);
5204	return count;
5205
5206error:
5207	/* Leave previous methods unchanged */
5208	kfree(options);
5209	return -EINVAL;
5210}
5211static DEVICE_ATTR_RW(reset_method);
5212
5213static struct attribute *pci_dev_reset_method_attrs[] = {
5214	&dev_attr_reset_method.attr,
5215	NULL,
5216};
5217
5218static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
5219						    struct attribute *a, int n)
5220{
5221	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
5222
5223	if (!pci_reset_supported(pdev))
5224		return 0;
5225
5226	return a->mode;
5227}
5228
5229const struct attribute_group pci_dev_reset_method_attr_group = {
5230	.attrs = pci_dev_reset_method_attrs,
5231	.is_visible = pci_dev_reset_method_attr_is_visible,
5232};
5233
5234/**
5235 * __pci_reset_function_locked - reset a PCI device function while holding
5236 * the @dev mutex lock.
5237 * @dev: PCI device to reset
5238 *
5239 * Some devices allow an individual function to be reset without affecting
5240 * other functions in the same device.  The PCI device must be responsive
5241 * to PCI config space in order to use this function.
5242 *
5243 * The device function is presumed to be unused and the caller is holding
5244 * the device mutex lock when this function is called.
5245 *
5246 * Resetting the device will make the contents of PCI configuration space
5247 * random, so any caller of this must be prepared to reinitialise the
5248 * device including MSI, bus mastering, BARs, decoding IO and memory spaces,
5249 * etc.
5250 *
5251 * Returns 0 if the device function was successfully reset or negative if the
5252 * device doesn't support resetting a single function.
5253 */
5254int __pci_reset_function_locked(struct pci_dev *dev)
5255{
5256	int i, m, rc;
5257
5258	might_sleep();
5259
5260	/*
5261	 * A reset method returns -ENOTTY if it doesn't support this device and
5262	 * we should try the next method.
5263	 *
5264	 * If it returns 0 (success), we're finished.  If it returns any other
5265	 * error, we're also finished: this indicates that further reset
5266	 * mechanisms might be broken on the device.
5267	 */
5268	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
5269		m = dev->reset_methods[i];
5270		if (!m)
5271			return -ENOTTY;
5272
5273		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_DO_RESET);
5274		if (!rc)
5275			return 0;
5276		if (rc != -ENOTTY)
5277			return rc;
5278	}
5279
5280	return -ENOTTY;
5281}
5282EXPORT_SYMBOL_GPL(__pci_reset_function_locked);
5283
5284/**
5285 * pci_init_reset_methods - check whether device can be safely reset
5286 * and store supported reset mechanisms.
5287 * @dev: PCI device to check for reset mechanisms
5288 *
5289 * Some devices allow an individual function to be reset without affecting
5290 * other functions in the same device.  The PCI device must be in D0-D3hot
5291 * state.
5292 *
5293 * Stores reset mechanisms supported by device in reset_methods byte array
5294 * which is a member of struct pci_dev.
5295 */
5296void pci_init_reset_methods(struct pci_dev *dev)
5297{
5298	int m, i, rc;
5299
5300	BUILD_BUG_ON(ARRAY_SIZE(pci_reset_fn_methods) != PCI_NUM_RESET_METHODS);
5301
5302	might_sleep();
5303
5304	i = 0;
5305	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
5306		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_PROBE);
5307		if (!rc)
5308			dev->reset_methods[i++] = m;
5309		else if (rc != -ENOTTY)
5310			break;
5311	}
5312
5313	dev->reset_methods[i] = 0;
5314}
5315
5316/**
5317 * pci_reset_function - quiesce and reset a PCI device function
5318 * @dev: PCI device to reset
5319 *
5320 * Some devices allow an individual function to be reset without affecting
5321 * other functions in the same device.  The PCI device must be responsive
5322 * to PCI config space in order to use this function.
5323 *
5324 * This function does not just reset the PCI portion of a device, but
5325 * clears all the state associated with the device.  This function differs
5326 * from __pci_reset_function_locked() in that it saves and restores device state
5327 * over the reset and takes the PCI device lock.
5328 *
5329 * Returns 0 if the device function was successfully reset or negative if the
5330 * device doesn't support resetting a single function.
5331 */
5332int pci_reset_function(struct pci_dev *dev)
5333{
5334	struct pci_dev *bridge;
5335	int rc;
5336
5337	if (!pci_reset_supported(dev))
5338		return -ENOTTY;
5339
5340	/*
5341	 * If there's no upstream bridge, no locking is needed since there is
5342	 * no upstream bridge configuration to hold consistent.
5343	 */
5344	bridge = pci_upstream_bridge(dev);
5345	if (bridge)
5346		pci_dev_lock(bridge);
5347
5348	pci_dev_lock(dev);
5349	pci_dev_save_and_disable(dev);
5350
5351	rc = __pci_reset_function_locked(dev);
5352
5353	pci_dev_restore(dev);
5354	pci_dev_unlock(dev);
5355
5356	if (bridge)
5357		pci_dev_unlock(bridge);
5358
5359	return rc;
5360}
5361EXPORT_SYMBOL_GPL(pci_reset_function);
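
/*
 * Illustrative sketch (not part of this file): a driver that wants a clean
 * function reset checks for support first, since pci_reset_function()
 * returns -ENOTTY when no reset method is available:
 *
 *	if (pci_reset_supported(pdev)) {
 *		err = pci_reset_function(pdev);
 *		if (err)
 *			return err;
 *	}
 */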
5362
5363/**
5364 * pci_reset_function_locked - quiesce and reset a PCI device function
5365 * @dev: PCI device to reset
5366 *
5367 * Some devices allow an individual function to be reset without affecting
5368 * other functions in the same device.  The PCI device must be responsive
5369 * to PCI config space in order to use this function.
5370 *
5371 * This function does not just reset the PCI portion of a device, but
5372 * clears all the state associated with the device.  This function differs
5373 * from __pci_reset_function_locked() in that it saves and restores device state
5374 * over the reset.  It also differs from pci_reset_function() in that it
5375 * requires the PCI device lock to be held.
5376 *
5377 * Returns 0 if the device function was successfully reset or negative if the
5378 * device doesn't support resetting a single function.
5379 */
5380int pci_reset_function_locked(struct pci_dev *dev)
5381{
5382	int rc;
5383
5384	if (!pci_reset_supported(dev))
5385		return -ENOTTY;
5386
5387	pci_dev_save_and_disable(dev);
5388
5389	rc = __pci_reset_function_locked(dev);
5390
5391	pci_dev_restore(dev);
5392
5393	return rc;
5394}
5395EXPORT_SYMBOL_GPL(pci_reset_function_locked);
5396
5397/**
5398 * pci_try_reset_function - quiesce and reset a PCI device function
5399 * @dev: PCI device to reset
5400 *
5401 * Same as above, except return -EAGAIN if unable to lock device.
5402 */
5403int pci_try_reset_function(struct pci_dev *dev)
5404{
5405	int rc;
5406
5407	if (!pci_reset_supported(dev))
5408		return -ENOTTY;
5409
5410	if (!pci_dev_trylock(dev))
5411		return -EAGAIN;
5412
5413	pci_dev_save_and_disable(dev);
5414	rc = __pci_reset_function_locked(dev);
5415	pci_dev_restore(dev);
5416	pci_dev_unlock(dev);
5417
5418	return rc;
5419}
5420EXPORT_SYMBOL_GPL(pci_try_reset_function);
5421
5422/* Do any devices on or below this bus prevent a bus reset? */
5423static bool pci_bus_resettable(struct pci_bus *bus)
5424{
5425	struct pci_dev *dev;
5426
5428	if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
5429		return false;
5430
5431	list_for_each_entry(dev, &bus->devices, bus_list) {
5432		if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
5433		    (dev->subordinate && !pci_bus_resettable(dev->subordinate)))
5434			return false;
5435	}
5436
5437	return true;
5438}
5439
5440/* Lock devices from the top of the tree down */
5441static void pci_bus_lock(struct pci_bus *bus)
5442{
5443	struct pci_dev *dev;
5444
5445	list_for_each_entry(dev, &bus->devices, bus_list) {
5446		pci_dev_lock(dev);
5447		if (dev->subordinate)
5448			pci_bus_lock(dev->subordinate);
5449	}
5450}
5451
5452/* Unlock devices from the bottom of the tree up */
5453static void pci_bus_unlock(struct pci_bus *bus)
5454{
5455	struct pci_dev *dev;
5456
5457	list_for_each_entry(dev, &bus->devices, bus_list) {
5458		if (dev->subordinate)
5459			pci_bus_unlock(dev->subordinate);
5460		pci_dev_unlock(dev);
5461	}
5462}
5463
5464/* Return 1 on successful lock, 0 on contention */
5465static int pci_bus_trylock(struct pci_bus *bus)
5466{
5467	struct pci_dev *dev;
5468
5469	list_for_each_entry(dev, &bus->devices, bus_list) {
5470		if (!pci_dev_trylock(dev))
5471			goto unlock;
5472		if (dev->subordinate) {
5473			if (!pci_bus_trylock(dev->subordinate)) {
5474				pci_dev_unlock(dev);
5475				goto unlock;
5476			}
5477		}
5478	}
5479	return 1;
5480
5481unlock:
5482	list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) {
5483		if (dev->subordinate)
5484			pci_bus_unlock(dev->subordinate);
5485		pci_dev_unlock(dev);
5486	}
5487	return 0;
5488}
5489
5490/* Do any devices on or below this slot prevent a bus reset? */
5491static bool pci_slot_resettable(struct pci_slot *slot)
5492{
5493	struct pci_dev *dev;
5494
5495	if (slot->bus->self &&
5496	    (slot->bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
5497		return false;
5498
5499	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5500		if (!dev->slot || dev->slot != slot)
5501			continue;
5502		if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
5503		    (dev->subordinate && !pci_bus_resettable(dev->subordinate)))
5504			return false;
5505	}
5506
5507	return true;
5508}
5509
5510/* Lock devices from the top of the tree down */
5511static void pci_slot_lock(struct pci_slot *slot)
5512{
5513	struct pci_dev *dev;
5514
5515	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5516		if (!dev->slot || dev->slot != slot)
5517			continue;
5518		pci_dev_lock(dev);
5519		if (dev->subordinate)
5520			pci_bus_lock(dev->subordinate);
5521	}
5522}
5523
5524/* Unlock devices from the bottom of the tree up */
5525static void pci_slot_unlock(struct pci_slot *slot)
5526{
5527	struct pci_dev *dev;
5528
5529	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5530		if (!dev->slot || dev->slot != slot)
5531			continue;
5532		if (dev->subordinate)
5533			pci_bus_unlock(dev->subordinate);
5534		pci_dev_unlock(dev);
5535	}
5536}
5537
5538/* Return 1 on successful lock, 0 on contention */
5539static int pci_slot_trylock(struct pci_slot *slot)
5540{
5541	struct pci_dev *dev;
5542
5543	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5544		if (!dev->slot || dev->slot != slot)
5545			continue;
5546		if (!pci_dev_trylock(dev))
5547			goto unlock;
5548		if (dev->subordinate) {
5549			if (!pci_bus_trylock(dev->subordinate)) {
5550				pci_dev_unlock(dev);
5551				goto unlock;
5552			}
5553		}
5554	}
5555	return 1;
5556
5557unlock:
5558	list_for_each_entry_continue_reverse(dev,
5559					     &slot->bus->devices, bus_list) {
5560		if (!dev->slot || dev->slot != slot)
5561			continue;
5562		if (dev->subordinate)
5563			pci_bus_unlock(dev->subordinate);
5564		pci_dev_unlock(dev);
5565	}
5566	return 0;
5567}
5568
5569/*
5570 * Save and disable devices from the top of the tree down while holding
5571 * the @dev mutex lock for the entire tree.
5572 */
5573static void pci_bus_save_and_disable_locked(struct pci_bus *bus)
5574{
5575	struct pci_dev *dev;
5576
5577	list_for_each_entry(dev, &bus->devices, bus_list) {
5578		pci_dev_save_and_disable(dev);
5579		if (dev->subordinate)
5580			pci_bus_save_and_disable_locked(dev->subordinate);
5581	}
5582}
5583
5584/*
5585 * Restore devices from top of the tree down while holding @dev mutex lock
5586 * for the entire tree.  Parent bridges need to be restored before we can
5587 * get to subordinate devices.
5588 */
5589static void pci_bus_restore_locked(struct pci_bus *bus)
5590{
5591	struct pci_dev *dev;
5592
5593	list_for_each_entry(dev, &bus->devices, bus_list) {
5594		pci_dev_restore(dev);
5595		if (dev->subordinate)
5596			pci_bus_restore_locked(dev->subordinate);
5597	}
5598}
5599
5600/*
5601 * Save and disable devices from the top of the tree down while holding
5602 * the @dev mutex lock for the entire tree.
5603 */
5604static void pci_slot_save_and_disable_locked(struct pci_slot *slot)
5605{
5606	struct pci_dev *dev;
5607
5608	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5609		if (!dev->slot || dev->slot != slot)
5610			continue;
5611		pci_dev_save_and_disable(dev);
5612		if (dev->subordinate)
5613			pci_bus_save_and_disable_locked(dev->subordinate);
5614	}
5615}
5616
5617/*
5618 * Restore devices from the top of the tree down, with the device mutex of
5619 * every device in the tree held by the caller.  Parent bridges need to be
5620 * restored before we can get to subordinate devices.
5621 */
5622static void pci_slot_restore_locked(struct pci_slot *slot)
5623{
5624	struct pci_dev *dev;
5625
5626	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
5627		if (!dev->slot || dev->slot != slot)
5628			continue;
5629		pci_dev_restore(dev);
5630		if (dev->subordinate)
5631			pci_bus_restore_locked(dev->subordinate);
5632	}
5633}
5634
5635static int pci_slot_reset(struct pci_slot *slot, bool probe)
5636{
5637	int rc;
5638
5639	if (!slot || !pci_slot_resettable(slot))
5640		return -ENOTTY;
5641
5642	if (!probe)
5643		pci_slot_lock(slot);
5644
5645	might_sleep();
5646
5647	rc = pci_reset_hotplug_slot(slot->hotplug, probe);
5648
5649	if (!probe)
5650		pci_slot_unlock(slot);
5651
5652	return rc;
5653}
5654
5655/**
5656 * pci_probe_reset_slot - probe whether a PCI slot can be reset
5657 * @slot: PCI slot to probe
5658 *
5659 * Return 0 if slot can be reset, negative if a slot reset is not supported.
5660 */
5661int pci_probe_reset_slot(struct pci_slot *slot)
5662{
5663	return pci_slot_reset(slot, PCI_RESET_PROBE);
5664}
5665EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
5666
5667/**
5668 * __pci_reset_slot - Try to reset a PCI slot
5669 * @slot: PCI slot to reset
5670 *
5671 * A PCI bus may host multiple slots, and each slot may support a reset
5672 * mechanism independent of the other slots.  For instance, some slots may
5673 * support slot power control.  In the case of a 1:1 bus-to-slot architecture,
5674 * this function may wrap the bus reset to avoid spurious slot-related events
5675 * such as hotplug.  Generally a slot reset should be attempted before a bus
5676 * reset.  All of the functions in the slot and any subordinate buses behind
5677 * the slot are reset through this function.  PCI config space of all devices
5678 * in the slot and behind the slot is saved before and restored after reset.
5679 *
5680 * Same as above except return -EAGAIN if the slot cannot be locked.
5681 */
5682static int __pci_reset_slot(struct pci_slot *slot)
5683{
5684	int rc;
5685
5686	rc = pci_slot_reset(slot, PCI_RESET_PROBE);
5687	if (rc)
5688		return rc;
5689
5690	if (pci_slot_trylock(slot)) {
5691		pci_slot_save_and_disable_locked(slot);
5692		might_sleep();
5693		rc = pci_reset_hotplug_slot(slot->hotplug, PCI_RESET_DO_RESET);
5694		pci_slot_restore_locked(slot);
5695		pci_slot_unlock(slot);
5696	} else
5697		rc = -EAGAIN;
5698
5699	return rc;
5700}
5701
5702static int pci_bus_reset(struct pci_bus *bus, bool probe)
5703{
5704	int ret;
5705
5706	if (!bus->self || !pci_bus_resettable(bus))
5707		return -ENOTTY;
5708
5709	if (probe)
5710		return 0;
5711
5712	pci_bus_lock(bus);
5713
5714	might_sleep();
5715
5716	ret = pci_bridge_secondary_bus_reset(bus->self);
5717
5718	pci_bus_unlock(bus);
5719
5720	return ret;
5721}
5722
5723/**
5724 * pci_bus_error_reset - reset the bridge's subordinate bus
5725 * @bridge: The parent device that connects to the bus to reset
5726 *
5727 * This function will first try to reset the slots on this bus if the method is
5728 * available. If slot reset fails or is not available, this will fall back to a
5729 * secondary bus reset.
5730 */
5731int pci_bus_error_reset(struct pci_dev *bridge)
5732{
5733	struct pci_bus *bus = bridge->subordinate;
5734	struct pci_slot *slot;
5735
5736	if (!bus)
5737		return -ENOTTY;
5738
5739	mutex_lock(&pci_slot_mutex);
5740	if (list_empty(&bus->slots))
5741		goto bus_reset;
5742
5743	list_for_each_entry(slot, &bus->slots, list)
5744		if (pci_probe_reset_slot(slot))
5745			goto bus_reset;
5746
5747	list_for_each_entry(slot, &bus->slots, list)
5748		if (pci_slot_reset(slot, PCI_RESET_DO_RESET))
5749			goto bus_reset;
5750
5751	mutex_unlock(&pci_slot_mutex);
5752	return 0;
5753bus_reset:
5754	mutex_unlock(&pci_slot_mutex);
5755	return pci_bus_reset(bridge->subordinate, PCI_RESET_DO_RESET);
5756}
5757
5758/**
5759 * pci_probe_reset_bus - probe whether a PCI bus can be reset
5760 * @bus: PCI bus to probe
5761 *
5762 * Return 0 if bus can be reset, negative if a bus reset is not supported.
5763 */
5764int pci_probe_reset_bus(struct pci_bus *bus)
5765{
5766	return pci_bus_reset(bus, PCI_RESET_PROBE);
5767}
5768EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
5769
5770/**
5771 * __pci_reset_bus - Try to reset a PCI bus
5772 * @bus: top level PCI bus to reset
5773 *
5774 * Same as above except return -EAGAIN if the bus cannot be locked
5775 */
5776static int __pci_reset_bus(struct pci_bus *bus)
5777{
5778	int rc;
5779
5780	rc = pci_bus_reset(bus, PCI_RESET_PROBE);
5781	if (rc)
5782		return rc;
5783
5784	if (pci_bus_trylock(bus)) {
5785		pci_bus_save_and_disable_locked(bus);
5786		might_sleep();
5787		rc = pci_bridge_secondary_bus_reset(bus->self);
5788		pci_bus_restore_locked(bus);
5789		pci_bus_unlock(bus);
5790	} else
5791		rc = -EAGAIN;
5792
5793	return rc;
5794}
5795
5796/**
5797 * pci_reset_bus - Try to reset a PCI bus
5798 * @pdev: top level PCI device to reset via slot/bus
5799 *
5800 * Same as above except return -EAGAIN if the slot or bus cannot be locked.
5801 */
5802int pci_reset_bus(struct pci_dev *pdev)
5803{
5804	return (!pci_probe_reset_slot(pdev->slot)) ?
5805	    __pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus);
5806}
5807EXPORT_SYMBOL_GPL(pci_reset_bus);
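
/*
 * Illustrative sketch (hypothetical caller, not from this file): a driver
 * that has quiesced its device might reset everything behind the slot or
 * bus and retry once on lock contention.  The function name and the
 * retry-once policy below are assumptions for illustration only.
 *
 *	static int foo_reset_topology(struct pci_dev *pdev)
 *	{
 *		int ret = pci_reset_bus(pdev);
 *
 *		if (ret == -EAGAIN)
 *			ret = pci_reset_bus(pdev);
 *		return ret;
 *	}
 */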
5808
5809/**
5810 * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
5811 * @dev: PCI device to query
5812 *
5813 * Returns mmrbc: maximum designed memory read count in bytes, or an
5814 * appropriate error value.
5815 */
5816int pcix_get_max_mmrbc(struct pci_dev *dev)
5817{
5818	int cap;
5819	u32 stat;
5820
5821	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
5822	if (!cap)
5823		return -EINVAL;
5824
5825	if (pci_read_config_dword(dev, cap + PCI_X_STATUS, &stat))
5826		return -EINVAL;
5827
5828	return 512 << FIELD_GET(PCI_X_STATUS_MAX_READ, stat);
5829}
5830EXPORT_SYMBOL(pcix_get_max_mmrbc);
5831
5832/**
5833 * pcix_get_mmrbc - get PCI-X maximum memory read byte count
5834 * @dev: PCI device to query
5835 *
5836 * Returns mmrbc: maximum memory read count in bytes, or an appropriate
5837 * error value.
5838 */
5839int pcix_get_mmrbc(struct pci_dev *dev)
5840{
5841	int cap;
5842	u16 cmd;
5843
5844	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
5845	if (!cap)
5846		return -EINVAL;
5847
5848	if (pci_read_config_word(dev, cap + PCI_X_CMD, &cmd))
5849		return -EINVAL;
5850
5851	return 512 << FIELD_GET(PCI_X_CMD_MAX_READ, cmd);
5852}
5853EXPORT_SYMBOL(pcix_get_mmrbc);
5854
5855/**
5856 * pcix_set_mmrbc - set PCI-X maximum memory read byte count
5857 * @dev: PCI device to configure
5858 * @mmrbc: maximum memory read count in bytes
5859 *    valid values are 512, 1024, 2048, 4096
5860 *
5861 * If possible, set the maximum memory read byte count; some bridges have
5862 * errata that prevent this.
5863 */
5864int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc)
5865{
5866	int cap;
5867	u32 stat, v, o;
5868	u16 cmd;
5869
5870	if (mmrbc < 512 || mmrbc > 4096 || !is_power_of_2(mmrbc))
5871		return -EINVAL;
5872
5873	v = ffs(mmrbc) - 10;
5874
5875	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
5876	if (!cap)
5877		return -EINVAL;
5878
5879	if (pci_read_config_dword(dev, cap + PCI_X_STATUS, &stat))
5880		return -EINVAL;
5881
5882	if (v > FIELD_GET(PCI_X_STATUS_MAX_READ, stat))
5883		return -E2BIG;
5884
5885	if (pci_read_config_word(dev, cap + PCI_X_CMD, &cmd))
5886		return -EINVAL;
5887
5888	o = FIELD_GET(PCI_X_CMD_MAX_READ, cmd);
5889	if (o != v) {
5890		if (v > o && (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_MMRBC))
5891			return -EIO;
5892
5893		cmd &= ~PCI_X_CMD_MAX_READ;
5894		cmd |= FIELD_PREP(PCI_X_CMD_MAX_READ, v);
5895		if (pci_write_config_word(dev, cap + PCI_X_CMD, cmd))
5896			return -EIO;
5897	}
5898	return 0;
5899}
5900EXPORT_SYMBOL(pcix_set_mmrbc);
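
/*
 * Illustrative sketch (hypothetical caller): a PCI-X driver might clamp its
 * preferred burst size to what the device was designed for before setting it:
 *
 *	int max = pcix_get_max_mmrbc(pdev);
 *
 *	if (max > 0)
 *		pcix_set_mmrbc(pdev, min(max, 2048));
 *
 * The register encoding above is log2-based: 512 << field selects 512, 1024,
 * 2048 or 4096 bytes, and ffs(mmrbc) - 10 recovers the field, e.g.
 * ffs(2048) - 10 = 12 - 10 = 2.
 */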
5901
5902/**
5903 * pcie_get_readrq - get PCI Express read request size
5904 * @dev: PCI device to query
5905 *
5906 * Returns maximum memory read request in bytes or appropriate error value.
5907 */
5908int pcie_get_readrq(struct pci_dev *dev)
5909{
5910	u16 ctl;
5911
5912	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
5913
5914	return 128 << FIELD_GET(PCI_EXP_DEVCTL_READRQ, ctl);
5915}
5916EXPORT_SYMBOL(pcie_get_readrq);
5917
5918/**
5919 * pcie_set_readrq - set PCI Express maximum memory read request
5920 * @dev: PCI device to configure
5921 * @rq: maximum memory read count in bytes
5922 *    valid values are 128, 256, 512, 1024, 2048, 4096
5923 *
5924 * If possible, set the maximum memory read request size in bytes.
5925 */
5926int pcie_set_readrq(struct pci_dev *dev, int rq)
5927{
5928	u16 v;
5929	int ret;
5930	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
5931
5932	if (rq < 128 || rq > 4096 || !is_power_of_2(rq))
5933		return -EINVAL;
5934
5935	/*
5936	 * If using the "performance" PCIe config, we clamp the read rq
5937	 * size to the max packet size to keep the host bridge from
5938	 * generating requests larger than we can cope with.
5939	 */
5940	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
5941		int mps = pcie_get_mps(dev);
5942
5943		if (mps < rq)
5944			rq = mps;
5945	}
5946
5947	v = FIELD_PREP(PCI_EXP_DEVCTL_READRQ, ffs(rq) - 8);
5948
5949	if (bridge->no_inc_mrrs) {
5950		int max_mrrs = pcie_get_readrq(dev);
5951
5952		if (rq > max_mrrs) {
5953			pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
5954			return -EINVAL;
5955		}
5956	}
5957
5958	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
5959						  PCI_EXP_DEVCTL_READRQ, v);
5960
5961	return pcibios_err_to_errno(ret);
5962}
5963EXPORT_SYMBOL(pcie_set_readrq);
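
/*
 * Illustrative sketch (hypothetical caller): a driver that prefers 512-byte
 * read requests could ask for them and tolerate failure, since the device
 * keeps working with whatever MRRS it already has:
 *
 *	if (pcie_set_readrq(pdev, 512))
 *		pci_info(pdev, "keeping default Max_Read_Request_Size\n");
 *
 * The DEVCTL encoding above is 128 << field, so ffs(512) - 8 = 10 - 8 = 2
 * selects the 512-byte setting.
 */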
5964
5965/**
5966 * pcie_get_mps - get PCI Express maximum payload size
5967 * @dev: PCI device to query
5968 *
5969 * Returns maximum payload size in bytes
5970 */
5971int pcie_get_mps(struct pci_dev *dev)
5972{
5973	u16 ctl;
5974
5975	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
5976
5977	return 128 << FIELD_GET(PCI_EXP_DEVCTL_PAYLOAD, ctl);
5978}
5979EXPORT_SYMBOL(pcie_get_mps);
5980
5981/**
5982 * pcie_set_mps - set PCI Express maximum payload size
5983 * @dev: PCI device to configure
5984 * @mps: maximum payload size in bytes
5985 *    valid values are 128, 256, 512, 1024, 2048, 4096
5986 *
5987 * If possible, set the maximum payload size in bytes.
5988 */
5989int pcie_set_mps(struct pci_dev *dev, int mps)
5990{
5991	u16 v;
5992	int ret;
5993
5994	if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
5995		return -EINVAL;
5996
5997	v = ffs(mps) - 8;
5998	if (v > dev->pcie_mpss)
5999		return -EINVAL;
6000	v = FIELD_PREP(PCI_EXP_DEVCTL_PAYLOAD, v);
6001
6002	ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
6003						  PCI_EXP_DEVCTL_PAYLOAD, v);
6004
6005	return pcibios_err_to_errno(ret);
6006}
6007EXPORT_SYMBOL(pcie_set_mps);
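
/*
 * Worked example of the encoding above: MPS uses the same 128 << field
 * scheme, so a request for 256 bytes becomes ffs(256) - 8 = 9 - 8 = 1 and is
 * rejected with -EINVAL if dev->pcie_mpss (Max_Payload_Size_Supported) is
 * smaller than that.  Callers typically leave MPS to the PCI core's
 * pcie_bus_config policy rather than setting it directly.
 */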
6008
6009static enum pci_bus_speed to_pcie_link_speed(u16 lnksta)
6010{
6011	return pcie_link_speed[FIELD_GET(PCI_EXP_LNKSTA_CLS, lnksta)];
6012}
6013
6014int pcie_link_speed_mbps(struct pci_dev *pdev)
6015{
6016	u16 lnksta;
6017	int err;
6018
6019	err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
6020	if (err)
6021		return err;
6022
6023	switch (to_pcie_link_speed(lnksta)) {
6024	case PCIE_SPEED_2_5GT:
6025		return 2500;
6026	case PCIE_SPEED_5_0GT:
6027		return 5000;
6028	case PCIE_SPEED_8_0GT:
6029		return 8000;
6030	case PCIE_SPEED_16_0GT:
6031		return 16000;
6032	case PCIE_SPEED_32_0GT:
6033		return 32000;
6034	case PCIE_SPEED_64_0GT:
6035		return 64000;
6036	default:
6037		break;
6038	}
6039
6040	return -EINVAL;
6041}
6042EXPORT_SYMBOL(pcie_link_speed_mbps);
6043
6044/**
6045 * pcie_bandwidth_available - determine minimum link settings of a PCIe
6046 *			      device and its bandwidth limitation
6047 * @dev: PCI device to query
6048 * @limiting_dev: storage for device causing the bandwidth limitation
6049 * @speed: storage for speed of limiting device
6050 * @width: storage for width of limiting device
6051 *
6052 * Walk up the PCI device chain and find the point where the minimum
6053 * bandwidth is available.  Return the bandwidth available there and (if
6054 * limiting_dev, speed, and width pointers are supplied) information about
6055 * that point.  The bandwidth returned is in Mb/s, i.e., megabits/second of
6056 * raw bandwidth.
6057 */
6058u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
6059			     enum pci_bus_speed *speed,
6060			     enum pcie_link_width *width)
6061{
6062	u16 lnksta;
6063	enum pci_bus_speed next_speed;
6064	enum pcie_link_width next_width;
6065	u32 bw, next_bw;
6066
6067	if (speed)
6068		*speed = PCI_SPEED_UNKNOWN;
6069	if (width)
6070		*width = PCIE_LNK_WIDTH_UNKNOWN;
6071
6072	bw = 0;
6073
6074	while (dev) {
6075		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
6076
6077		next_speed = to_pcie_link_speed(lnksta);
6078		next_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
6079
6080		next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed);
6081
6082		/* Check if current device limits the total bandwidth */
6083		if (!bw || next_bw <= bw) {
6084			bw = next_bw;
6085
6086			if (limiting_dev)
6087				*limiting_dev = dev;
6088			if (speed)
6089				*speed = next_speed;
6090			if (width)
6091				*width = next_width;
6092		}
6093
6094		dev = pci_upstream_bridge(dev);
6095	}
6096
6097	return bw;
6098}
6099EXPORT_SYMBOL(pcie_bandwidth_available);
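
/*
 * Illustrative sketch (hypothetical caller): a bandwidth-hungry driver could
 * ask which upstream link caps its throughput:
 *
 *	enum pci_bus_speed speed;
 *	enum pcie_link_width width;
 *	struct pci_dev *limit = NULL;
 *	u32 bw = pcie_bandwidth_available(pdev, &limit, &speed, &width);
 *
 *	pci_info(pdev, "%u Mb/s available via %s\n", bw,
 *		 limit ? pci_name(limit) : "<unknown>");
 *
 * Most drivers should simply call pcie_print_link_status() below, which
 * wraps this walk and reports the limiting device.
 */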
6100
6101/**
6102 * pcie_get_speed_cap - query for the PCI device's link speed capability
6103 * @dev: PCI device to query
6104 *
6105 * Query the PCI device speed capability.  Return the maximum link speed
6106 * supported by the device.
6107 */
6108enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
6109{
6110	u32 lnkcap2, lnkcap;
6111
6112	/*
6113	 * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18.  The
6114	 * implementation note there recommends using the Supported Link
6115	 * Speeds Vector in Link Capabilities 2 when supported.
6116	 *
6117	 * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software
6118	 * should use the Supported Link Speeds field in Link Capabilities,
6119	 * where only 2.5 GT/s and 5.0 GT/s speeds were defined.
6120	 */
6121	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
6122
6123	/* PCIe r3.0-compliant */
6124	if (lnkcap2)
6125		return PCIE_LNKCAP2_SLS2SPEED(lnkcap2);
6126
6127	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
6128	if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
6129		return PCIE_SPEED_5_0GT;
6130	else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB)
6131		return PCIE_SPEED_2_5GT;
6132
6133	return PCI_SPEED_UNKNOWN;
6134}
6135EXPORT_SYMBOL(pcie_get_speed_cap);
6136
6137/**
6138 * pcie_get_width_cap - query for the PCI device's link width capability
6139 * @dev: PCI device to query
6140 *
6141 * Query the PCI device width capability.  Return the maximum link width
6142 * supported by the device.
6143 */
6144enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev)
6145{
6146	u32 lnkcap;
6147
6148	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
6149	if (lnkcap)
6150		return FIELD_GET(PCI_EXP_LNKCAP_MLW, lnkcap);
6151
6152	return PCIE_LNK_WIDTH_UNKNOWN;
6153}
6154EXPORT_SYMBOL(pcie_get_width_cap);
6155
6156/**
6157 * pcie_bandwidth_capable - calculate a PCI device's link bandwidth capability
6158 * @dev: PCI device
6159 * @speed: storage for link speed
6160 * @width: storage for link width
6161 *
6162 * Calculate a PCI device's link bandwidth by querying for its link speed
6163 * and width, multiplying them, and applying encoding overhead.  The result
6164 * is in Mb/s, i.e., megabits/second of raw bandwidth.
6165 */
6166static u32 pcie_bandwidth_capable(struct pci_dev *dev,
6167				  enum pci_bus_speed *speed,
6168				  enum pcie_link_width *width)
6169{
6170	*speed = pcie_get_speed_cap(dev);
6171	*width = pcie_get_width_cap(dev);
6172
6173	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN)
6174		return 0;
6175
6176	return *width * PCIE_SPEED2MBS_ENC(*speed);
6177}
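
/*
 * Worked example of the arithmetic above: PCIE_SPEED2MBS_ENC() already folds
 * in encoding overhead (8.0 GT/s uses 128b/130b encoding, about 7877 Mb/s per
 * lane), so an x8 device capable of 8.0 GT/s reports
 * 8 * 7877 = 63016 Mb/s (~63 Gb/s) of raw bandwidth.
 */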
6178
6179/**
6180 * __pcie_print_link_status - Report the PCI device's link speed and width
6181 * @dev: PCI device to query
6182 * @verbose: Print info even when enough bandwidth is available
6183 *
6184 * If the available bandwidth at the device is less than the device is
6185 * capable of, report the device's maximum possible bandwidth and the
6186 * upstream link that limits its performance.  If @verbose, always print
6187 * the available bandwidth, even if the device isn't constrained.
6188 */
6189void __pcie_print_link_status(struct pci_dev *dev, bool verbose)
6190{
6191	enum pcie_link_width width, width_cap;
6192	enum pci_bus_speed speed, speed_cap;
6193	struct pci_dev *limiting_dev = NULL;
6194	u32 bw_avail, bw_cap;
6195
6196	bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap);
6197	bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
6198
6199	if (bw_avail >= bw_cap && verbose)
6200		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
6201			 bw_cap / 1000, bw_cap % 1000,
6202			 pci_speed_string(speed_cap), width_cap);
6203	else if (bw_avail < bw_cap)
6204		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
6205			 bw_avail / 1000, bw_avail % 1000,
6206			 pci_speed_string(speed), width,
6207			 limiting_dev ? pci_name(limiting_dev) : "<unknown>",
6208			 bw_cap / 1000, bw_cap % 1000,
6209			 pci_speed_string(speed_cap), width_cap);
6210}
6211
6212/**
6213 * pcie_print_link_status - Report the PCI device's link speed and width
6214 * @dev: PCI device to query
6215 *
6216 * Report the available bandwidth at the device.
6217 */
6218void pcie_print_link_status(struct pci_dev *dev)
6219{
6220	__pcie_print_link_status(dev, true);
6221}
6222EXPORT_SYMBOL(pcie_print_link_status);
6223
6224/**
6225 * pci_select_bars - Make BAR mask from the type of resource
6226 * @dev: the PCI device for which BAR mask is made
6227 * @flags: resource type mask to be selected
6228 *
6229 * This helper routine makes a BAR mask from the type of resource.
6230 */
6231int pci_select_bars(struct pci_dev *dev, unsigned long flags)
6232{
6233	int i, bars = 0;
6234	for (i = 0; i < PCI_NUM_RESOURCES; i++)
6235		if (pci_resource_flags(dev, i) & flags)
6236			bars |= (1 << i);
6237	return bars;
6238}
6239EXPORT_SYMBOL(pci_select_bars);
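
/*
 * Illustrative sketch (hypothetical caller): the mask pairs naturally with
 * pci_request_selected_regions(), e.g. to claim only the MMIO BARs:
 *
 *	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
 *
 *	if (pci_request_selected_regions(pdev, bars, "foo_driver"))
 *		return -EBUSY;
 *
 * "foo_driver" is a placeholder resource name.
 */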
6240
6241/* Some architectures require additional programming to enable VGA */
6242static arch_set_vga_state_t arch_set_vga_state;
6243
6244void __init pci_register_set_vga_state(arch_set_vga_state_t func)
6245{
6246	arch_set_vga_state = func;	/* NULL disables */
6247}
6248
6249static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
6250				  unsigned int command_bits, u32 flags)
6251{
6252	if (arch_set_vga_state)
6253		return arch_set_vga_state(dev, decode, command_bits,
6254						flags);
6255	return 0;
6256}
6257
6258/**
6259 * pci_set_vga_state - set VGA decode state on device and parents if requested
6260 * @dev: the PCI device
6261 * @decode: true = enable decoding, false = disable decoding
6262 * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY
6263 * @flags: PCI_VGA_STATE_CHANGE_DECODES to update @command_bits on the device,
6264 * and/or PCI_VGA_STATE_CHANGE_BRIDGE to traverse ancestors and update bridges
6265 */
6266int pci_set_vga_state(struct pci_dev *dev, bool decode,
6267		      unsigned int command_bits, u32 flags)
6268{
6269	struct pci_bus *bus;
6270	struct pci_dev *bridge;
6271	u16 cmd;
6272	int rc;
6273
6274	WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
6275
6276	/* ARCH specific VGA enables */
6277	rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
6278	if (rc)
6279		return rc;
6280
6281	if (flags & PCI_VGA_STATE_CHANGE_DECODES) {
6282		pci_read_config_word(dev, PCI_COMMAND, &cmd);
6283		if (decode)
6284			cmd |= command_bits;
6285		else
6286			cmd &= ~command_bits;
6287		pci_write_config_word(dev, PCI_COMMAND, cmd);
6288	}
6289
6290	if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
6291		return 0;
6292
6293	bus = dev->bus;
6294	while (bus) {
6295		bridge = bus->self;
6296		if (bridge) {
6297			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
6298					     &cmd);
6299			if (decode)
6300				cmd |= PCI_BRIDGE_CTL_VGA;
6301			else
6302				cmd &= ~PCI_BRIDGE_CTL_VGA;
6303			pci_write_config_word(bridge, PCI_BRIDGE_CONTROL,
6304					      cmd);
6305		}
6306		bus = bus->parent;
6307	}
6308	return 0;
6309}
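
/*
 * Illustrative sketch (assumption, not taken from a real caller): the VGA
 * arbiter is the typical user.  Granting a device ownership of the legacy
 * VGA ranges could look like:
 *
 *	pci_set_vga_state(pdev, true,
 *			  PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
 *			  PCI_VGA_STATE_CHANGE_DECODES |
 *			  PCI_VGA_STATE_CHANGE_BRIDGE);
 *
 * which enables I/O and memory decode on the device itself and sets
 * PCI_BRIDGE_CTL_VGA on every bridge above it.
 */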
6310
6311#ifdef CONFIG_ACPI
6312bool pci_pr3_present(struct pci_dev *pdev)
6313{
6314	struct acpi_device *adev;
6315
6316	if (acpi_disabled)
6317		return false;
6318
6319	adev = ACPI_COMPANION(&pdev->dev);
6320	if (!adev)
6321		return false;
6322
6323	return adev->power.flags.power_resources &&
6324		acpi_has_method(adev->handle, "_PR3");
6325}
6326EXPORT_SYMBOL_GPL(pci_pr3_present);
6327#endif
6328
6329/**
6330 * pci_add_dma_alias - Add a DMA devfn alias for a device
6331 * @dev: the PCI device for which alias is added
6332 * @devfn_from: alias slot and function
6333 * @nr_devfns: number of subsequent devfns to alias
6334 *
6335 * This helper encodes an 8-bit devfn as a bit number in dma_alias_mask
6336 * which is used to program permissible bus-devfn source addresses for DMA
6337 * requests in an IOMMU.  These aliases factor into IOMMU group creation
6338 * and are useful for devices generating DMA requests beyond or different
6339 * from their logical bus-devfn.  Examples include device quirks where the
6340 * device simply uses the wrong devfn, as well as non-transparent bridges
6341 * where the alias may be a proxy for devices in another domain.
6342 *
6343 * IOMMU group creation is performed during device discovery or addition,
6344 * prior to any potential DMA mapping and therefore prior to driver probing
6345 * (especially for userspace assigned devices where IOMMU group definition
6346 * cannot be left as a userspace activity).  DMA aliases should therefore
6347 * be configured via quirks, such as the PCI fixup header quirk.
6348 */
6349void pci_add_dma_alias(struct pci_dev *dev, u8 devfn_from,
6350		       unsigned int nr_devfns)
6351{
6352	int devfn_to;
6353
6354	nr_devfns = min(nr_devfns, (unsigned int)MAX_NR_DEVFNS - devfn_from);
6355	devfn_to = devfn_from + nr_devfns - 1;
6356
6357	if (!dev->dma_alias_mask)
6358		dev->dma_alias_mask = bitmap_zalloc(MAX_NR_DEVFNS, GFP_KERNEL);
6359	if (!dev->dma_alias_mask) {
6360		pci_warn(dev, "Unable to allocate DMA alias mask\n");
6361		return;
6362	}
6363
6364	bitmap_set(dev->dma_alias_mask, devfn_from, nr_devfns);
6365
6366	if (nr_devfns == 1)
6367		pci_info(dev, "Enabling fixed DMA alias to %02x.%d\n",
6368				PCI_SLOT(devfn_from), PCI_FUNC(devfn_from));
6369	else if (nr_devfns > 1)
6370		pci_info(dev, "Enabling fixed DMA alias for devfn range from %02x.%d to %02x.%d\n",
6371				PCI_SLOT(devfn_from), PCI_FUNC(devfn_from),
6372				PCI_SLOT(devfn_to), PCI_FUNC(devfn_to));
6373}
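
/*
 * Illustrative sketch (hypothetical quirk, for illustration only): DMA
 * aliases are normally added from a header fixup, e.g. for a device that
 * issues requests as function 0 of its own slot:
 *
 *	static void quirk_foo_dma_alias(struct pci_dev *dev)
 *	{
 *		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0), 1);
 *	}
 *	DECLARE_PCI_FIXUP_HEADER(0x1234, 0x5678, quirk_foo_dma_alias);
 *
 * The vendor and device IDs above are placeholders.
 */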
6374
6375bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2)
6376{
6377	return (dev1->dma_alias_mask &&
6378		test_bit(dev2->devfn, dev1->dma_alias_mask)) ||
6379	       (dev2->dma_alias_mask &&
6380		test_bit(dev1->devfn, dev2->dma_alias_mask)) ||
6381	       pci_real_dma_dev(dev1) == dev2 ||
6382	       pci_real_dma_dev(dev2) == dev1;
6383}
6384
6385bool pci_device_is_present(struct pci_dev *pdev)
6386{
6387	u32 v;
6388
6389	/* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */
6390	pdev = pci_physfn(pdev);
6391	if (pci_dev_is_disconnected(pdev))
6392		return false;
6393	return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
6394}
6395EXPORT_SYMBOL_GPL(pci_device_is_present);
6396
6397void pci_ignore_hotplug(struct pci_dev *dev)
6398{
6399	struct pci_dev *bridge = dev->bus->self;
6400
6401	dev->ignore_hotplug = 1;
6402	/* Propagate the "ignore hotplug" setting to the parent bridge. */
6403	if (bridge)
6404		bridge->ignore_hotplug = 1;
6405}
6406EXPORT_SYMBOL_GPL(pci_ignore_hotplug);
6407
6408/**
6409 * pci_real_dma_dev - Get PCI DMA device for PCI device
6410 * @dev: the PCI device that may have a PCI DMA alias
6411 *
6412 * Permits the platform to provide architecture-specific functionality to
6413 * devices needing to alias DMA to another PCI device on another PCI bus. If
6414 * the PCI device is on the same bus, it is recommended to use
6415 * pci_add_dma_alias(). This is the default implementation. Architecture
6416 * implementations can override this.
6417 */
6418struct pci_dev __weak *pci_real_dma_dev(struct pci_dev *dev)
6419{
6420	return dev;
6421}
6422
6423resource_size_t __weak pcibios_default_alignment(void)
6424{
6425	return 0;
6426}
6427
6428/*
6429 * Arches that don't want to expose struct resource to userland as-is in
6430 * sysfs and /proc can implement their own pci_resource_to_user().
6431 */
6432void __weak pci_resource_to_user(const struct pci_dev *dev, int bar,
6433				 const struct resource *rsrc,
6434				 resource_size_t *start, resource_size_t *end)
6435{
6436	*start = rsrc->start;
6437	*end = rsrc->end;
6438}
6439
6440static char *resource_alignment_param;
6441static DEFINE_SPINLOCK(resource_alignment_lock);
6442
6443/**
6444 * pci_specified_resource_alignment - get resource alignment specified by user.
6445 * @dev: the PCI device to get the alignment for
6446 * @resize: whether or not to change resources' size when reassigning alignment
6447 *
6448 * RETURNS: Resource alignment if it is specified.
6449 *          Zero if it is not specified.
6450 */
6451static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
6452							bool *resize)
6453{
6454	int align_order, count;
6455	resource_size_t align = pcibios_default_alignment();
6456	const char *p;
6457	int ret;
6458
6459	spin_lock(&resource_alignment_lock);
6460	p = resource_alignment_param;
6461	if (!p || !*p)
6462		goto out;
6463	if (pci_has_flag(PCI_PROBE_ONLY)) {
6464		align = 0;
6465		pr_info_once("PCI: Ignoring requested alignments (PCI_PROBE_ONLY)\n");
6466		goto out;
6467	}
6468
6469	while (*p) {
6470		count = 0;
6471		if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
6472		    p[count] == '@') {
6473			p += count + 1;
6474			if (align_order > 63) {
6475				pr_err("PCI: Invalid requested alignment (order %d)\n",
6476				       align_order);
6477				align_order = PAGE_SHIFT;
6478			}
6479		} else {
6480			align_order = PAGE_SHIFT;
6481		}
6482
6483		ret = pci_dev_str_match(dev, p, &p);
6484		if (ret == 1) {
6485			*resize = true;
6486			align = 1ULL << align_order;
6487			break;
6488		} else if (ret < 0) {
6489			pr_err("PCI: Can't parse resource_alignment parameter: %s\n",
6490			       p);
6491			break;
6492		}
6493
6494		if (*p != ';' && *p != ',') {
6495			/* End of param or invalid format */
6496			break;
6497		}
6498		p++;
6499	}
6500out:
6501	spin_unlock(&resource_alignment_lock);
6502	return align;
6503}
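
/*
 * Example of the format parsed above (for illustration): the alignment is a
 * power-of-two order, optionally given per device, e.g.
 *
 *	pci=resource_alignment=20@pci:8086:10d3;12@0000:01:00.0
 *
 * requests 1 MiB (2^20) alignment for any 8086:10d3 device and 4 KiB (2^12)
 * alignment for the device at 0000:01:00.0.  Without an "order@" prefix,
 * PAGE_SHIFT is used.  The device IDs above are placeholders.
 */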
6504
6505static void pci_request_resource_alignment(struct pci_dev *dev, int bar,
6506					   resource_size_t align, bool resize)
6507{
6508	struct resource *r = &dev->resource[bar];
6509	const char *r_name = pci_resource_name(dev, bar);
6510	resource_size_t size;
6511
6512	if (!(r->flags & IORESOURCE_MEM))
6513		return;
6514
6515	if (r->flags & IORESOURCE_PCI_FIXED) {
6516		pci_info(dev, "%s %pR: ignoring requested alignment %#llx\n",
6517			 r_name, r, (unsigned long long)align);
6518		return;
6519	}
6520
6521	size = resource_size(r);
6522	if (size >= align)
6523		return;
6524
6525	/*
6526	 * Increase the alignment of the resource.  There are two ways we
6527	 * can do this:
6528	 *
6529	 * 1) Increase the size of the resource.  BARs are aligned on their
6530	 *    size, so when we reallocate space for this resource, we'll
6531	 *    allocate it with the larger alignment.  This also prevents
6532	 *    assignment of any other BARs inside the alignment region, so
6533	 *    if we're requesting page alignment, this means no other BARs
6534	 *    will share the page.
6535	 *
6536	 *    The disadvantage is that this makes the resource larger than
6537	 *    the hardware BAR, which may break drivers that compute things
6538	 *    based on the resource size, e.g., to find registers at a
6539	 *    fixed offset before the end of the BAR.
6540	 *
6541	 * 2) Retain the resource size, but use IORESOURCE_STARTALIGN and
6542	 *    set r->start to the desired alignment.  By itself this
6543	 *    doesn't prevent other BARs being put inside the alignment
6544	 *    region, but if we realign *every* resource of every device in
6545	 *    the system, none of them will share an alignment region.
6546	 *
6547	 * When the user has requested alignment for only some devices via
6548	 * the "pci=resource_alignment" argument, "resize" is true and we
6549	 * use the first method.  Otherwise we assume we're aligning all
6550	 * devices and we use the second.
6551	 */
6552
6553	pci_info(dev, "%s %pR: requesting alignment to %#llx\n",
6554		 r_name, r, (unsigned long long)align);
6555
6556	if (resize) {
6557		r->start = 0;
6558		r->end = align - 1;
6559	} else {
6560		r->flags &= ~IORESOURCE_SIZEALIGN;
6561		r->flags |= IORESOURCE_STARTALIGN;
6562		r->start = align;
6563		r->end = r->start + size - 1;
6564	}
6565	r->flags |= IORESOURCE_UNSET;
6566}
6567
6568/*
6569 * This function disables memory decoding and releases memory resources
6570 * of the device specified by the kernel's boot parameter
6571 * 'pci=resource_alignment='.  It also rounds the size up to the specified
6572 * alignment.  Later on, the kernel will assign page-aligned memory
6573 * resources back to the device.
6574 */
6575void pci_reassigndev_resource_alignment(struct pci_dev *dev)
6576{
6577	int i;
6578	struct resource *r;
6579	resource_size_t align;
6580	u16 command;
6581	bool resize = false;
6582
6583	/*
6584	 * VF BARs are read-only zero according to SR-IOV spec r1.1, sec
6585	 * 3.4.1.11.  Their resources are allocated from the space
6586	 * described by the VF BARx register in the PF's SR-IOV capability.
6587	 * We can't influence their alignment here.
6588	 */
6589	if (dev->is_virtfn)
6590		return;
6591
6592	/* Check whether this device was specified for resource reassignment */
6593	align = pci_specified_resource_alignment(dev, &resize);
6594	if (!align)
6595		return;
6596
6597	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
6598	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
6599		pci_warn(dev, "Can't reassign resources to host bridge\n");
6600		return;
6601	}
6602
6603	pci_read_config_word(dev, PCI_COMMAND, &command);
6604	command &= ~PCI_COMMAND_MEMORY;
6605	pci_write_config_word(dev, PCI_COMMAND, command);
6606
6607	for (i = 0; i <= PCI_ROM_RESOURCE; i++)
6608		pci_request_resource_alignment(dev, i, align, resize);
6609
6610	/*
6611	 * Need to disable the bridge's resource windows so
6612	 * the kernel can reassign new resource windows
6613	 * later on.
6614	 */
6615	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
6616		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
6617			r = &dev->resource[i];
6618			if (!(r->flags & IORESOURCE_MEM))
6619				continue;
6620			r->flags |= IORESOURCE_UNSET;
6621			r->end = resource_size(r) - 1;
6622			r->start = 0;
6623		}
6624		pci_disable_bridge_window(dev);
6625	}
6626}
6627
6628static ssize_t resource_alignment_show(const struct bus_type *bus, char *buf)
6629{
6630	size_t count = 0;
6631
6632	spin_lock(&resource_alignment_lock);
6633	if (resource_alignment_param)
6634		count = sysfs_emit(buf, "%s\n", resource_alignment_param);
6635	spin_unlock(&resource_alignment_lock);
6636
6637	return count;
6638}
6639
6640static ssize_t resource_alignment_store(const struct bus_type *bus,
6641					const char *buf, size_t count)
6642{
6643	char *param, *old, *end;
6644
6645	if (count >= (PAGE_SIZE - 1))
6646		return -EINVAL;
6647
6648	param = kstrndup(buf, count, GFP_KERNEL);
6649	if (!param)
6650		return -ENOMEM;
6651
6652	end = strchr(param, '\n');
6653	if (end)
6654		*end = '\0';
6655
6656	spin_lock(&resource_alignment_lock);
6657	old = resource_alignment_param;
6658	if (strlen(param)) {
6659		resource_alignment_param = param;
6660	} else {
6661		kfree(param);
6662		resource_alignment_param = NULL;
6663	}
6664	spin_unlock(&resource_alignment_lock);
6665
6666	kfree(old);
6667
6668	return count;
6669}
6670
6671static BUS_ATTR_RW(resource_alignment);
6672
6673static int __init pci_resource_alignment_sysfs_init(void)
6674{
6675	return bus_create_file(&pci_bus_type,
6676					&bus_attr_resource_alignment);
6677}
6678late_initcall(pci_resource_alignment_sysfs_init);
6679
6680static void pci_no_domains(void)
6681{
6682#ifdef CONFIG_PCI_DOMAINS
6683	pci_domains_supported = 0;
6684#endif
6685}
6686
6687#ifdef CONFIG_PCI_DOMAINS_GENERIC
6688static DEFINE_IDA(pci_domain_nr_static_ida);
6689static DEFINE_IDA(pci_domain_nr_dynamic_ida);
6690
6691static void of_pci_reserve_static_domain_nr(void)
6692{
6693	struct device_node *np;
6694	int domain_nr;
6695
6696	for_each_node_by_type(np, "pci") {
6697		domain_nr = of_get_pci_domain_nr(np);
6698		if (domain_nr < 0)
6699			continue;
6700		/*
6701		 * Permanently allocate domain_nr in dynamic_ida
6702		 * to prevent it from being allocated dynamically.
6703		 */
6704		ida_alloc_range(&pci_domain_nr_dynamic_ida,
6705				domain_nr, domain_nr, GFP_KERNEL);
6706	}
6707}
6708
6709static int of_pci_bus_find_domain_nr(struct device *parent)
6710{
6711	static bool static_domains_reserved = false;
6712	int domain_nr;
6713
6714	/* On the first call scan device tree for static allocations. */
6715	if (!static_domains_reserved) {
6716		of_pci_reserve_static_domain_nr();
6717		static_domains_reserved = true;
6718	}
6719
6720	if (parent) {
6721		/*
6722		 * If domain is in DT, allocate it in static IDA.  This
6723		 * prevents duplicate static allocations in case of errors
6724		 * in DT.
6725		 */
6726		domain_nr = of_get_pci_domain_nr(parent->of_node);
6727		if (domain_nr >= 0)
6728			return ida_alloc_range(&pci_domain_nr_static_ida,
6729					       domain_nr, domain_nr,
6730					       GFP_KERNEL);
6731	}
6732
6733	/*
6734	 * If the domain was not specified in DT, choose a free ID from the
6735	 * dynamic allocations. All domain numbers from DT are permanently
6736	 * reserved in the dynamic IDA to prevent assigning them to DT nodes
6737	 * that lack a static domain.
6738	 */
6739	return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
6740}
6741
6742static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
6743{
6744	if (bus->domain_nr < 0)
6745		return;
6746
6747	/* Release domain from IDA where it was allocated. */
6748	if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
6749		ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
6750	else
6751		ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
6752}
6753
6754int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
6755{
6756	return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
6757			       acpi_pci_bus_find_domain_nr(bus);
6758}
6759
6760void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
6761{
6762	if (!acpi_disabled)
6763		return;
6764	of_pci_bus_release_domain_nr(bus, parent);
6765}
6766#endif
6767
6768/**
6769 * pci_ext_cfg_avail - can we access extended PCI config space?
6770 *
6771 * Returns 1 if we can access PCI extended config space (offsets
6772 * greater than 0xff). This is the default implementation. Architecture
6773 * implementations can override this.
6774 */
6775int __weak pci_ext_cfg_avail(void)
6776{
6777	return 1;
6778}
6779
6780void __weak pci_fixup_cardbus(struct pci_bus *bus)
6781{
6782}
6783EXPORT_SYMBOL(pci_fixup_cardbus);
6784
6785static int __init pci_setup(char *str)
6786{
6787	while (str) {
6788		char *k = strchr(str, ',');
6789		if (k)
6790			*k++ = 0;
6791		if (*str && (str = pcibios_setup(str)) && *str) {
6792			if (!strcmp(str, "nomsi")) {
6793				pci_no_msi();
6794			} else if (!strncmp(str, "noats", 5)) {
6795				pr_info("PCIe: ATS is disabled\n");
6796				pcie_ats_disabled = true;
6797			} else if (!strcmp(str, "noaer")) {
6798				pci_no_aer();
6799			} else if (!strcmp(str, "earlydump")) {
6800				pci_early_dump = true;
6801			} else if (!strncmp(str, "realloc=", 8)) {
6802				pci_realloc_get_opt(str + 8);
6803			} else if (!strncmp(str, "realloc", 7)) {
6804				pci_realloc_get_opt("on");
6805			} else if (!strcmp(str, "nodomains")) {
6806				pci_no_domains();
6807			} else if (!strncmp(str, "noari", 5)) {
6808				pcie_ari_disabled = true;
6809			} else if (!strncmp(str, "cbiosize=", 9)) {
6810				pci_cardbus_io_size = memparse(str + 9, &str);
6811			} else if (!strncmp(str, "cbmemsize=", 10)) {
6812				pci_cardbus_mem_size = memparse(str + 10, &str);
6813			} else if (!strncmp(str, "resource_alignment=", 19)) {
6814				resource_alignment_param = str + 19;
6815			} else if (!strncmp(str, "ecrc=", 5)) {
6816				pcie_ecrc_get_policy(str + 5);
6817			} else if (!strncmp(str, "hpiosize=", 9)) {
6818				pci_hotplug_io_size = memparse(str + 9, &str);
6819			} else if (!strncmp(str, "hpmmiosize=", 11)) {
6820				pci_hotplug_mmio_size = memparse(str + 11, &str);
6821			} else if (!strncmp(str, "hpmmioprefsize=", 15)) {
6822				pci_hotplug_mmio_pref_size = memparse(str + 15, &str);
6823			} else if (!strncmp(str, "hpmemsize=", 10)) {
6824				pci_hotplug_mmio_size = memparse(str + 10, &str);
6825				pci_hotplug_mmio_pref_size = pci_hotplug_mmio_size;
6826			} else if (!strncmp(str, "hpbussize=", 10)) {
6827				pci_hotplug_bus_size =
6828					simple_strtoul(str + 10, &str, 0);
6829				if (pci_hotplug_bus_size > 0xff)
6830					pci_hotplug_bus_size = DEFAULT_HOTPLUG_BUS_SIZE;
6831			} else if (!strncmp(str, "pcie_bus_tune_off", 17)) {
6832				pcie_bus_config = PCIE_BUS_TUNE_OFF;
6833			} else if (!strncmp(str, "pcie_bus_safe", 13)) {
6834				pcie_bus_config = PCIE_BUS_SAFE;
6835			} else if (!strncmp(str, "pcie_bus_perf", 13)) {
6836				pcie_bus_config = PCIE_BUS_PERFORMANCE;
6837			} else if (!strncmp(str, "pcie_bus_peer2peer", 18)) {
6838				pcie_bus_config = PCIE_BUS_PEER2PEER;
6839			} else if (!strncmp(str, "pcie_scan_all", 13)) {
6840				pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
6841			} else if (!strncmp(str, "disable_acs_redir=", 18)) {
6842				disable_acs_redir_param = str + 18;
6843			} else {
6844				pr_err("PCI: Unknown option `%s'\n", str);
6845			}
6846		}
6847		str = k;
6848	}
6849	return 0;
6850}
6851early_param("pci", pci_setup);
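
/*
 * Example command lines handled above (for illustration): options are
 * comma-separated after "pci=" and may be combined, e.g.
 *
 *	pci=nomsi,noaer
 *	pci=hpmemsize=128M,hpbussize=8
 *	pci=pcie_bus_perf,realloc
 *
 * Tokens are first offered to pcibios_setup(); anything it does not consume
 * is matched against the options above, and leftovers are reported as
 * unknown options.
 */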
6852
6853/*
6854 * 'resource_alignment_param' and 'disable_acs_redir_param' are initialized
6855 * in pci_setup(), above, to point to data in the __initdata section which
6856 * will be freed after the init sequence is complete. We can't allocate memory
6857 * in pci_setup() because some architectures do not have any memory allocation
6858 * service available during an early_param() call. So we allocate memory and
6859 * copy the variables here before the init section is freed.
6860 */
6862static int __init pci_realloc_setup_params(void)
6863{
6864	resource_alignment_param = kstrdup(resource_alignment_param,
6865					   GFP_KERNEL);
6866	disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL);
6867
6868	return 0;
6869}
6870pure_initcall(pci_realloc_setup_params);
6871