// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>
#include <drm/drm_mm.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
 * stolen, can we then fall back to mapping through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access for stolen,
 * however on such hardware we can always use the mappable part of the GGTT for
 * CPU access. Check if that's the case for this device.
 *
 * Return: true if CPU access to stolen needs to go through the GGTT, false if
 * stolen can be accessed directly.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

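/*
 * detect_bar2_dgfx() - Detect stolen memory on discrete platforms.
 *
 * On dgfx the DSM (stolen) region sits at the end of the root tile's vram.
 * Read its base from the DSMBASE register and treat everything from there up
 * to the end of the tile as stolen. CPU access is only set up if the region
 * fits within the LMEM BAR.
 *
 * Returns the usable stolen size in bytes, or 0 on failure.
 */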
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio = xe_root_mmio_gt(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use the DSM base address for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be a few KiB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always less than the DSM granularity, so align the stolen_size down
	 * to DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

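/*
 * get_wopcm_size() - Decode the WOPCM carve-out size from STOLEN_RESERVED.
 *
 * The WOPCM_SIZE field is an encoded size: values 0x0-0x3 select 1/2/4/8M,
 * and values 0x5-0x6 select 16/32M. Returns the size in bytes, or 0 for an
 * unknown encoding.
 */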
static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

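/*
 * detect_bar2_integrated() - Detect stolen memory on integrated platforms
 * with graphics version 1270 or newer.
 *
 * The DSM size is decoded from the GMS field of the GGC register, and the
 * DSM itself starts right after the 8M GSM at the beginning of BAR2. The
 * reserved WOPCM region at the top of the DSM is carved out.
 *
 * Returns the usable stolen size in bytes, or 0 on failure.
 */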
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as the address in
	 * the PTEs, together with the DM flag being set. Previously there was
	 * no such flag, so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/*
	 * Decode the GMS field: 0x0-0x4 select multiples of 32M, while
	 * 0xf0-0xfe select multiples of 4M. Treat any other encoding as no
	 * stolen memory.
	 */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x4:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS.  We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse.  The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}

extern struct resource intel_graphics_stolen_res;

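/*
 * detect_stolen() - Detect stolen memory on older integrated platforms.
 *
 * The stolen region is discovered by the x86 early quirks and exported as
 * intel_graphics_stolen_res; CPU access to it goes through the GGTT aperture
 * in BAR2. Returns the stolen size in bytes, or 0 on non-x86.
 */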
static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

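/**
 * xe_ttm_stolen_mgr_init() - Initialize the stolen memory manager.
 * @xe: xe device
 *
 * Detect the stolen memory region for this device, initialize the TTM
 * manager backing the XE_PL_STOLEN placement and, when the region is
 * directly CPU accessible, ioremap it write-combined. On failure stolen
 * memory support is simply left disabled.
 */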
void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size, pgsize;
	int err;

	if (!mgr) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
		return;
	}

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
	if (pgsize < PAGE_SIZE)
		pgsize = PAGE_SIZE;

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen always sits at the end of vram and the BAR size is pretty
	 * much always 256M with small-bar.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, pgsize);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}

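/**
 * xe_ttm_stolen_io_offset() - IO address of a stolen memory BO.
 * @bo: the BO backed by stolen memory
 * @offset: byte offset within the BO
 *
 * Return: the IO address used for CPU access to @offset within @bo, going
 * through the GGTT aperture on platforms that require it.
 */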
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

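/*
 * __xe_ttm_stolen_io_mem_reserve_bar2() - Set up CPU access through the
 * stolen region's own PCI BAR mapping. Expects a contiguous resource; a
 * kernel virtual address is provided when the region is already ioremapped.
 */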
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}

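/*
 * __xe_ttm_stolen_io_mem_reserve_stolen() - Set up CPU access through the
 * GGTT aperture on platforms without direct CPU access to stolen. The BO
 * must already have a GGTT mapping, which is always contiguous.
 */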
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

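/**
 * xe_ttm_stolen_io_mem_reserve() - Prepare CPU access to a stolen resource.
 * @xe: xe device
 * @mem: the TTM resource backed by stolen memory
 *
 * Fill in @mem->bus so the resource can be CPU mapped, going through the
 * GGTT when direct access to stolen isn't supported.
 *
 * Return: 0 on success, negative error code on failure.
 */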
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

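/**
 * xe_ttm_stolen_gpu_offset() - GPU address base of the stolen region.
 * @xe: xe device
 *
 * Return: the offset used by the GPU to address the start of stolen memory.
 */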
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}