// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_ggtt.h"

#include <linux/sizes.h>

#include <drm/drm_managed.h>
#include <drm/i915_drm.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_wopcm.h"

#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)

/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
#define GUC_GGTT_TOP	0xFEE00000

static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
				   u16 pat_index)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT;

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_GGTT_PTE_DM;

	return pte;
}

static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
				    u16 pat_index)
{
	struct xe_device *xe = xe_bo_device(bo);
	u64 pte;

	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);

	xe_assert(xe, pat_index <= 3);

	if (pat_index & BIT(0))
		pte |= XELPG_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XELPG_GGTT_PTE_PAT1;

	return pte;
}

static unsigned int probe_gsm_size(struct pci_dev *pdev)
{
	u16 gmch_ctl, ggms;

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	return ggms ? SZ_1M << ggms : 0;
}

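/**
 * xe_ggtt_set_pte - Update a GGTT page table entry
 * @ggtt: the &xe_ggtt where the entry is written
 * @addr: the GGTT address of the entry to update
 * @pte: the already encoded PTE value to write
 */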
void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->size);

	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 end = start + size - 1;
	u64 scratch_pte;

	xe_tile_assert(ggtt->tile, start < end);

	if (ggtt->scratch)
		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
							  pat_index);
	else
		scratch_pte = 0;

	while (start < end) {
		xe_ggtt_set_pte(ggtt, start, scratch_pte);
		start += XE_PAGE_SIZE;
	}
}

static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	mutex_destroy(&ggtt->lock);
	drm_mm_takedown(&ggtt->mm);
}

static void ggtt_fini(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	ggtt->scratch = NULL;
}

static void primelockdep(struct xe_ggtt *ggtt)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&ggtt->lock);
	fs_reclaim_release(GFP_KERNEL);
}

static const struct xe_ggtt_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_ggtt_pte_encode_bo,
};

static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
};

/**
 * xe_ggtt_init_early - Early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * Allows creating new mappings usable by the GuC. Those mappings are not
 * usable by the HW engines yet, as the GGTT hasn't had its scratch / initial
 * clear done. That will happen in the regular, non-early GGTT initialization.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	unsigned int gsm_size;

	if (IS_SRIOV_VF(xe))
		gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
	else
		gsm_size = probe_gsm_size(pdev);

	if (gsm_size == 0) {
		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
		return -ENOMEM;
	}

	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;

	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		ggtt->flags |= XE_GGTT_FLAGS_64K;

	/*
	 * 8B per entry, each points to a 4KB page.
	 *
	 * The GuC address space is limited on both ends of the GGTT, because
	 * the GuC shim HW redirects accesses to those addresses to other HW
	 * areas instead of going through the GGTT. On the bottom end, the GuC
	 * can't access offsets below the WOPCM size, while on the top side the
	 * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
	 * checking each object to see if they are accessed by GuC or not, we
	 * just exclude those areas from the allocator. Additionally, to
	 * simplify the driver load, we use the maximum WOPCM size in this logic
	 * instead of the programmed one, so we don't need to wait until the
	 * actual size to be programmed is determined (which requires FW fetch)
	 * before initializing the GGTT. These simplifications might waste space
	 * in the GGTT (about 20-25 MBs depending on the platform) but we can
	 * live with this.
	 *
	 * Another benefit of this is that the GuC bootrom can't access anything
	 * below the WOPCM max size, so anything the bootrom needs to access
	 * (e.g. an RSA key) needs to be placed in the GGTT above the WOPCM max
	 * size. Starting the GGTT allocations above the WOPCM max gives us the
	 * correct placement for free.
	 */
	if (ggtt->size > GUC_GGTT_TOP)
		ggtt->size = GUC_GGTT_TOP;

	if (GRAPHICS_VERx100(xe) >= 1270)
		ggtt->pt_ops = &xelpg_pt_ops;
	else
		ggtt->pt_ops = &xelp_pt_ops;

	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
		    ggtt->size - xe_wopcm_size(xe));
	mutex_init(&ggtt->lock);
	primelockdep(ggtt);

	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
}

static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
	struct drm_mm_node *hole;
	u64 start, end;

	/* Display may have allocated inside ggtt, so be careful with clearing here */
	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
		xe_ggtt_clear(ggtt, start, end - start);

	xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}

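/**
 * xe_ggtt_init - Regular (non-early) GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * Allocates the scratch page and clears every GGTT range that is not already
 * in use, making the GGTT safe to use by the HW engines.
 *
 * Return: 0 on success or a negative error code on failure.
 */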
int xe_ggtt_init(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	unsigned int flags;
	int err;

	/*
	 * To avoid having to deal with the 64K GGTT layout for scratch
	 * entries, keep the scratch page in system memory on platforms where
	 * 64K pages are needed for VRAM.
	 */
	flags = XE_BO_CREATE_PINNED_BIT;
	if (ggtt->flags & XE_GGTT_FLAGS_64K)
		flags |= XE_BO_CREATE_SYSTEM_BIT;
	else
		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);

	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
	if (IS_ERR(ggtt->scratch)) {
		err = PTR_ERR(ggtt->scratch);
		goto err;
	}

	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);

	xe_ggtt_initial_clear(ggtt);

	return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
err:
	ggtt->scratch = NULL;
	return err;
}

#define GUC_TLB_INV_CR				XE_REG(0xcee8)
#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)

static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
	if (!gt)
		return;

	/*
	 * Invalidation can happen when there's no in-flight work keeping the
	 * GT awake.  We need to explicitly grab forcewake to ensure the GT
	 * and GuC are accessible.
	 */
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);

	/* TODO: vfunc for GuC vs. non-GuC */

	if (gt->uc.guc.submission_state.enabled) {
		int seqno;

		seqno = xe_gt_tlb_invalidation_guc(gt);
		xe_gt_assert(gt, seqno > 0);
		if (seqno > 0)
			xe_gt_tlb_invalidation_wait(gt, seqno);
	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
		struct xe_device *xe = gt_to_xe(gt);

		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(gt, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
	}

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

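/**
 * xe_ggtt_invalidate - Invalidate the GGTT TLBs
 * @ggtt: the &xe_ggtt whose mappings have changed
 *
 * Flushes stale GGTT translations from the TLB of every GT in the tile.
 */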
void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}

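/**
 * xe_ggtt_printk - Print the GGTT entries that are in use
 * @ggtt: the &xe_ggtt to be dumped
 * @prefix: string prefixed to each printed line
 *
 * Prints every GGTT entry that doesn't point to the scratch page.
 */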
void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 addr, scratch_pte;

	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);

	printk("%sGlobal GTT:", prefix);
	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
		unsigned int i = addr / XE_PAGE_SIZE;

		xe_tile_assert(ggtt->tile, addr <= U32_MAX);
		if (ggtt->gsm[i] == scratch_pte)
			continue;

		printk("%s    ggtt[0x%08x] = 0x%016llx",
		       prefix, (u32)addr, ggtt->gsm[i]);
	}
}

static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
			      const struct drm_mm_node *node, const char *description)
{
	char buf[10];

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
		xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
			  node->start, node->start + node->size, buf, description);
	}
}

/**
 * xe_ggtt_balloon - prevent allocation of specified GGTT addresses
 * @ggtt: the &xe_ggtt where we want to make a reservation
 * @start: the starting GGTT address of the reserved region
 * @end: the end GGTT address of the reserved region
 * @node: the &drm_mm_node to hold the reserved GGTT node
 *
 * Use xe_ggtt_deballoon() to release a reserved GGTT node.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node)
{
	int err;

	xe_tile_assert(ggtt->tile, start < end);
	xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node));

	node->color = 0;
	node->start = start;
	node->size = end - start;

	mutex_lock(&ggtt->lock);
	err = drm_mm_reserve_node(&ggtt->mm, node);
	mutex_unlock(&ggtt->lock);

	if (xe_gt_WARN(ggtt->tile->primary_gt, err,
		       "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
		       node->start, node->start + node->size, ERR_PTR(err)))
		return err;

	xe_ggtt_dump_node(ggtt, node, "balloon");
	return 0;
}

/**
 * xe_ggtt_deballoon - release a reserved GGTT region
 * @ggtt: the &xe_ggtt where the reserved node belongs
 * @node: the &drm_mm_node with the reserved GGTT region
 *
 * See xe_ggtt_balloon() for details.
 */
void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node)
{
	if (!drm_mm_node_allocated(node))
		return;

	xe_ggtt_dump_node(ggtt, node, "deballoon");

	mutex_lock(&ggtt->lock);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->lock);
}

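/**
 * xe_ggtt_insert_special_node_locked - Insert a &drm_mm_node into the GGTT
 * @ggtt: the &xe_ggtt where the node will be inserted
 * @node: the &drm_mm_node to be inserted
 * @size: size of the node, in bytes
 * @align: alignment constraint for the node, in bytes
 * @mm_flags: DRM_MM_INSERT_* flags controlling the placement
 *
 * The caller must already hold the GGTT lock.
 *
 * Return: 0 on success or a negative error code on failure.
 */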
int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
				       u32 size, u32 align, u32 mm_flags)
{
	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
					  mm_flags);
}

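/**
 * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT
 * @ggtt: the &xe_ggtt where the node will be inserted
 * @node: the &drm_mm_node to be inserted
 * @size: size of the node, in bytes
 * @align: alignment constraint for the node, in bytes
 *
 * Takes the GGTT lock and places the node at the highest suitable address
 * (DRM_MM_INSERT_HIGH).
 *
 * Return: 0 on success or a negative error code on failure.
 */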
int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
				u32 size, u32 align)
{
	int ret;

	mutex_lock(&ggtt->lock);
	ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
						 align, DRM_MM_INSERT_HIGH);
	mutex_unlock(&ggtt->lock);

	return ret;
}

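/**
 * xe_ggtt_map_bo - Program the GGTT entries backing a buffer object
 * @ggtt: the &xe_ggtt in which the BO's node has already been inserted
 * @bo: the &xe_bo to be mapped
 *
 * Writes one PTE per XE_PAGE_SIZE page of @bo starting at the BO's GGTT node
 * and then invalidates the GGTT TLBs.
 */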
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
	u64 start = bo->ggtt_node.start;
	u64 offset, pte;

	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
		xe_ggtt_set_pte(ggtt, start + offset, pte);
	}

	xe_ggtt_invalidate(ggtt);
}

static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
				  u64 start, u64 end)
{
	int err;
	u64 alignment = XE_PAGE_SIZE;

	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
		alignment = SZ_64K;

	if (XE_WARN_ON(bo->ggtt_node.size)) {
		/* Someone's already inserted this BO in the GGTT */
		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
		return 0;
	}

	err = xe_bo_validate(bo, NULL, false);
	if (err)
		return err;

	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
					  alignment, 0, start, end, 0);
	if (!err)
		xe_ggtt_map_bo(ggtt, bo);
	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));

	return err;
}

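/**
 * xe_ggtt_insert_bo_at - Insert a BO into the GGTT within a given range
 * @ggtt: the &xe_ggtt where the BO will be placed
 * @bo: the &xe_bo to be inserted
 * @start: the start of the allowed GGTT range
 * @end: the end of the allowed GGTT range
 *
 * Return: 0 on success or a negative error code on failure.
 */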
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
			 u64 start, u64 end)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
}

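/**
 * xe_ggtt_insert_bo - Insert a BO anywhere in the GGTT
 * @ggtt: the &xe_ggtt where the BO will be placed
 * @bo: the &xe_bo to be inserted
 *
 * Return: 0 on success or a negative error code on failure.
 */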
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
}

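/**
 * xe_ggtt_remove_node - Remove a node from the GGTT
 * @ggtt: the &xe_ggtt holding the node
 * @node: the &drm_mm_node to be removed
 *
 * Clears the GGTT range backing @node, removes the node from the allocator
 * and invalidates the GGTT TLBs.
 */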
void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
{
	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);

	xe_ggtt_clear(ggtt, node->start, node->size);
	drm_mm_remove_node(node);
	node->size = 0;

	xe_ggtt_invalidate(ggtt);

	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}

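/**
 * xe_ggtt_remove_bo - Remove a BO from the GGTT
 * @ggtt: the &xe_ggtt holding the BO's node
 * @bo: the &xe_bo to be removed
 */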
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	if (XE_WARN_ON(!bo->ggtt_node.size))
		return;

	/* This BO is not currently in the GGTT */
	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);

	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
}

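/**
 * xe_ggtt_dump - Dump the GGTT allocator state
 * @ggtt: the &xe_ggtt to be dumped
 * @p: the &drm_printer used for the output
 *
 * Return: 0 on success or a negative error code on failure.
 */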
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->lock);
	if (err)
		return err;

	drm_mm_print(&ggtt->mm, p);
	mutex_unlock(&ggtt->lock);
	return err;
}