1/*
2 * SPDX-License-Identifier: MIT
3 *
 * Copyright © 2014-2016 Intel Corporation
5 */
6
7#include <drm/drm_cache.h>
8
9#include "gt/intel_gt.h"
10#include "gt/intel_tlb.h"
11
12#include "i915_drv.h"
13#include "i915_gem_object.h"
14#include "i915_scatterlist.h"
15#include "i915_gem_lmem.h"
16#include "i915_gem_mman.h"
17
/*
 * __i915_gem_object_set_pages - attach a backing-store sg_table to an object
 * @obj: object receiving the pages
 * @pages: scatter-gather table that becomes obj->mm.pages
 *
 * Records @pages on @obj, rewinds both cached page iterators to the head of
 * the table, derives the physical and GTT-usable page-size masks and, when
 * the object is shrinkable and does not manage its own shrink list, queues
 * it on the device purge or shrink list.
 */
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
	bool shrinkable;
	int i;

	assert_object_held_shared(obj);

	/* Volatile objects are flagged DONTNEED as soon as they gain pages. */
	if (i915_gem_object_is_volatile(obj))
		obj->mm.madv = I915_MADV_DONTNEED;

	/* Make the pages coherent with the GPU (flushing any swapin). */
	if (obj->cache_dirty) {
		WARN_ON_ONCE(IS_DGFX(i915));
		obj->write_domain = 0;
		if (i915_gem_object_has_struct_page(obj))
			drm_clflush_sg(pages);
		obj->cache_dirty = false;
	}

	/* Both lookup iterators restart from the first entry of the table. */
	obj->mm.get_page.sg_pos = pages->sgl;
	obj->mm.get_page.sg_idx = 0;
	obj->mm.get_dma_page.sg_pos = pages->sgl;
	obj->mm.get_dma_page.sg_idx = 0;

	obj->mm.pages = pages;

	obj->mm.page_sizes.phys = i915_sg_dma_sizes(pages->sgl);
	GEM_BUG_ON(!obj->mm.page_sizes.phys);

	/*
	 * Calculate the supported page-sizes which fit into the given
	 * sg_page_sizes. This will give us the page-sizes which we may be able
	 * to use opportunistically when later inserting into the GTT. For
	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
	 * 64K or 4K pages, although in practice this will depend on a number of
	 * other factors.
	 */
	obj->mm.page_sizes.sg = 0;
	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		/* Size bit i is usable if phys has bit i or any higher bit set. */
		if (obj->mm.page_sizes.phys & ~0u << i)
			obj->mm.page_sizes.sg |= BIT(i);
	}
	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));

	shrinkable = i915_gem_object_is_shrinkable(obj);

	/*
	 * Tiled objects on devices with the swizzled-pages quirk take a
	 * shrink_pin and are kept off the shrinker lists below.
	 */
	if (i915_gem_object_is_tiled(obj) &&
	    i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES) {
		GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
		i915_gem_object_set_tiling_quirk(obj);
		GEM_BUG_ON(!list_empty(&obj->mm.link));
		atomic_inc(&obj->mm.shrink_pin);
		shrinkable = false;
	}

	if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
		struct list_head *list;
		unsigned long flags;

		assert_object_held(obj);
		/* Shrinker accounting and list membership change under obj_lock. */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		i915->mm.shrink_count++;
		i915->mm.shrink_memory += obj->base.size;

		/* Anything not marked WILLNEED goes onto the purge list. */
		if (obj->mm.madv != I915_MADV_WILLNEED)
			list = &i915->mm.purge_list;
		else
			list = &i915->mm.shrink_list;
		list_add_tail(&obj->mm.link, list);

		atomic_set(&obj->mm.shrink_pin, 0);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}
96
97int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
98{
99	struct drm_i915_private *i915 = to_i915(obj->base.dev);
100	int err;
101
102	assert_object_held_shared(obj);
103
104	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
105		drm_dbg(&i915->drm,
106			"Attempting to obtain a purgeable object\n");
107		return -EFAULT;
108	}
109
110	err = obj->ops->get_pages(obj);
111	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
112
113	return err;
114}
115
116/* Ensure that the associated pages are gathered from the backing storage
117 * and pinned into our object. i915_gem_object_pin_pages() may be called
118 * multiple times before they are released by a single call to
119 * i915_gem_object_unpin_pages() - once the pages are no longer referenced
120 * either as a result of memory pressure (reaping pages under the shrinker)
121 * or as the object is itself released.
122 */
123int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
124{
125	int err;
126
127	assert_object_held(obj);
128
129	assert_object_held_shared(obj);
130
131	if (unlikely(!i915_gem_object_has_pages(obj))) {
132		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
133
134		err = ____i915_gem_object_get_pages(obj);
135		if (err)
136			return err;
137
138		smp_mb__before_atomic();
139	}
140	atomic_inc(&obj->mm.pages_pin_count);
141
142	return 0;
143}
144
145int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj)
146{
147	struct i915_gem_ww_ctx ww;
148	int err;
149
150	i915_gem_ww_ctx_init(&ww, true);
151retry:
152	err = i915_gem_object_lock(obj, &ww);
153	if (!err)
154		err = i915_gem_object_pin_pages(obj);
155
156	if (err == -EDEADLK) {
157		err = i915_gem_ww_ctx_backoff(&ww);
158		if (!err)
159			goto retry;
160	}
161	i915_gem_ww_ctx_fini(&ww);
162	return err;
163}
164
165/* Immediately discard the backing storage */
166int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
167{
168	if (obj->ops->truncate)
169		return obj->ops->truncate(obj);
170
171	return 0;
172}
173
174static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
175{
176	struct radix_tree_iter iter;
177	void __rcu **slot;
178
179	rcu_read_lock();
180	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
181		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
182	radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
183		radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
184	rcu_read_unlock();
185}
186
/*
 * Tear down a kernel mapping previously created for the object.
 *
 * Only addresses that lie in the vmalloc range are passed to vunmap();
 * any other pointer is left untouched. @obj itself is not used.
 */
static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
{
	if (!is_vmalloc_addr(ptr))
		return;

	vunmap(ptr);
}
192
193static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
194{
195	struct drm_i915_private *i915 = to_i915(obj->base.dev);
196	struct intel_gt *gt;
197	int id;
198
199	for_each_gt(gt, i915, id) {
200		if (!obj->mm.tlb[id])
201			continue;
202
203		intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]);
204		obj->mm.tlb[id] = 0;
205	}
206}
207
208struct sg_table *
209__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
210{
211	struct sg_table *pages;
212
213	assert_object_held_shared(obj);
214
215	pages = fetch_and_zero(&obj->mm.pages);
216	if (IS_ERR_OR_NULL(pages))
217		return pages;
218
219	if (i915_gem_object_is_volatile(obj))
220		obj->mm.madv = I915_MADV_WILLNEED;
221
222	if (!i915_gem_object_has_self_managed_shrink_list(obj))
223		i915_gem_object_make_unshrinkable(obj);
224
225	if (obj->mm.mapping) {
226		unmap_object(obj, page_mask_bits(obj->mm.mapping));
227		obj->mm.mapping = NULL;
228	}
229
230	__i915_gem_object_reset_page_iter(obj);
231	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
232
233	flush_tlb_invalidate(obj);
234
235	return pages;
236}
237
238int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
239{
240	struct sg_table *pages;
241
242	if (i915_gem_object_has_pinned_pages(obj))
243		return -EBUSY;
244
245	/* May be called by shrinker from within get_pages() (on another bo) */
246	assert_object_held_shared(obj);
247
248	i915_gem_object_release_mmap_offset(obj);
249
250	/*
251	 * ->put_pages might need to allocate memory for the bit17 swizzle
252	 * array, hence protect them from being reaped by removing them from gtt
253	 * lists early.
254	 */
255	pages = __i915_gem_object_unset_pages(obj);
256
257	/*
258	 * XXX Temporary hijinx to avoid updating all backends to handle
259	 * NULL pages. In the future, when we have more asynchronous
260	 * get_pages backends we should be better able to handle the
261	 * cancellation of the async task in a more uniform manner.
262	 */
263	if (!IS_ERR_OR_NULL(pages))
264		obj->ops->put_pages(obj, pages);
265
266	return 0;
267}
268
269/* The 'mapping' part of i915_gem_object_pin_map() below */
270static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
271				      enum i915_map_type type)
272{
273	unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i;
274	struct page *stack[32], **pages = stack, *page;
275	struct sgt_iter iter;
276	pgprot_t pgprot;
277	void *vaddr;
278
279	switch (type) {
280	default:
281		MISSING_CASE(type);
282		fallthrough;	/* to use PAGE_KERNEL anyway */
283	case I915_MAP_WB:
284		/*
285		 * On 32b, highmem using a finite set of indirect PTE (i.e.
286		 * vmap) to provide virtual mappings of the high pages.
287		 * As these are finite, map_new_virtual() must wait for some
288		 * other kmap() to finish when it runs out. If we map a large
289		 * number of objects, there is no method for it to tell us
290		 * to release the mappings, and we deadlock.
291		 *
292		 * However, if we make an explicit vmap of the page, that
293		 * uses a larger vmalloc arena, and also has the ability
294		 * to tell us to release unwanted mappings. Most importantly,
295		 * it will fail and propagate an error instead of waiting
296		 * forever.
297		 *
298		 * So if the page is beyond the 32b boundary, make an explicit
299		 * vmap.
300		 */
301		if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl)))
302			return page_address(sg_page(obj->mm.pages->sgl));
303		pgprot = PAGE_KERNEL;
304		break;
305	case I915_MAP_WC:
306		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
307		break;
308	}
309
310	if (n_pages > ARRAY_SIZE(stack)) {
311		/* Too big for stack -- allocate temporary array instead */
312		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
313		if (!pages)
314			return ERR_PTR(-ENOMEM);
315	}
316
317	i = 0;
318	for_each_sgt_page(page, iter, obj->mm.pages)
319		pages[i++] = page;
320	vaddr = vmap(pages, n_pages, 0, pgprot);
321	if (pages != stack)
322		kvfree(pages);
323
324	return vaddr ?: ERR_PTR(-ENOMEM);
325}
326
327static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
328				     enum i915_map_type type)
329{
330	resource_size_t iomap = obj->mm.region->iomap.base -
331		obj->mm.region->region.start;
332	unsigned long n_pfn = obj->base.size >> PAGE_SHIFT;
333	unsigned long stack[32], *pfns = stack, i;
334	struct sgt_iter iter;
335	dma_addr_t addr;
336	void *vaddr;
337
338	GEM_BUG_ON(type != I915_MAP_WC);
339
340	if (n_pfn > ARRAY_SIZE(stack)) {
341		/* Too big for stack -- allocate temporary array instead */
342		pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
343		if (!pfns)
344			return ERR_PTR(-ENOMEM);
345	}
346
347	i = 0;
348	for_each_sgt_daddr(addr, iter, obj->mm.pages)
349		pfns[i++] = (iomap + addr) >> PAGE_SHIFT;
350	vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
351	if (pfns != stack)
352		kvfree(pfns);
353
354	return vaddr ?: ERR_PTR(-ENOMEM);
355}
356
/*
 * get, pin, and map the pages of the object into kernel space
 *
 * @obj: object to map; the caller must hold the object lock
 * @type: requested mapping type, optionally or'ed with I915_MAP_OVERRIDE
 *
 * On success one reference is held on pages_pin_count and the kernel address
 * of the mapping is returned; the mapping is cached in obj->mm.mapping
 * together with its type and reused by later calls asking for the same type.
 *
 * Returns an ERR_PTR on failure, in which case the pin taken here has been
 * dropped again:
 *   -ENXIO  the object has neither struct pages nor iomem
 *   -EINVAL the object was created with I915_BO_ALLOC_GPU_ONLY
 *   -ENODEV a non-WC mapping was requested for an lmem-capable object that
 *           has no placement list, or WC was needed while PAT is disabled
 *   -EBUSY  a mapping of a different type exists and may not be replaced
 *   or whatever error obtaining the pages, waiting for the moving fence or
 *   creating the mapping produced.
 */
void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
			      enum i915_map_type type)
{
	enum i915_map_type has_type;
	bool pinned;
	void *ptr;
	int err;

	if (!i915_gem_object_has_struct_page(obj) &&
	    !i915_gem_object_has_iomem(obj))
		return ERR_PTR(-ENXIO);

	if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_GPU_ONLY))
		return ERR_PTR(-EINVAL);

	assert_object_held(obj);

	/*
	 * 'pinned' means an existing mapping of another type must not be
	 * replaced. It starts out true unless the caller passed the override
	 * flag, which is then stripped from the type.
	 */
	pinned = !(type & I915_MAP_OVERRIDE);
	type &= ~I915_MAP_OVERRIDE;

	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
		/* Pin count was zero: fetch the pages if needed, then pin. */
		if (unlikely(!i915_gem_object_has_pages(obj))) {
			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));

			err = ____i915_gem_object_get_pages(obj);
			if (err)
				return ERR_PTR(err);

			smp_mb__before_atomic();
		}
		atomic_inc(&obj->mm.pages_pin_count);
		/* We took the first pin, so any old mapping may be replaced. */
		pinned = false;
	}
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));

	/*
	 * For discrete our CPU mappings needs to be consistent in order to
	 * function correctly on !x86. When mapping things through TTM, we use
	 * the same rules to determine the caching type.
	 *
	 * The caching rules, starting from DG1:
	 *
	 *	- If the object can be placed in device local-memory, then the
	 *	  pages should be allocated and mapped as write-combined only.
	 *
	 *	- Everything else is always allocated and mapped as write-back,
	 *	  with the guarantee that everything is also coherent with the
	 *	  GPU.
	 *
	 * Internal users of lmem are already expected to get this right, so no
	 * fudging needed there.
	 */
	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
		if (type != I915_MAP_WC && !obj->mm.n_placements) {
			ptr = ERR_PTR(-ENODEV);
			goto err_unpin;
		}

		type = I915_MAP_WC;
	} else if (IS_DGFX(to_i915(obj->base.dev))) {
		type = I915_MAP_WB;
	}

	/* The cached mapping carries its type packed into the pointer. */
	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
	if (ptr && has_type != type) {
		if (pinned) {
			ptr = ERR_PTR(-EBUSY);
			goto err_unpin;
		}

		/* Wrong type and replaceable: drop it and map afresh below. */
		unmap_object(obj, ptr);

		ptr = obj->mm.mapping = NULL;
	}

	if (!ptr) {
		err = i915_gem_object_wait_moving_fence(obj, true);
		if (err) {
			ptr = ERR_PTR(err);
			goto err_unpin;
		}

		if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled()))
			ptr = ERR_PTR(-ENODEV);
		else if (i915_gem_object_has_struct_page(obj))
			ptr = i915_gem_object_map_page(obj, type);
		else
			ptr = i915_gem_object_map_pfn(obj, type);
		if (IS_ERR(ptr))
			goto err_unpin;

		obj->mm.mapping = page_pack_bits(ptr, type);
	}

	return ptr;

err_unpin:
	/* Undo the pin taken above; ptr already holds the ERR_PTR to return. */
	atomic_dec(&obj->mm.pages_pin_count);
	return ptr;
}
458
459void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
460				       enum i915_map_type type)
461{
462	void *ret;
463
464	i915_gem_object_lock(obj, NULL);
465	ret = i915_gem_object_pin_map(obj, type);
466	i915_gem_object_unlock(obj);
467
468	return ret;
469}
470
/*
 * Flush CPU writes made through the object's kernel mapping.
 *
 * @obj: object whose pages are pinned and mapped
 * @offset: byte offset of the written range within the object
 * @size: length of the written range in bytes
 *
 * Always issues a write barrier and marks the object dirty. A cache flush of
 * the range is only performed when the object is not cache-coherent for
 * writes and the current mapping is not write-combined.
 */
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
				 unsigned long offset,
				 unsigned long size)
{
	enum i915_map_type has_type;
	void *ptr;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	/* The range [offset, offset + size) must lie within the object. */
	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
				     offset, size, obj->base.size));

	wmb(); /* let all previous writes be visible to coherent partners */
	obj->mm.dirty = true;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
		return;

	/* Write-combined mappings get no clflush. */
	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
	if (has_type == I915_MAP_WC)
		return;

	drm_clflush_virt_range(ptr + offset, size);
	/* Flushing the whole object leaves no CPU-dirty cachelines behind. */
	if (size == obj->base.size) {
		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
		obj->cache_dirty = false;
	}
}
498
499void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
500{
501	GEM_BUG_ON(!obj->mm.mapping);
502
503	/*
504	 * We allow removing the mapping from underneath pinned pages!
505	 *
506	 * Furthermore, since this is an unsafe operation reserved only
507	 * for construction time manipulation, we ignore locking prudence.
508	 */
509	unmap_object(obj, page_mask_bits(fetch_and_zero(&obj->mm.mapping)));
510
511	i915_gem_object_unpin_map(obj);
512}
513
/*
 * Find the scatterlist entry that covers page @n of the object.
 *
 * @obj: object whose sg_table is searched
 * @iter: lookup cache to use; the iterators &obj->mm.get_dma_page and
 *        &obj->ttm.get_io_page select counting by DMA length, any other
 *        iterator counts by page length
 * @n: page index within the object, must be below obj->base.size >> PAGE_SHIFT
 * @offset: out parameter, set to the index of page @n within the returned
 *          entry
 *
 * May sleep, since the forward scan takes iter->lock. If the pages are not
 * pinned the caller must hold the object lock.
 *
 * Returns the scatterlist entry containing page @n.
 */
struct scatterlist *
__i915_gem_object_page_iter_get_sg(struct drm_i915_gem_object *obj,
				   struct i915_gem_object_page_iter *iter,
				   pgoff_t n,
				   unsigned int *offset)

{
	const bool dma = iter == &obj->mm.get_dma_page ||
			 iter == &obj->ttm.get_io_page;
	unsigned int idx, count;
	struct scatterlist *sg;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	if (!i915_gem_object_has_pinned_pages(obj))
		assert_object_held(obj);

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookup of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);

	/* Walk forward, caching every entry that ends at or before page n. */
	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		/* Remaining pages of the entry store the index of its first page. */
		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
	}

scan:
	/* Publish how far the cache has been filled before dropping the lock. */
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}
630
631struct page *
632__i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n)
633{
634	struct scatterlist *sg;
635	unsigned int offset;
636
637	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
638
639	sg = i915_gem_object_get_sg(obj, n, &offset);
640	return nth_page(sg_page(sg), offset);
641}
642
643/* Like i915_gem_object_get_page(), but mark the returned page dirty */
644struct page *
645__i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, pgoff_t n)
646{
647	struct page *page;
648
649	page = i915_gem_object_get_page(obj, n);
650	if (!obj->mm.dirty)
651		set_page_dirty(page);
652
653	return page;
654}
655
656dma_addr_t
657__i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
658				      pgoff_t n, unsigned int *len)
659{
660	struct scatterlist *sg;
661	unsigned int offset;
662
663	sg = i915_gem_object_get_sg_dma(obj, n, &offset);
664
665	if (len)
666		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
667
668	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
669}
670
671dma_addr_t
672__i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, pgoff_t n)
673{
674	return i915_gem_object_get_dma_address_len(obj, n, NULL);
675}
676