/*	$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $");

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

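/*
 * Bring the engine out of its parked (powered down) state: take a GT
 * power reference, repin the default state for fast resets from atomic
 * context, discard stale kernel-context state left over from idling
 * (poisoning the image first under CONFIG_DRM_I915_DEBUG_GEM), invoke
 * the backend's unpark hook, and restart the heartbeat.
 */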
static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;
	void *map;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* First poison the image to verify we never fully trust it */
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
			struct drm_i915_gem_object *obj = ce->state->obj;
			int type = i915_coherent_map_type(engine->i915);

			map = i915_gem_object_pin_map(obj, type);
			if (!IS_ERR(map)) {
				memset(map, CONTEXT_REDZONE, obj->base.size);
				i915_gem_object_flush_map(obj);
				i915_gem_object_unpin_map(obj);
			}
		}

		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

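/*
 * dma_fence callback attached to the final kernel-context request: feed
 * the time from emission to completion into the engine's latency EWMA.
 */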
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

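/*
 * Atomically (under the gt->timelines lock) put the timeline on the
 * active list, queue the request to HW and defer parking of the engine,
 * so that neither retirement nor a new submitter can race with us.
 */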
static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW (and thereby to engine_retire()) */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

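/*
 * Before parking, emit one final request that switches the engine back
 * to the (pinned) kernel context, so no user context state is left live
 * on the HW across suspend. Returns true when it is safe to power off
 * immediately (the GT is wedged, the engine already idles in the kernel
 * context, or the switch request could not be created); returns false
 * when the switch request was queued and parking is deferred until it
 * retires.
 */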
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it, causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for a precise measurement of duration;
		 * otherwise we rely on someone else retiring all the requests,
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement that runs every
		 * second or two).
		 */
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

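/*
 * Flush any idle barrier tasks left on the engine (e.g. after wedging)
 * by invoking their callbacks directly with -EAGAIN.
 */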
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct i915_active_fence *fence =
		    container_of(node, struct i915_active_fence, llist);

		fence->cb.func(ERR_PTR(-EAGAIN), &fence->cb);
	}
}

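/*
 * Park the engine when its last wakeref is released: switch to the
 * kernel context (deferring the park with -EBUSY if a barrier request
 * had to be queued), flush idle barriers, quiesce the heartbeat and
 * breadcrumbs, release the pinned default state and finally drop our
 * GT power reference.
 */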
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

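/*
 * Set up the engine's wakeref (backed by the uncore's runtime pm) with
 * the park/unpark callbacks above, and initialise the heartbeat.
 */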
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

void
intel_engine_fini__pm(struct intel_engine_cs *engine)
{

	intel_wakeref_fini(&engine->wakeref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif