1// SPDX-License-Identifier: MIT
2/*
 * Copyright © 2018 Intel Corporation
4 */
5
6#include <linux/sort.h>
7
8#include "i915_drv.h"
9
10#include "intel_gt_requests.h"
11#include "i915_selftest.h"
12#include "selftest_engine_heartbeat.h"
13
/* Restore @engine's heartbeat interval to the engine's default value. */
static void reset_heartbeat(struct intel_engine_cs *engine)
{
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
}
19
20static int timeline_sync(struct intel_timeline *tl)
21{
22	struct dma_fence *fence;
23	long timeout;
24
25	fence = i915_active_fence_get(&tl->last_request);
26	if (!fence)
27		return 0;
28
29	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
30	dma_fence_put(fence);
31	if (timeout < 0)
32		return timeout;
33
34	return 0;
35}
36
37static int engine_sync_barrier(struct intel_engine_cs *engine)
38{
39	return timeline_sync(engine->kernel_context->timeline);
40}
41
/*
 * Reference-counted wrapper around an i915_active. The selftests attach
 * it to an engine as an idle barrier and then check that it goes idle.
 */
struct pulse {
	struct i915_active active; /* tracker initialised in pulse_create() */
	struct kref kref; /* final put releases the pulse via pulse_free() */
};
46
47static int pulse_active(struct i915_active *active)
48{
49	kref_get(&container_of(active, struct pulse, active)->kref);
50	return 0;
51}
52
53static void pulse_free(struct kref *kref)
54{
55	struct pulse *p = container_of(kref, typeof(*p), kref);
56
57	i915_active_fini(&p->active);
58	kfree(p);
59}
60
61static void pulse_put(struct pulse *p)
62{
63	kref_put(&p->kref, pulse_free);
64}
65
66static void pulse_retire(struct i915_active *active)
67{
68	pulse_put(container_of(active, struct pulse, active));
69}
70
71static struct pulse *pulse_create(void)
72{
73	struct pulse *p;
74
75	p = kmalloc(sizeof(*p), GFP_KERNEL);
76	if (!p)
77		return p;
78
79	kref_init(&p->kref);
80	i915_active_init(&p->active, pulse_active, pulse_retire, 0);
81
82	return p;
83}
84
85static void pulse_unlock_wait(struct pulse *p)
86{
87	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
88}
89
90static int __live_idle_pulse(struct intel_engine_cs *engine,
91			     int (*fn)(struct intel_engine_cs *cs))
92{
93	struct pulse *p;
94	int err;
95
96	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
97
98	p = pulse_create();
99	if (!p)
100		return -ENOMEM;
101
102	err = i915_active_acquire(&p->active);
103	if (err)
104		goto out;
105
106	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
107	if (err) {
108		i915_active_release(&p->active);
109		goto out;
110	}
111
112	i915_active_acquire_barrier(&p->active);
113	i915_active_release(&p->active);
114
115	GEM_BUG_ON(i915_active_is_idle(&p->active));
116	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
117
118	err = fn(engine);
119	if (err)
120		goto out;
121
122	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
123
124	if (engine_sync_barrier(engine)) {
125		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
126
127		drm_printf(&m, "%s: no heartbeat pulse?\n", engine->name);
128		intel_engine_dump(engine, &m, "%s", engine->name);
129
130		err = -ETIME;
131		goto out;
132	}
133
134	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
135
136	pulse_unlock_wait(p); /* synchronize with the retirement callback */
137
138	if (!i915_active_is_idle(&p->active)) {
139		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
140
141		drm_printf(&m, "%s: heartbeat pulse did not flush idle tasks\n",
142			   engine->name);
143		i915_active_print(&p->active, &m);
144
145		err = -EINVAL;
146		goto out;
147	}
148
149out:
150	pulse_put(p);
151	return err;
152}
153
154static int live_idle_flush(void *arg)
155{
156	struct intel_gt *gt = arg;
157	struct intel_engine_cs *engine;
158	enum intel_engine_id id;
159	int err = 0;
160
161	/* Check that we can flush the idle barriers */
162
163	for_each_engine(engine, gt, id) {
164		st_engine_heartbeat_disable(engine);
165		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
166		st_engine_heartbeat_enable(engine);
167		if (err)
168			break;
169	}
170
171	return err;
172}
173
174static int live_idle_pulse(void *arg)
175{
176	struct intel_gt *gt = arg;
177	struct intel_engine_cs *engine;
178	enum intel_engine_id id;
179	int err = 0;
180
181	/* Check that heartbeat pulses flush the idle barriers */
182
183	for_each_engine(engine, gt, id) {
184		st_engine_heartbeat_disable(engine);
185		err = __live_idle_pulse(engine, intel_engine_pulse);
186		st_engine_heartbeat_enable(engine);
187		if (err && err != -ENODEV)
188			break;
189
190		err = 0;
191	}
192
193	return err;
194}
195
/*
 * sort() comparator that orders two u32 values ascending.
 *
 * Returns a negative, zero or positive value when *_a is respectively
 * less than, equal to or greater than *_b.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	/*
	 * Compare instead of subtracting: the u32 difference wraps, and once
	 * converted to int it carries the wrong sign whenever the two values
	 * are more than INT_MAX apart.
	 */
	return (*a > *b) - (*a < *b);
}
202
203static int __live_heartbeat_fast(struct intel_engine_cs *engine)
204{
205	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
206	struct intel_context *ce;
207	struct i915_request *rq;
208	ktime_t t0, t1;
209	u32 times[5];
210	int err;
211	int i;
212
213	ce = intel_context_create(engine);
214	if (IS_ERR(ce))
215		return PTR_ERR(ce);
216
217	intel_engine_pm_get(engine);
218
219	err = intel_engine_set_heartbeat(engine, 1);
220	if (err)
221		goto err_pm;
222
223	for (i = 0; i < ARRAY_SIZE(times); i++) {
224		do {
225			/* Manufacture a tick */
226			intel_engine_park_heartbeat(engine);
227			GEM_BUG_ON(engine->heartbeat.systole);
228			engine->serial++; /*  pretend we are not idle! */
229			intel_engine_unpark_heartbeat(engine);
230
231			flush_delayed_work(&engine->heartbeat.work);
232			if (!delayed_work_pending(&engine->heartbeat.work)) {
233				pr_err("%s: heartbeat %d did not start\n",
234				       engine->name, i);
235				err = -EINVAL;
236				goto err_pm;
237			}
238
239			rcu_read_lock();
240			rq = READ_ONCE(engine->heartbeat.systole);
241			if (rq)
242				rq = i915_request_get_rcu(rq);
243			rcu_read_unlock();
244		} while (!rq);
245
246		t0 = ktime_get();
247		while (rq == READ_ONCE(engine->heartbeat.systole))
248			yield(); /* work is on the local cpu! */
249		t1 = ktime_get();
250
251		i915_request_put(rq);
252		times[i] = ktime_us_delta(t1, t0);
253	}
254
255	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
256
257	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
258		engine->name,
259		times[ARRAY_SIZE(times) / 2],
260		times[0],
261		times[ARRAY_SIZE(times) - 1]);
262
263	/*
264	 * Ideally, the upper bound on min work delay would be something like
265	 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
266	 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
267	 * and may be stuck behind some slow work for many millisecond. Such
268	 * as our very own display workers.
269	 */
270	if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
271		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
272		       engine->name,
273		       times[ARRAY_SIZE(times) / 2],
274		       error_threshold);
275		err = -EINVAL;
276	}
277
278	reset_heartbeat(engine);
279err_pm:
280	intel_engine_pm_put(engine);
281	intel_context_put(ce);
282	return err;
283}
284
285static int live_heartbeat_fast(void *arg)
286{
287	struct intel_gt *gt = arg;
288	struct intel_engine_cs *engine;
289	enum intel_engine_id id;
290	int err = 0;
291
292	/* Check that the heartbeat ticks at the desired rate. */
293	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
294		return 0;
295
296	for_each_engine(engine, gt, id) {
297		err = __live_heartbeat_fast(engine);
298		if (err)
299			break;
300	}
301
302	return err;
303}
304
305static int __live_heartbeat_off(struct intel_engine_cs *engine)
306{
307	int err;
308
309	intel_engine_pm_get(engine);
310
311	engine->serial++;
312	flush_delayed_work(&engine->heartbeat.work);
313	if (!delayed_work_pending(&engine->heartbeat.work)) {
314		pr_err("%s: heartbeat not running\n",
315		       engine->name);
316		err = -EINVAL;
317		goto err_pm;
318	}
319
320	err = intel_engine_set_heartbeat(engine, 0);
321	if (err)
322		goto err_pm;
323
324	engine->serial++;
325	flush_delayed_work(&engine->heartbeat.work);
326	if (delayed_work_pending(&engine->heartbeat.work)) {
327		pr_err("%s: heartbeat still running\n",
328		       engine->name);
329		err = -EINVAL;
330		goto err_beat;
331	}
332
333	if (READ_ONCE(engine->heartbeat.systole)) {
334		pr_err("%s: heartbeat still allocated\n",
335		       engine->name);
336		err = -EINVAL;
337		goto err_beat;
338	}
339
340err_beat:
341	reset_heartbeat(engine);
342err_pm:
343	intel_engine_pm_put(engine);
344	return err;
345}
346
347static int live_heartbeat_off(void *arg)
348{
349	struct intel_gt *gt = arg;
350	struct intel_engine_cs *engine;
351	enum intel_engine_id id;
352	int err = 0;
353
354	/* Check that we can turn off heartbeat and not interrupt VIP */
355	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
356		return 0;
357
358	for_each_engine(engine, gt, id) {
359		if (!intel_engine_has_preemption(engine))
360			continue;
361
362		err = __live_heartbeat_off(engine);
363		if (err)
364			break;
365	}
366
367	return err;
368}
369
370int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
371{
372	static const struct i915_subtest tests[] = {
373		SUBTEST(live_idle_flush),
374		SUBTEST(live_idle_pulse),
375		SUBTEST(live_heartbeat_fast),
376		SUBTEST(live_heartbeat_off),
377	};
378	int saved_hangcheck;
379	int err;
380
381	if (intel_gt_is_wedged(to_gt(i915)))
382		return 0;
383
384	saved_hangcheck = i915->params.enable_hangcheck;
385	i915->params.enable_hangcheck = INT_MAX;
386
387	err = intel_gt_live_subtests(tests, to_gt(i915));
388
389	i915->params.enable_hangcheck = saved_hangcheck;
390	return err;
391}
392
/*
 * Selftest helper: stop the heartbeat on @engine.
 *
 * Zeroes the engine's heartbeat interval, takes an engine-pm reference
 * and parks the heartbeat. Pair with st_engine_heartbeat_enable(), which
 * drops that reference.
 */
void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	/* Clear the interval before taking the pm reference below. */
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
400
/*
 * Selftest helper: undo st_engine_heartbeat_disable() by dropping the
 * engine-pm reference and restoring the default heartbeat interval.
 */
void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	/* Only the stored interval is restored here. */
	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
408
409void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
410{
411	engine->props.heartbeat_interval_ms = 0;
412
413	/*
414	 * Park the heartbeat but without holding the PM lock as that
415	 * makes the engines appear not-idle. Note that if/when unpark
416	 * is called due to the PM lock being acquired later the
417	 * heartbeat still won't be enabled because of the above = 0.
418	 */
419	if (intel_engine_pm_get_if_awake(engine)) {
420		intel_engine_park_heartbeat(engine);
421		intel_engine_pm_put(engine);
422	}
423}
424
/*
 * Selftest helper: counterpart of st_engine_heartbeat_disable_no_pm().
 * Restores the default heartbeat interval; no engine-pm reference is
 * taken or dropped.
 */
void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
430