/*
 * Copyright 2021 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "runl.h"
#include "cgrp.h"
#include "chan.h"
#include "chid.h"
#include "priv.h"
#include "runq.h"

#include <core/gpuobj.h>
#include <subdev/timer.h>
#include <subdev/top.h>

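/* Lookup the channel group currently resident on an engine.  Returned with
 * the cgrp lock held; release with nvkm_cgrp_put().
 */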
static struct nvkm_cgrp *
nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
{
	struct nvkm_cgrp *cgrp = NULL;
	struct nvkm_chan *chan;
	bool cgid;
	int id;

	id = engn->func->cxid(engn, &cgid);
	if (id < 0)
		return NULL;

	if (!cgid) {
		chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
		if (chan)
			cgrp = chan->cgrp;
	} else {
		cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
	}

	WARN_ON(!cgrp);
	return cgrp;
}

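/* Process pending recovery for a runlist: disable and remove channels in
 * channel groups flagged for RC, wait for the PBDMA(s) to go idle, reset any
 * engines still executing those groups, then resubmit the runlist and unblock
 * it.  Called with the runlist mutex held.
 */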
static void
nvkm_runl_rc(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_cgrp *cgrp, *gtmp;
	struct nvkm_chan *chan, *ctmp;
	struct nvkm_engn *engn;
	unsigned long flags;
	int rc, state, i;
	bool reset;

	/* Runlist is blocked before scheduling recovery - fetch count. */
	BUG_ON(!mutex_is_locked(&runl->mutex));
	rc = atomic_xchg(&runl->rc_pending, 0);
	if (!rc)
		return;

	/* Look for channel groups flagged for RC. */
	nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
		state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
		if (state == NVKM_CGRP_RC_PENDING) {
			/* Disable all channels in them, and remove from runlist. */
			nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
				nvkm_chan_error(chan, false);
				nvkm_chan_remove_locked(chan);
			}
		}
	}

	/* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
	if (runl->func->preempt) {
		for (i = 0; i < runl->runq_nr; i++) {
			struct nvkm_runq *runq = runl->runq[i];

			if (runq) {
				nvkm_msec(fifo->engine.subdev.device, 2000,
					if (runq->func->idle(runq))
						break;
				);
			}
		}
	}

	/* Look for engines that are still on flagged channel groups - reset them. */
	nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
		cgrp = nvkm_engn_cgrp_get(engn, &flags);
		if (!cgrp) {
			ENGN_DEBUG(engn, "cxid not valid");
			continue;
		}

		reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
		nvkm_cgrp_put(&cgrp, flags);
		if (!reset) {
			ENGN_DEBUG(engn, "cxid not in recovery");
			continue;
		}

		ENGN_DEBUG(engn, "resetting...");
		/*TODO: can we do something less of a potential catastrophe on failure? */
		WARN_ON(nvkm_engine_reset(engn->engine));
	}

	/* Submit runlist update, and clear any remaining exception state. */
	runl->func->update(runl);
	if (runl->func->fault_clear)
		runl->func->fault_clear(runl);

	/* Unblock runlist processing. */
	while (rc--)
		nvkm_runl_allow(runl);
	runl->func->wait(runl);
}

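/* Schedule recovery for a runlist: block and preempt it, then queue the
 * recovery work.
 */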
static void
nvkm_runl_rc_runl(struct nvkm_runl *runl)
{
	RUNL_ERROR(runl, "rc scheduled");

	nvkm_runl_block(runl);
	if (runl->func->preempt)
		runl->func->preempt(runl);

	atomic_inc(&runl->rc_pending);
	schedule_work(&runl->work);
}

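/* Flag a channel group for recovery and schedule recovery on its runlist.
 * Does nothing if recovery is already pending or running for the group.
 */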
void
nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
{
	if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
		return;

	CGRP_ERROR(cgrp, "rc scheduled");
	nvkm_runl_rc_runl(cgrp->runl);
}

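/* Schedule recovery for whichever channel group is currently resident on
 * an engine.
 */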
void
nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
{
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	/* Lookup channel group currently on engine. */
	cgrp = nvkm_engn_cgrp_get(engn, &flags);
	if (!cgrp) {
		ENGN_DEBUG(engn, "rc skipped, not on channel");
		return;
	}

	nvkm_runl_rc_cgrp(cgrp);
	nvkm_cgrp_put(&cgrp, flags);
}

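/* Deferred work handler; performs scheduled recovery with the runlist mutex held. */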
static void
nvkm_runl_work(struct work_struct *work)
{
	struct nvkm_runl *runl = container_of(work, typeof(*runl), work);

	mutex_lock(&runl->mutex);
	nvkm_runl_rc(runl);
	mutex_unlock(&runl->mutex);
}

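/* Lookup a channel on a runlist by instance address.  On success, the
 * channel's cgrp lock is held and the saved IRQ flags are returned in
 * *pirqflags.
 */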
struct nvkm_chan *
nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;
	int id;

	spin_lock_irqsave(&chid->lock, flags);
	for_each_set_bit(id, chid->used, chid->nr) {
		chan = chid->data[id];
		if (likely(chan)) {
			if (chan->inst->addr == inst) {
				spin_lock(&chan->cgrp->lock);
				*pirqflags = flags;
				spin_unlock(&chid->lock);
				return chan;
			}
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

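/* Lookup a channel on a runlist by channel ID.  Locking as for
 * nvkm_runl_chan_get_inst().
 */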
struct nvkm_chan *
nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;

	spin_lock_irqsave(&chid->lock, flags);
	if (!WARN_ON(id >= chid->nr)) {
		chan = chid->data[id];
		if (likely(chan)) {
			spin_lock(&chan->cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&chid->lock);
			return chan;
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

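/* Lookup a channel group on a runlist by channel group ID.  Returned with
 * the cgrp lock held; release with nvkm_cgrp_put().
 */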
struct nvkm_cgrp *
nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *cgid = runl->cgid;
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	spin_lock_irqsave(&cgid->lock, flags);
	if (!WARN_ON(id >= cgid->nr)) {
		cgrp = cgid->data[id];
		if (likely(cgrp)) {
			spin_lock(&cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&cgid->lock);
			return cgrp;
		}
	}
	spin_unlock_irqrestore(&cgid->lock, flags);
	return NULL;
}

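/* Wait for a runlist preemption to complete, processing any scheduled
 * recovery while waiting.  Returns -ETIMEDOUT if the preempt doesn't
 * complete within the channel timeout.
 */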
int
nvkm_runl_preempt_wait(struct nvkm_runl *runl)
{
	return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
		if (!runl->func->preempt_pending(runl))
			break;

		nvkm_runl_rc(runl);
		usleep_range(1, 2);
	) < 0 ? -ETIMEDOUT : 0;
}

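/* Check whether a submitted runlist update is still pending, processing any
 * scheduled recovery if so.
 */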
bool
nvkm_runl_update_pending(struct nvkm_runl *runl)
{
	if (!runl->func->pending(runl))
		return false;

	nvkm_runl_rc(runl);
	return true;
}

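/* Resubmit the runlist if it has been flagged as changed, optionally waiting
 * for the update to complete.
 */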
void
nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
{
	if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
		runl->func->update(runl);
		if (wait)
			runl->func->wait(runl);
	}
}

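/* Drop a block reference; runlist scheduling is re-enabled when the last
 * reference is dropped.
 */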
void
nvkm_runl_allow(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!--runl->blocked) {
		RUNL_TRACE(runl, "running");
		runl->func->allow(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

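/* Take a block reference; runlist scheduling is disabled on the first
 * reference.
 */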
void
nvkm_runl_block(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!runl->blocked++) {
		RUNL_TRACE(runl, "stopped");
		runl->func->block(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

void
nvkm_runl_fini(struct nvkm_runl *runl)
{
	if (runl->func->fini)
		runl->func->fini(runl);

	flush_work(&runl->work);
}

void
nvkm_runl_del(struct nvkm_runl *runl)
{
	struct nvkm_engn *engn, *engt;

	nvkm_memory_unref(&runl->mem);

	list_for_each_entry_safe(engn, engt, &runl->engns, head) {
		list_del(&engn->head);
		kfree(engn);
	}

	nvkm_chid_unref(&runl->chid);
	nvkm_chid_unref(&runl->cgid);

	list_del(&runl->head);
	mutex_destroy(&runl->mutex);
	kfree(runl);
}

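/* Add an engine to a runlist, resolving its MMU fault ID either from the TOP
 * subdev or from the fifo's MMU fault engine table.
 */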
struct nvkm_engn *
nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
	      enum nvkm_subdev_type type, int inst)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_device *device = fifo->engine.subdev.device;
	struct nvkm_engine *engine;
	struct nvkm_engn *engn;

	engine = nvkm_device_engine(device, type, inst);
	if (!engine) {
		RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
		return NULL;
	}

	if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
		return NULL;

	engn->func = func;
	engn->runl = runl;
	engn->id = engi;
	engn->engine = engine;
	engn->fault = -1;
	list_add_tail(&engn->head, &runl->engns);

	/* Lookup MMU engine ID for fault handling. */
	if (device->top)
		engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);

	if (engn->fault < 0 && fifo->func->mmu_fault) {
		const struct nvkm_enum *map = fifo->func->mmu_fault->engine;

		while (map->name) {
			if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
				engn->fault = map->value;
				break;
			}
			map++;
		}
	}

	return engn;
}

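/* Lookup a runlist, either by runlist ID (runi >= 0) or by register address. */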
struct nvkm_runl *
nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
{
	struct nvkm_runl *runl;

	nvkm_runl_foreach(runl, fifo) {
		if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
			return runl;
	}

	return NULL;
}

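/* Allocate a new runlist, creating per-runlist cgid/chid allocators, or
 * referencing the fifo-wide ones if present.
 */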
struct nvkm_runl *
nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
{
	struct nvkm_subdev *subdev = &fifo->engine.subdev;
	struct nvkm_runl *runl;
	int ret;

	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
		return ERR_PTR(-ENOMEM);

	runl->func = fifo->func->runl;
	runl->fifo = fifo;
	runl->id = runi;
	runl->addr = addr;
	INIT_LIST_HEAD(&runl->engns);
	INIT_LIST_HEAD(&runl->cgrps);
	atomic_set(&runl->changed, 0);
	mutex_init(&runl->mutex);
	INIT_WORK(&runl->work, nvkm_runl_work);
	atomic_set(&runl->rc_triggered, 0);
	atomic_set(&runl->rc_pending, 0);
	list_add_tail(&runl->head, &fifo->runls);

	if (!fifo->chid) {
		if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
			RUNL_ERROR(runl, "cgid/chid: %d", ret);
			nvkm_runl_del(runl);
			return ERR_PTR(ret);
		}
	} else {
		runl->cgid = nvkm_chid_ref(fifo->cgid);
		runl->chid = nvkm_chid_ref(fifo->chid);
	}

	return runl;
}