1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
3#include <linux/init.h>
4#include <linux/kernel.h>
5#include <linux/module.h>
6#include <linux/pci.h>
7#include <uapi/linux/idxd.h>
8#include "idxd.h"
9#include "registers.h"
10
11static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
12{
13	struct idxd_desc *desc;
14	struct idxd_device *idxd = wq->idxd;
15
16	desc = wq->descs[idx];
17	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
18	memset(desc->completion, 0, idxd->data->compl_size);
19	desc->cpu = cpu;
20
21	if (device_pasid_enabled(idxd))
22		desc->hw->pasid = idxd->pasid;
23
24	return desc;
25}
26
27struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
28{
29	int cpu, idx;
30	struct idxd_device *idxd = wq->idxd;
31	DEFINE_SBQ_WAIT(wait);
32	struct sbq_wait_state *ws;
33	struct sbitmap_queue *sbq;
34
35	if (idxd->state != IDXD_DEV_ENABLED)
36		return ERR_PTR(-EIO);
37
38	sbq = &wq->sbq;
39	idx = sbitmap_queue_get(sbq, &cpu);
40	if (idx < 0) {
41		if (optype == IDXD_OP_NONBLOCK)
42			return ERR_PTR(-EAGAIN);
43	} else {
44		return __get_desc(wq, idx, cpu);
45	}
46
47	ws = &sbq->ws[0];
48	for (;;) {
49		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
50		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
51			break;
52		idx = sbitmap_queue_get(sbq, &cpu);
53		if (idx >= 0)
54			break;
55		schedule();
56	}
57
58	sbitmap_finish_wait(sbq, ws, &wait);
59	if (idx < 0)
60		return ERR_PTR(-EAGAIN);
61
62	return __get_desc(wq, idx, cpu);
63}
64EXPORT_SYMBOL_NS_GPL(idxd_alloc_desc, IDXD);
65
66void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
67{
68	int cpu = desc->cpu;
69
70	desc->cpu = -1;
71	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
72}
73EXPORT_SYMBOL_NS_GPL(idxd_free_desc, IDXD);
74
75static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
76					 struct idxd_desc *desc)
77{
78	struct idxd_desc *d, *n;
79
80	lockdep_assert_held(&ie->list_lock);
81	list_for_each_entry_safe(d, n, &ie->work_list, list) {
82		if (d == desc) {
83			list_del(&d->list);
84			return d;
85		}
86	}
87
88	/*
89	 * At this point, the desc needs to be aborted is held by the completion
90	 * handler where it has taken it off the pending list but has not added to the
91	 * work list. It will be cleaned up by the interrupt handler when it sees the
92	 * IDXD_COMP_DESC_ABORT for completion status.
93	 */
94	return NULL;
95}
96
97static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
98			     struct idxd_desc *desc)
99{
100	struct idxd_desc *d, *t, *found = NULL;
101	struct llist_node *head;
102	LIST_HEAD(flist);
103
104	desc->completion->status = IDXD_COMP_DESC_ABORT;
105	/*
106	 * Grab the list lock so it will block the irq thread handler. This allows the
107	 * abort code to locate the descriptor need to be aborted.
108	 */
109	spin_lock(&ie->list_lock);
110	head = llist_del_all(&ie->pending_llist);
111	if (head) {
112		llist_for_each_entry_safe(d, t, head, llnode) {
113			if (d == desc) {
114				found = desc;
115				continue;
116			}
117
118			if (d->completion->status)
119				list_add_tail(&d->list, &flist);
120			else
121				list_add_tail(&d->list, &ie->work_list);
122		}
123	}
124
125	if (!found)
126		found = list_abort_desc(wq, ie, desc);
127	spin_unlock(&ie->list_lock);
128
129	if (found)
130		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false,
131				      NULL, NULL);
132
133	/*
134	 * completing the descriptor will return desc to allocator and
135	 * the desc can be acquired by a different process and the
136	 * desc->list can be modified.  Delete desc from list so the
137	 * list trasversing does not get corrupted by the other process.
138	 */
139	list_for_each_entry_safe(d, t, &flist, list) {
140		list_del_init(&d->list);
141		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true,
142				      NULL, NULL);
143	}
144}
145
146/*
147 * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
148 * has better control of number of descriptors being submitted to a shared wq by limiting
149 * the number of driver allocated descriptors to the wq size. However, when the swq is
150 * exported to a guest kernel, it may be shared with multiple guest kernels. This means
151 * the likelihood of getting busy returned on the swq when submitting goes significantly up.
152 * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
153 * up. The sysfs knob can be tuned by the system administrator.
154 */
155int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
156{
157	unsigned int retries = wq->enqcmds_retries;
158	int rc;
159
160	do {
161		rc = enqcmds(portal, desc);
162		if (rc == 0)
163			break;
164		cpu_relax();
165	} while (retries--);
166
167	return rc;
168}
169
170int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
171{
172	struct idxd_device *idxd = wq->idxd;
173	struct idxd_irq_entry *ie = NULL;
174	u32 desc_flags = desc->hw->flags;
175	void __iomem *portal;
176	int rc;
177
178	if (idxd->state != IDXD_DEV_ENABLED)
179		return -EIO;
180
181	if (!percpu_ref_tryget_live(&wq->wq_active)) {
182		wait_for_completion(&wq->wq_resurrect);
183		if (!percpu_ref_tryget_live(&wq->wq_active))
184			return -ENXIO;
185	}
186
187	portal = idxd_wq_portal_addr(wq);
188
189	/*
190	 * Pending the descriptor to the lockless list for the irq_entry
191	 * that we designated the descriptor to.
192	 */
193	if (desc_flags & IDXD_OP_FLAG_RCI) {
194		ie = &wq->ie;
195		desc->hw->int_handle = ie->int_handle;
196		llist_add(&desc->llnode, &ie->pending_llist);
197	}
198
199	/*
200	 * The wmb() flushes writes to coherent DMA data before
201	 * possibly triggering a DMA read. The wmb() is necessary
202	 * even on UP because the recipient is a device.
203	 */
204	wmb();
205
206	if (wq_dedicated(wq)) {
207		iosubmit_cmds512(portal, desc->hw, 1);
208	} else {
209		rc = idxd_enqcmds(wq, portal, desc->hw);
210		if (rc < 0) {
211			percpu_ref_put(&wq->wq_active);
212			/* abort operation frees the descriptor */
213			if (ie)
214				llist_abort_desc(wq, ie, desc);
215			return rc;
216		}
217	}
218
219	percpu_ref_put(&wq->wq_active);
220	return 0;
221}
222EXPORT_SYMBOL_NS_GPL(idxd_submit_desc, IDXD);
223