// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"

#define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)

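/*
 * OPFN_CODE() converts a capability code to its bit in the requested/
 * completed bitmasks; OPFN_MASK() does the same for a named capability.
 */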
#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)

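/*
 * Per-capability OPFN callbacks: build a request, handle an incoming
 * request (response), process the remote reply, and clean up on error.
 */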
struct hfi1_opfn_type {
	bool (*request)(struct rvt_qp *qp, u64 *data);
	bool (*response)(struct rvt_qp *qp, u64 *data);
	bool (*reply)(struct rvt_qp *qp, u64 data);
	void (*error)(struct rvt_qp *qp);
};

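/* Handler table indexed by capability code; only TID RDMA is registered */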
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
	[STL_VERBS_EXTD_TID_RDMA] = {
		.request = tid_rdma_conn_req,
		.response = tid_rdma_conn_resp,
		.reply = tid_rdma_conn_reply,
		.error = tid_rdma_conn_error,
	},
};

static struct workqueue_struct *opfn_wq;

static void opfn_schedule_conn_request(struct rvt_qp *qp);

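/* Return true if the OPFN extended bit (BTH.E) is set in bth1 */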
static bool hfi1_opfn_extended(u32 bth1)
{
	return !!(bth1 & IB_BTHE_E);
}

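/*
 * Select the lowest-numbered capability that has been requested but not yet
 * completed and post its OPFN request as an IB_WR_OPFN atomic work request.
 */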
static void opfn_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_atomic_wr wr;
	u16 mask, capcode;
	struct hfi1_opfn_type *extd;
	u64 data;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_opfn_state_conn_request(qp);
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * Exit if the extended bit is not set, or if nothing is requested, or
	 * if we have completed all requests, or if a previous request is in
	 * progress
	 */
	if (!priv->opfn.extended || !priv->opfn.requested ||
	    priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
		goto done;

	mask = priv->opfn.requested & ~priv->opfn.completed;
	capcode = ilog2(mask & ~(mask - 1)) + 1;
	if (capcode >= STL_VERBS_EXTD_MAX) {
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

	extd = &hfi1_opfn_handlers[capcode];
	if (!extd || !extd->request || !extd->request(qp, &data)) {
		/*
		 * Either there is no handler for this capability or the request
		 * packet could not be generated. Either way, mark it as done so
		 * we don't keep attempting to complete it.
		 */
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

	trace_hfi1_opfn_data_conn_request(qp, capcode, data);
	data = (data & ~0xf) | capcode;

	memset(&wr, 0, sizeof(wr));
	wr.wr.opcode = IB_WR_OPFN;
	wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
	wr.compare_add = data;

	priv->opfn.curr = capcode;	/* A new request is now in progress */
	/* Drop opfn.lock before calling ib_post_send() */
	spin_unlock_irqrestore(&priv->opfn.lock, flags);

	ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
	if (ret)
		goto err;
	trace_hfi1_opfn_state_conn_request(qp);
	return;
err:
	trace_hfi1_msg_opfn_conn_request(qp, "ib_post_send failed: ret = ",
					 (u64)ret);
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * In case of an unexpected error return from ib_post_send
	 * clear opfn.curr and reschedule to try again
	 */
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	opfn_schedule_conn_request(qp);
done:
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

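/* Workqueue handler: issue the deferred OPFN connection request for the QP */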
void opfn_send_conn_request(struct work_struct *work)
{
	struct hfi1_opfn_data *od;
	struct hfi1_qp_priv *qpriv;

	od = container_of(work, struct hfi1_opfn_data, opfn_work);
	qpriv = container_of(od, struct hfi1_qp_priv, opfn);

	opfn_conn_request(qpriv->owner);
}

/*
 * When the caller holds the QP s_lock, the OPFN request must be deferred to
 * a workqueue so that ib_post_send() in opfn_conn_request() does not try to
 * take the QP s_lock a second time.
 */
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	trace_hfi1_opfn_state_sched_conn_request(qp);
	queue_work(opfn_wq, &priv->opfn.opfn_work);
}

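/*
 * Responder side: decode the capability code from an incoming OPFN atomic,
 * let the capability's response handler negotiate the parameters, and place
 * the result in the ack entry's atomic_data for the reply.
 */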
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
			struct ib_atomic_eth *ateth)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u64 data = be64_to_cpu(ateth->compare_data);
	struct hfi1_opfn_type *extd;
	u8 capcode;
	unsigned long flags;

	trace_hfi1_opfn_state_conn_response(qp);
	capcode = data & 0xf;
	trace_hfi1_opfn_data_conn_response(qp, capcode, data);
	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
		return;

	extd = &hfi1_opfn_handlers[capcode];

	if (!extd || !extd->response) {
		e->atomic_data = capcode;
		return;
	}

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (priv->opfn.completed & OPFN_CODE(capcode)) {
		/*
		 * We are receiving a request for a feature that has already
		 * been negotiated. This may mean that the other side has reset
		 */
		priv->opfn.completed &= ~OPFN_CODE(capcode);
		if (extd->error)
			extd->error(qp);
	}

	if (extd->response(qp, &data))
		priv->opfn.completed |= OPFN_CODE(capcode);
	e->atomic_data = (data & ~0xf) | capcode;
	trace_hfi1_opfn_state_conn_response(qp);
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

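/*
 * Requester side: handle the OPFN atomic reply for the request currently in
 * progress and mark the capability completed if the reply handler accepts
 * the remote parameters.
 */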
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_opfn_type *extd;
	u8 capcode;
	unsigned long flags;

	trace_hfi1_opfn_state_conn_reply(qp);
	capcode = data & 0xf;
	trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
		return;

	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * Either there is no previous request or the reply is not for the
	 * current request
	 */
	if (!priv->opfn.curr || capcode != priv->opfn.curr)
		goto done;

	extd = &hfi1_opfn_handlers[capcode];

	if (!extd || !extd->reply)
		goto clear;

	if (extd->reply(qp, data))
		priv->opfn.completed |= OPFN_CODE(capcode);
clear:
	/*
	 * Clear opfn.curr to indicate that the previous request is no longer
	 * in progress
	 */
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	trace_hfi1_opfn_state_conn_reply(qp);
done:
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

void opfn_conn_error(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_opfn_type *extd = NULL;
	unsigned long flags;
	u16 capcode;

	trace_hfi1_opfn_state_conn_error(qp);
	trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
	/*
	 * The QP has gone into the Error state. We have to invalidate all
	 * negotiated features, including the one in progress (if any). The RC
	 * QP handling will clean up the WQE for the connection request.
	 */
	spin_lock_irqsave(&priv->opfn.lock, flags);
	while (priv->opfn.completed) {
		capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
		extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
		if (extd->error)
			extd->error(qp);
		priv->opfn.completed &= ~OPFN_CODE(capcode);
	}
	priv->opfn.extended = 0;
	priv->opfn.requested = 0;
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

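/*
 * Called on QP modify: track retry/timeout attributes and, for RC QPs with
 * TID RDMA enabled and a 4K or 8K MTU, set up the local TID RDMA parameters
 * and request (or re-request) OPFN negotiation when the QP enters RTS.
 */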
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;

	if (attr_mask & IB_QP_RETRY_CNT)
		priv->s_retry = attr->retry_cnt;

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct tid_rdma_params *local = &priv->tid_rdma.local;

		if (attr_mask & IB_QP_TIMEOUT)
			priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
		if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
		    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
			tid_rdma_opfn_init(qp, local);
			/*
			 * We only want to set the OPFN requested bit when the
			 * QP transitions to RTS.
			 */
			if (attr_mask & IB_QP_STATE &&
			    attr->qp_state == IB_QPS_RTS) {
				priv->opfn.requested |= OPFN_MASK(TID_RDMA);
				/*
				 * If the QP is transitioning to RTS and the
				 * opfn.completed for TID RDMA has already been
				 * set, the QP is being moved *back* into RTS.
				 * We can now renegotiate the TID RDMA
				 * parameters.
				 */
				if (priv->opfn.completed &
				    OPFN_MASK(TID_RDMA)) {
					priv->opfn.completed &=
						~OPFN_MASK(TID_RDMA);
					/*
					 * Since the opfn.completed bit was
					 * already set, it is safe to assume
					 * that opfn.extended is also set.
					 */
					opfn_schedule_conn_request(qp);
				}
			}
		} else {
			memset(local, 0, sizeof(*local));
		}
	}
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

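/*
 * Note that the remote end supports OPFN (extended bit seen in bth1) and,
 * if the QP is already in RTS, start the connection request immediately.
 */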
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
	    HFI1_CAP_IS_KSET(OPFN)) {
		priv->opfn.extended = 1;
		if (qp->state == IB_QPS_RTS)
			opfn_conn_request(qp);
	}
}

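/* Create the global workqueue used to defer OPFN connection requests */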
int opfn_init(void)
{
	opfn_wq = alloc_workqueue("hfi_opfn",
				  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
				  WQ_MEM_RECLAIM,
				  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
	if (!opfn_wq)
		return -ENOMEM;

	return 0;
}

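/* Tear down the global OPFN workqueue, if it was created */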
void opfn_exit(void)
{
	if (opfn_wq) {
		destroy_workqueue(opfn_wq);
		opfn_wq = NULL;
	}
}