// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE]				= "NONE",
	[RESPST_GET_REQ]			= "GET_REQ",
	[RESPST_CHK_PSN]			= "CHK_PSN",
	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
	[RESPST_CHK_RKEY]			= "CHK_RKEY",
	[RESPST_EXECUTE]			= "EXECUTE",
	[RESPST_READ_REPLY]			= "READ_REPLY",
	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
	[RESPST_COMPLETE]			= "COMPLETE",
	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
	[RESPST_CLEANUP]			= "CLEANUP",
	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR]			= "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
	[RESPST_ERROR]				= "ERROR",
	[RESPST_DONE]				= "DONE",
	[RESPST_EXIT]				= "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	if (must_sched)
		rxe_sched_task(&qp->resp.task);
	else
		rxe_run_task(&qp->resp.task);
}

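/* peek at the next request packet without removing it from the queue.
 * If a read reply is still in progress resume it, otherwise start
 * checking the new packet.
 */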
static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

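/* compare the packet PSN against the expected PSN. For RC report
 * out-of-sequence or duplicate requests, for UC drop the remainder
 * of a message once a packet has been lost.
 */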
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

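/* verify that the packet opcode is a legal successor of the previous
 * opcode received on this QP, e.g. that a middle or last packet is
 * preceded by a first or middle packet of the same operation.
 */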
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}

static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

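/* verify that the QP's access flags permit the requested operation */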
static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

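/* make sure a responder resource or a receive wqe is available for
 * the incoming request
 */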
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
				QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For RDMA operations additional
	 * length checks are performed in check_rkey.
	 */
	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long\n");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long\n");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}

/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		/* shouldn't happen */
		WARN_ON(1);
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va	= qp->resp.va;
	rkey	= qp->resp.rkey;
	resid	= qp->resp.resid;
	pktlen	= payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_get(mr);
		rxe_put(mw);
		mw = NULL;
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int	err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

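/* set up a responder resource for a read, atomic, atomic write or
 * flush operation, recycling the oldest entry in the resource queue,
 * so the operation can be replayed if the request is duplicated
 */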
static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

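/* build an ACK, atomic ACK or read response packet addressed to the
 * requester
 */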
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered, or
 * the MW, if one was used, to be invalidated or deallocated.
 * It is assumed that the access permissions, if originally good,
 * are still OK and that the mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * modifies or destroys the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
						sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
						sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status		= qp->resp.status;
		uwc->qp_num		= qp->ibqp.qp_num;
		uwc->wr_id		= wqe->wr_id;
	} else {
		wc->status		= qp->resp.status;
		wc->qp			= &qp->ibqp;
		wc->wr_id		= wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length : wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num		= qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num		= qp->attr.port_num;
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			rxe_err_qp(qp, "non-flush error status = %d\n",
				wc->status);
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return RESPST_CHK_RESOURCE;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
				  int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
			IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
			"RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

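/* find the responder resource, if any, that covers the given psn */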
static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}

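/* handle a duplicate request packet: re-acknowledge sends and writes
 * and replay reads, atomics and flushes from the saved responder
 * resources
 */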
static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome	= syndrome;
	qp->resp.status		= status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error	= 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

/* drain incoming request packet queue */
static void drain_req_pkts(struct rxe_qp *qp)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}
}

/* complete receive wqe with flush error */
static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
{
	struct rxe_cqe cqe = {};
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	int err;

	if (qp->rcq->is_user) {
		uwc->wr_id = wqe->wr_id;
		uwc->status = IB_WC_WR_FLUSH_ERR;
		uwc->qp_num = qp_num(qp);
	} else {
		wc->wr_id = wqe->wr_id;
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
	}

	err = rxe_cq_post(qp->rcq, &cqe, 0);
	if (err)
		rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);

	return err;
}

/* drain and optionally complete the receive queue.
 * If unable to complete a wqe, stop completing and
 * just flush the remaining wqes.
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		return;
	}

	/* recv queue not created. nothing to do. */
	if (!qp->rq.queue)
		return;

	while ((wqe = queue_head(q, q->type))) {
		if (notify) {
			err = flush_recv_wqe(qp, wqe);
			if (err)
				notify = 0;
		}
		queue_advance_consumer(q, q->type);
	}

	qp->resp.wqe = NULL;
}

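/* responder state machine. Processes one request packet per pass and
 * returns zero to be called again or -EAGAIN when there is nothing
 * left to do.
 */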
int rxe_responder(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
			  qp_state(qp) == IB_QPS_RESET) {
		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

		drain_req_pkts(qp);
		flush_recv_queue(qp, notify);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	state = RESPST_GET_REQ;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					 qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_responder
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}