1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4/* Copyright (c) 2008-2019, IBM Corporation */
5
6#include <linux/errno.h>
7#include <linux/types.h>
8#include <linux/net.h>
9#include <linux/scatterlist.h>
10#include <linux/llist.h>
11#include <asm/barrier.h>
12#include <net/tcp.h>
13#include <trace/events/sock.h>
14
15#include "siw.h"
16#include "siw_verbs.h"
17#include "siw_mem.h"
18
19static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
20	[SIW_QP_STATE_IDLE] = "IDLE",
21	[SIW_QP_STATE_RTR] = "RTR",
22	[SIW_QP_STATE_RTS] = "RTS",
23	[SIW_QP_STATE_CLOSING] = "CLOSING",
24	[SIW_QP_STATE_TERMINATE] = "TERMINATE",
25	[SIW_QP_STATE_ERROR] = "ERROR"
26};
27
/*
 * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
 * per-RDMAP message basis.
 *
 * The table is indexed by RDMAP opcode and initialized positionally, so
 * the order of the initializers must be kept. Each entry provides:
 *   - hdr_len:  size of the complete iWARP header for that message type,
 *   - ctrl:     a pre-built control word template (MPA length plus
 *               DDP/RDMAP flags, versions and opcode),
 *   - rx_data:  the receive handler for that message type.
 *
 * All MPA length fields are initialized to the minimum packet size, i.e.
 * the header size minus 2.
 * NOTE(review): the subtracted 2 is presumably the size of the MPA length
 * field itself (cf. MPA_HDR_SIZE) - confirm against the header definitions.
 */
struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
	{ /* RDMAP_RDMA_WRITE */
	  .hdr_len = sizeof(struct iwarp_rdma_write),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_WRITE),
	  .rx_data = siw_proc_write },
	{ /* RDMAP_RDMA_READ_REQ */
	  .hdr_len = sizeof(struct iwarp_rdma_rreq),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_REQ),
	  .rx_data = siw_proc_rreq },
	{ /* RDMAP_RDMA_READ_RESP */
	  .hdr_len = sizeof(struct iwarp_rdma_rresp),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_RESP),
	  .rx_data = siw_proc_rresp },
	{ /* RDMAP_SEND */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_TERMINATE */
	  .hdr_len = sizeof(struct iwarp_terminate),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_TERMINATE),
	  .rx_data = siw_proc_terminate }
};
93
94void siw_qp_llp_data_ready(struct sock *sk)
95{
96	struct siw_qp *qp;
97
98	trace_sk_data_ready(sk);
99
100	read_lock(&sk->sk_callback_lock);
101
102	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
103		goto done;
104
105	qp = sk_to_qp(sk);
106
107	if (likely(!qp->rx_stream.rx_suspend &&
108		   down_read_trylock(&qp->state_lock))) {
109		read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };
110
111		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
112			/*
113			 * Implements data receive operation during
114			 * socket callback. TCP gracefully catches
115			 * the case where there is nothing to receive
116			 * (not calling siw_tcp_rx_data() then).
117			 */
118			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
119
120		up_read(&qp->state_lock);
121	} else {
122		siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
123			   qp->rx_stream.rx_suspend);
124	}
125done:
126	read_unlock(&sk->sk_callback_lock);
127}
128
129void siw_qp_llp_close(struct siw_qp *qp)
130{
131	siw_dbg_qp(qp, "enter llp close, state = %s\n",
132		   siw_qp_state_to_string[qp->attrs.state]);
133
134	down_write(&qp->state_lock);
135
136	qp->rx_stream.rx_suspend = 1;
137	qp->tx_ctx.tx_suspend = 1;
138	qp->attrs.sk = NULL;
139
140	switch (qp->attrs.state) {
141	case SIW_QP_STATE_RTS:
142	case SIW_QP_STATE_RTR:
143	case SIW_QP_STATE_IDLE:
144	case SIW_QP_STATE_TERMINATE:
145		qp->attrs.state = SIW_QP_STATE_ERROR;
146		break;
147	/*
148	 * SIW_QP_STATE_CLOSING:
149	 *
150	 * This is a forced close. shall the QP be moved to
151	 * ERROR or IDLE ?
152	 */
153	case SIW_QP_STATE_CLOSING:
154		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
155			qp->attrs.state = SIW_QP_STATE_ERROR;
156		else
157			qp->attrs.state = SIW_QP_STATE_IDLE;
158		break;
159
160	default:
161		siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
162			   siw_qp_state_to_string[qp->attrs.state]);
163		break;
164	}
165	siw_sq_flush(qp);
166	siw_rq_flush(qp);
167
168	/*
169	 * Dereference closing CEP
170	 */
171	if (qp->cep) {
172		siw_cep_put(qp->cep);
173		qp->cep = NULL;
174	}
175
176	up_write(&qp->state_lock);
177
178	siw_dbg_qp(qp, "llp close exit: state %s\n",
179		   siw_qp_state_to_string[qp->attrs.state]);
180}
181
182/*
183 * socket callback routine informing about newly available send space.
184 * Function schedules SQ work for processing SQ items.
185 */
186void siw_qp_llp_write_space(struct sock *sk)
187{
188	struct siw_cep *cep;
189
190	read_lock(&sk->sk_callback_lock);
191
192	cep  = sk_to_cep(sk);
193	if (cep) {
194		cep->sk_write_space(sk);
195
196		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
197			(void)siw_sq_start(cep->qp);
198	}
199
200	read_unlock(&sk->sk_callback_lock);
201}
202
203static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
204{
205	if (irq_size) {
206		irq_size = roundup_pow_of_two(irq_size);
207		qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe));
208		if (!qp->irq) {
209			qp->attrs.irq_size = 0;
210			return -ENOMEM;
211		}
212	}
213	if (orq_size) {
214		orq_size = roundup_pow_of_two(orq_size);
215		qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe));
216		if (!qp->orq) {
217			qp->attrs.orq_size = 0;
218			qp->attrs.irq_size = 0;
219			vfree(qp->irq);
220			return -ENOMEM;
221		}
222	}
223	qp->attrs.irq_size = irq_size;
224	qp->attrs.orq_size = orq_size;
225	siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
226	return 0;
227}
228
229static int siw_qp_enable_crc(struct siw_qp *qp)
230{
231	struct siw_rx_stream *c_rx = &qp->rx_stream;
232	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
233	int size;
234
235	if (siw_crypto_shash == NULL)
236		return -ENOENT;
237
238	size = crypto_shash_descsize(siw_crypto_shash) +
239		sizeof(struct shash_desc);
240
241	c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
242	c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
243	if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
244		kfree(c_tx->mpa_crc_hd);
245		kfree(c_rx->mpa_crc_hd);
246		c_tx->mpa_crc_hd = NULL;
247		c_rx->mpa_crc_hd = NULL;
248		return -ENOMEM;
249	}
250	c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
251	c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
252
253	return 0;
254}
255
256/*
257 * Send a non signalled READ or WRITE to peer side as negotiated
258 * with MPAv2 P2P setup protocol. The work request is only created
259 * as a current active WR and does not consume Send Queue space.
260 *
261 * Caller must hold QP state lock.
262 */
263int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
264{
265	struct siw_wqe *wqe = tx_wqe(qp);
266	unsigned long flags;
267	int rv = 0;
268
269	spin_lock_irqsave(&qp->sq_lock, flags);
270
271	if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
272		spin_unlock_irqrestore(&qp->sq_lock, flags);
273		return -EIO;
274	}
275	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
276
277	wqe->wr_status = SIW_WR_QUEUED;
278	wqe->sqe.flags = 0;
279	wqe->sqe.num_sge = 1;
280	wqe->sqe.sge[0].length = 0;
281	wqe->sqe.sge[0].laddr = 0;
282	wqe->sqe.sge[0].lkey = 0;
283	/*
284	 * While it must not be checked for inbound zero length
285	 * READ/WRITE, some HW may treat STag 0 special.
286	 */
287	wqe->sqe.rkey = 1;
288	wqe->sqe.raddr = 0;
289	wqe->processed = 0;
290
291	if (ctrl & MPA_V2_RDMA_WRITE_RTR)
292		wqe->sqe.opcode = SIW_OP_WRITE;
293	else if (ctrl & MPA_V2_RDMA_READ_RTR) {
294		struct siw_sqe *rreq = NULL;
295
296		wqe->sqe.opcode = SIW_OP_READ;
297
298		spin_lock(&qp->orq_lock);
299
300		if (qp->attrs.orq_size)
301			rreq = orq_get_free(qp);
302		if (rreq) {
303			siw_read_to_orq(rreq, &wqe->sqe);
304			qp->orq_put++;
305		} else
306			rv = -EIO;
307
308		spin_unlock(&qp->orq_lock);
309	} else
310		rv = -EINVAL;
311
312	if (rv)
313		wqe->wr_status = SIW_WR_IDLE;
314
315	spin_unlock_irqrestore(&qp->sq_lock, flags);
316
317	if (!rv)
318		rv = siw_sq_start(qp);
319
320	return rv;
321}
322
323/*
324 * Map memory access error to DDP tagged error
325 */
326enum ddp_ecode siw_tagged_error(enum siw_access_state state)
327{
328	switch (state) {
329	case E_STAG_INVALID:
330		return DDP_ECODE_T_INVALID_STAG;
331	case E_BASE_BOUNDS:
332		return DDP_ECODE_T_BASE_BOUNDS;
333	case E_PD_MISMATCH:
334		return DDP_ECODE_T_STAG_NOT_ASSOC;
335	case E_ACCESS_PERM:
336		/*
337		 * RFC 5041 (DDP) lacks an ecode for insufficient access
338		 * permissions. 'Invalid STag' seem to be the closest
339		 * match though.
340		 */
341		return DDP_ECODE_T_INVALID_STAG;
342	default:
343		WARN_ON(1);
344		return DDP_ECODE_T_INVALID_STAG;
345	}
346}
347
348/*
349 * Map memory access error to RDMAP protection error
350 */
351enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
352{
353	switch (state) {
354	case E_STAG_INVALID:
355		return RDMAP_ECODE_INVALID_STAG;
356	case E_BASE_BOUNDS:
357		return RDMAP_ECODE_BASE_BOUNDS;
358	case E_PD_MISMATCH:
359		return RDMAP_ECODE_STAG_NOT_ASSOC;
360	case E_ACCESS_PERM:
361		return RDMAP_ECODE_ACCESS_RIGHTS;
362	default:
363		return RDMAP_ECODE_UNSPECIFIED;
364	}
365}
366
367void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
368			u8 ecode, int in_tx)
369{
370	if (!qp->term_info.valid) {
371		memset(&qp->term_info, 0, sizeof(qp->term_info));
372		qp->term_info.layer = layer;
373		qp->term_info.etype = etype;
374		qp->term_info.ecode = ecode;
375		qp->term_info.in_tx = in_tx;
376		qp->term_info.valid = 1;
377	}
378	siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
379		   layer, etype, ecode, in_tx ? "yes" : "no");
380}
381
382/*
383 * Send a TERMINATE message, as defined in RFC's 5040/5041/5044/6581.
384 * Sending TERMINATE messages is best effort - such messages
385 * can only be send if the QP is still connected and it does
386 * not have another outbound message in-progress, i.e. the
387 * TERMINATE message must not interfer with an incomplete current
388 * transmit operation.
389 */
390void siw_send_terminate(struct siw_qp *qp)
391{
392	struct kvec iov[3];
393	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
394	struct iwarp_terminate *term = NULL;
395	union iwarp_hdr *err_hdr = NULL;
396	struct socket *s = qp->attrs.sk;
397	struct siw_rx_stream *srx = &qp->rx_stream;
398	union iwarp_hdr *rx_hdr = &srx->hdr;
399	u32 crc = 0;
400	int num_frags, len_terminate, rv;
401
402	if (!qp->term_info.valid)
403		return;
404
405	qp->term_info.valid = 0;
406
407	if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
408		siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
409			   tx_type(tx_wqe(qp)));
410		return;
411	}
412	if (!s && qp->cep)
413		/* QP not yet in RTS. Take socket from connection end point */
414		s = qp->cep->sock;
415
416	if (!s) {
417		siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
418		return;
419	}
420
421	term = kzalloc(sizeof(*term), GFP_KERNEL);
422	if (!term)
423		return;
424
425	term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
426	term->ddp_mo = 0;
427	term->ddp_msn = cpu_to_be32(1);
428
429	iov[0].iov_base = term;
430	iov[0].iov_len = sizeof(*term);
431
432	if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
433	    ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
434	     (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
435		err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
436		if (!err_hdr) {
437			kfree(term);
438			return;
439		}
440	}
441	memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
442	       sizeof(struct iwarp_ctrl));
443
444	__rdmap_term_set_layer(term, qp->term_info.layer);
445	__rdmap_term_set_etype(term, qp->term_info.etype);
446	__rdmap_term_set_ecode(term, qp->term_info.ecode);
447
448	switch (qp->term_info.layer) {
449	case TERM_ERROR_LAYER_RDMAP:
450		if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
451			/* No additional DDP/RDMAP header to be included */
452			break;
453
454		if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
455			/*
456			 * Complete RDMAP frame will get attached, and
457			 * DDP segment length is valid
458			 */
459			term->flag_m = 1;
460			term->flag_d = 1;
461			term->flag_r = 1;
462
463			if (qp->term_info.in_tx) {
464				struct iwarp_rdma_rreq *rreq;
465				struct siw_wqe *wqe = tx_wqe(qp);
466
467				/* Inbound RREQ error, detected during
468				 * RRESP creation. Take state from
469				 * current TX work queue element to
470				 * reconstruct peers RREQ.
471				 */
472				rreq = (struct iwarp_rdma_rreq *)err_hdr;
473
474				memcpy(&rreq->ctrl,
475				       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
476				       sizeof(struct iwarp_ctrl));
477
478				rreq->rsvd = 0;
479				rreq->ddp_qn =
480					htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
481
482				/* Provide RREQ's MSN as kept aside */
483				rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);
484
485				rreq->ddp_mo = htonl(wqe->processed);
486				rreq->sink_stag = htonl(wqe->sqe.rkey);
487				rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
488				rreq->read_size = htonl(wqe->sqe.sge[0].length);
489				rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
490				rreq->source_to =
491					cpu_to_be64(wqe->sqe.sge[0].laddr);
492
493				iov[1].iov_base = rreq;
494				iov[1].iov_len = sizeof(*rreq);
495
496				rx_hdr = (union iwarp_hdr *)rreq;
497			} else {
498				/* Take RDMAP/DDP information from
499				 * current (failed) inbound frame.
500				 */
501				iov[1].iov_base = rx_hdr;
502
503				if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
504				    RDMAP_RDMA_READ_REQ)
505					iov[1].iov_len =
506						sizeof(struct iwarp_rdma_rreq);
507				else /* SEND type */
508					iov[1].iov_len =
509						sizeof(struct iwarp_send);
510			}
511		} else {
512			/* Do not report DDP hdr information if packet
513			 * layout is unknown
514			 */
515			if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
516			    (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
517				break;
518
519			iov[1].iov_base = rx_hdr;
520
521			/* Only DDP frame will get attached */
522			if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
523				iov[1].iov_len =
524					sizeof(struct iwarp_rdma_write);
525			else
526				iov[1].iov_len = sizeof(struct iwarp_send);
527
528			term->flag_m = 1;
529			term->flag_d = 1;
530		}
531		term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
532		break;
533
534	case TERM_ERROR_LAYER_DDP:
535		/* Report error encountered while DDP processing.
536		 * This can only happen as a result of inbound
537		 * DDP processing
538		 */
539
540		/* Do not report DDP hdr information if packet
541		 * layout is unknown
542		 */
543		if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
544		     (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
545		    ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
546		     (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
547			break;
548
549		iov[1].iov_base = rx_hdr;
550
551		if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
552			iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
553		else
554			iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);
555
556		term->flag_m = 1;
557		term->flag_d = 1;
558		break;
559
560	default:
561		break;
562	}
563	if (term->flag_m || term->flag_d || term->flag_r) {
564		iov[2].iov_base = &crc;
565		iov[2].iov_len = sizeof(crc);
566		len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
567		num_frags = 3;
568	} else {
569		iov[1].iov_base = &crc;
570		iov[1].iov_len = sizeof(crc);
571		len_terminate = sizeof(*term) + MPA_CRC_SIZE;
572		num_frags = 2;
573	}
574
575	/* Adjust DDP Segment Length parameter, if valid */
576	if (term->flag_m) {
577		u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
578		enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);
579
580		real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
581		rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
582	}
583
584	term->ctrl.mpa_len =
585		cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
586	if (qp->tx_ctx.mpa_crc_hd) {
587		crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
588		if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
589					(u8 *)iov[0].iov_base,
590					iov[0].iov_len))
591			goto out;
592
593		if (num_frags == 3) {
594			if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
595						(u8 *)iov[1].iov_base,
596						iov[1].iov_len))
597				goto out;
598		}
599		crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
600	}
601
602	rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
603	siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
604		   rv == len_terminate ? "success" : "failure",
605		   __rdmap_term_layer(term), __rdmap_term_etype(term),
606		   __rdmap_term_ecode(term), rv);
607out:
608	kfree(term);
609	kfree(err_hdr);
610}
611
612/*
613 * Handle all attrs other than state
614 */
615static void siw_qp_modify_nonstate(struct siw_qp *qp,
616				   struct siw_qp_attrs *attrs,
617				   enum siw_qp_attr_mask mask)
618{
619	if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
620		if (attrs->flags & SIW_RDMA_BIND_ENABLED)
621			qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
622		else
623			qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
624
625		if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
626			qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
627		else
628			qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
629
630		if (attrs->flags & SIW_RDMA_READ_ENABLED)
631			qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
632		else
633			qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
634	}
635}
636
637static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
638				      struct siw_qp_attrs *attrs,
639				      enum siw_qp_attr_mask mask)
640{
641	int rv = 0;
642
643	switch (attrs->state) {
644	case SIW_QP_STATE_RTS:
645		if (attrs->flags & SIW_MPA_CRC) {
646			rv = siw_qp_enable_crc(qp);
647			if (rv)
648				break;
649		}
650		if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
651			siw_dbg_qp(qp, "no socket\n");
652			rv = -EINVAL;
653			break;
654		}
655		if (!(mask & SIW_QP_ATTR_MPA)) {
656			siw_dbg_qp(qp, "no MPA\n");
657			rv = -EINVAL;
658			break;
659		}
660		/*
661		 * Initialize iWARP TX state
662		 */
663		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
664		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
665		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
666
667		/*
668		 * Initialize iWARP RX state
669		 */
670		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
671		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
672		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
673
674		/*
675		 * init IRD free queue, caller has already checked
676		 * limits.
677		 */
678		rv = siw_qp_readq_init(qp, attrs->irq_size,
679				       attrs->orq_size);
680		if (rv)
681			break;
682
683		qp->attrs.sk = attrs->sk;
684		qp->attrs.state = SIW_QP_STATE_RTS;
685
686		siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
687			   attrs->flags & SIW_MPA_CRC ? "y" : "n",
688			   qp->attrs.orq_size, qp->attrs.irq_size);
689		break;
690
691	case SIW_QP_STATE_ERROR:
692		siw_rq_flush(qp);
693		qp->attrs.state = SIW_QP_STATE_ERROR;
694		if (qp->cep) {
695			siw_cep_put(qp->cep);
696			qp->cep = NULL;
697		}
698		break;
699
700	default:
701		break;
702	}
703	return rv;
704}
705
706static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
707				     struct siw_qp_attrs *attrs)
708{
709	int drop_conn = 0;
710
711	switch (attrs->state) {
712	case SIW_QP_STATE_CLOSING:
713		/*
714		 * Verbs: move to IDLE if SQ and ORQ are empty.
715		 * Move to ERROR otherwise. But first of all we must
716		 * close the connection. So we keep CLOSING or ERROR
717		 * as a transient state, schedule connection drop work
718		 * and wait for the socket state change upcall to
719		 * come back closed.
720		 */
721		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
722			qp->attrs.state = SIW_QP_STATE_CLOSING;
723		} else {
724			qp->attrs.state = SIW_QP_STATE_ERROR;
725			siw_sq_flush(qp);
726		}
727		siw_rq_flush(qp);
728
729		drop_conn = 1;
730		break;
731
732	case SIW_QP_STATE_TERMINATE:
733		qp->attrs.state = SIW_QP_STATE_TERMINATE;
734
735		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
736				   RDMAP_ETYPE_CATASTROPHIC,
737				   RDMAP_ECODE_UNSPECIFIED, 1);
738		drop_conn = 1;
739		break;
740
741	case SIW_QP_STATE_ERROR:
742		/*
743		 * This is an emergency close.
744		 *
745		 * Any in progress transmit operation will get
746		 * cancelled.
747		 * This will likely result in a protocol failure,
748		 * if a TX operation is in transit. The caller
749		 * could unconditional wait to give the current
750		 * operation a chance to complete.
751		 * Esp., how to handle the non-empty IRQ case?
752		 * The peer was asking for data transfer at a valid
753		 * point in time.
754		 */
755		siw_sq_flush(qp);
756		siw_rq_flush(qp);
757		qp->attrs.state = SIW_QP_STATE_ERROR;
758		drop_conn = 1;
759		break;
760
761	default:
762		break;
763	}
764	return drop_conn;
765}
766
767static void siw_qp_nextstate_from_term(struct siw_qp *qp,
768				       struct siw_qp_attrs *attrs)
769{
770	switch (attrs->state) {
771	case SIW_QP_STATE_ERROR:
772		siw_rq_flush(qp);
773		qp->attrs.state = SIW_QP_STATE_ERROR;
774
775		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
776			siw_sq_flush(qp);
777		break;
778
779	default:
780		break;
781	}
782}
783
784static int siw_qp_nextstate_from_close(struct siw_qp *qp,
785				       struct siw_qp_attrs *attrs)
786{
787	int rv = 0;
788
789	switch (attrs->state) {
790	case SIW_QP_STATE_IDLE:
791		WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
792		qp->attrs.state = SIW_QP_STATE_IDLE;
793		break;
794
795	case SIW_QP_STATE_CLOSING:
796		/*
797		 * The LLP may already moved the QP to closing
798		 * due to graceful peer close init
799		 */
800		break;
801
802	case SIW_QP_STATE_ERROR:
803		/*
804		 * QP was moved to CLOSING by LLP event
805		 * not yet seen by user.
806		 */
807		qp->attrs.state = SIW_QP_STATE_ERROR;
808
809		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
810			siw_sq_flush(qp);
811
812		siw_rq_flush(qp);
813		break;
814
815	default:
816		siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
817			   siw_qp_state_to_string[qp->attrs.state],
818			   siw_qp_state_to_string[attrs->state]);
819
820		rv = -ECONNABORTED;
821	}
822	return rv;
823}
824
825/*
826 * Caller must hold qp->state_lock
827 */
828int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
829		  enum siw_qp_attr_mask mask)
830{
831	int drop_conn = 0, rv = 0;
832
833	if (!mask)
834		return 0;
835
836	siw_dbg_qp(qp, "state: %s => %s\n",
837		   siw_qp_state_to_string[qp->attrs.state],
838		   siw_qp_state_to_string[attrs->state]);
839
840	if (mask != SIW_QP_ATTR_STATE)
841		siw_qp_modify_nonstate(qp, attrs, mask);
842
843	if (!(mask & SIW_QP_ATTR_STATE))
844		return 0;
845
846	switch (qp->attrs.state) {
847	case SIW_QP_STATE_IDLE:
848	case SIW_QP_STATE_RTR:
849		rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
850		break;
851
852	case SIW_QP_STATE_RTS:
853		drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
854		break;
855
856	case SIW_QP_STATE_TERMINATE:
857		siw_qp_nextstate_from_term(qp, attrs);
858		break;
859
860	case SIW_QP_STATE_CLOSING:
861		siw_qp_nextstate_from_close(qp, attrs);
862		break;
863	default:
864		break;
865	}
866	if (drop_conn)
867		siw_qp_cm_drop(qp, 0);
868
869	return rv;
870}
871
872void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
873{
874	rreq->id = sqe->id;
875	rreq->opcode = sqe->opcode;
876	rreq->sge[0].laddr = sqe->sge[0].laddr;
877	rreq->sge[0].length = sqe->sge[0].length;
878	rreq->sge[0].lkey = sqe->sge[0].lkey;
879	rreq->sge[1].lkey = sqe->sge[1].lkey;
880	rreq->flags = sqe->flags | SIW_WQE_VALID;
881	rreq->num_sge = 1;
882}
883
884static int siw_activate_tx_from_sq(struct siw_qp *qp)
885{
886	struct siw_sqe *sqe;
887	struct siw_wqe *wqe = tx_wqe(qp);
888	int rv = 1;
889
890	sqe = sq_get_next(qp);
891	if (!sqe)
892		return 0;
893
894	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
895	wqe->wr_status = SIW_WR_QUEUED;
896
897	/* First copy SQE to kernel private memory */
898	memcpy(&wqe->sqe, sqe, sizeof(*sqe));
899
900	if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
901		rv = -EINVAL;
902		goto out;
903	}
904	if (wqe->sqe.flags & SIW_WQE_INLINE) {
905		if (wqe->sqe.opcode != SIW_OP_SEND &&
906		    wqe->sqe.opcode != SIW_OP_WRITE) {
907			rv = -EINVAL;
908			goto out;
909		}
910		if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
911			rv = -EINVAL;
912			goto out;
913		}
914		wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
915		wqe->sqe.sge[0].lkey = 0;
916		wqe->sqe.num_sge = 1;
917	}
918	if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
919		/* A READ cannot be fenced */
920		if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
921			     wqe->sqe.opcode ==
922				     SIW_OP_READ_LOCAL_INV)) {
923			siw_dbg_qp(qp, "cannot fence read\n");
924			rv = -EINVAL;
925			goto out;
926		}
927		spin_lock(&qp->orq_lock);
928
929		if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
930			qp->tx_ctx.orq_fence = 1;
931			rv = 0;
932		}
933		spin_unlock(&qp->orq_lock);
934
935	} else if (wqe->sqe.opcode == SIW_OP_READ ||
936		   wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
937		struct siw_sqe *rreq;
938
939		if (unlikely(!qp->attrs.orq_size)) {
940			/* We negotiated not to send READ req's */
941			rv = -EINVAL;
942			goto out;
943		}
944		wqe->sqe.num_sge = 1;
945
946		spin_lock(&qp->orq_lock);
947
948		rreq = orq_get_free(qp);
949		if (rreq) {
950			/*
951			 * Make an immediate copy in ORQ to be ready
952			 * to process loopback READ reply
953			 */
954			siw_read_to_orq(rreq, &wqe->sqe);
955			qp->orq_put++;
956		} else {
957			qp->tx_ctx.orq_fence = 1;
958			rv = 0;
959		}
960		spin_unlock(&qp->orq_lock);
961	}
962
963	/* Clear SQE, can be re-used by application */
964	smp_store_mb(sqe->flags, 0);
965	qp->sq_get++;
966out:
967	if (unlikely(rv < 0)) {
968		siw_dbg_qp(qp, "error %d\n", rv);
969		wqe->wr_status = SIW_WR_IDLE;
970	}
971	return rv;
972}
973
974/*
975 * Must be called with SQ locked.
976 * To avoid complete SQ starvation by constant inbound READ requests,
977 * the active IRQ will not be served after qp->irq_burst, if the
978 * SQ has pending work.
979 */
980int siw_activate_tx(struct siw_qp *qp)
981{
982	struct siw_sqe *irqe;
983	struct siw_wqe *wqe = tx_wqe(qp);
984
985	if (!qp->attrs.irq_size)
986		return siw_activate_tx_from_sq(qp);
987
988	irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
989
990	if (!(irqe->flags & SIW_WQE_VALID))
991		return siw_activate_tx_from_sq(qp);
992
993	/*
994	 * Avoid local WQE processing starvation in case
995	 * of constant inbound READ request stream
996	 */
997	if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
998		qp->irq_burst = 0;
999		return siw_activate_tx_from_sq(qp);
1000	}
1001	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
1002	wqe->wr_status = SIW_WR_QUEUED;
1003
1004	/* start READ RESPONSE */
1005	wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
1006	wqe->sqe.flags = 0;
1007	if (irqe->num_sge) {
1008		wqe->sqe.num_sge = 1;
1009		wqe->sqe.sge[0].length = irqe->sge[0].length;
1010		wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
1011		wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
1012	} else {
1013		wqe->sqe.num_sge = 0;
1014	}
1015
1016	/* Retain original RREQ's message sequence number for
1017	 * potential error reporting cases.
1018	 */
1019	wqe->sqe.sge[1].length = irqe->sge[1].length;
1020
1021	wqe->sqe.rkey = irqe->rkey;
1022	wqe->sqe.raddr = irqe->raddr;
1023
1024	wqe->processed = 0;
1025	qp->irq_get++;
1026
1027	/* mark current IRQ entry free */
1028	smp_store_mb(irqe->flags, 0);
1029
1030	return 1;
1031}
1032
1033/*
1034 * Check if current CQ state qualifies for calling CQ completion
1035 * handler. Must be called with CQ lock held.
1036 */
1037static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
1038{
1039	u32 cq_notify;
1040
1041	if (!cq->base_cq.comp_handler)
1042		return false;
1043
1044	/* Read application shared notification state */
1045	cq_notify = READ_ONCE(cq->notify->flags);
1046
1047	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
1048	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
1049	     (flags & SIW_WQE_SOLICITED))) {
1050		/*
1051		 * CQ notification is one-shot: Since the
1052		 * current CQE causes user notification,
1053		 * the CQ gets dis-aremd and must be re-aremd
1054		 * by the user for a new notification.
1055		 */
1056		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
1057
1058		return true;
1059	}
1060	return false;
1061}
1062
1063int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
1064		     enum siw_wc_status status)
1065{
1066	struct siw_cq *cq = qp->scq;
1067	int rv = 0;
1068
1069	if (cq) {
1070		u32 sqe_flags = sqe->flags;
1071		struct siw_cqe *cqe;
1072		u32 idx;
1073		unsigned long flags;
1074
1075		spin_lock_irqsave(&cq->lock, flags);
1076
1077		idx = cq->cq_put % cq->num_cqe;
1078		cqe = &cq->queue[idx];
1079
1080		if (!READ_ONCE(cqe->flags)) {
1081			bool notify;
1082
1083			cqe->id = sqe->id;
1084			cqe->opcode = sqe->opcode;
1085			cqe->status = status;
1086			cqe->imm_data = 0;
1087			cqe->bytes = bytes;
1088
1089			if (rdma_is_kernel_res(&cq->base_cq.res))
1090				cqe->base_qp = &qp->base_qp;
1091			else
1092				cqe->qp_id = qp_id(qp);
1093
1094			/* mark CQE valid for application */
1095			WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
1096			/* recycle SQE */
1097			smp_store_mb(sqe->flags, 0);
1098
1099			cq->cq_put++;
1100			notify = siw_cq_notify_now(cq, sqe_flags);
1101
1102			spin_unlock_irqrestore(&cq->lock, flags);
1103
1104			if (notify) {
1105				siw_dbg_cq(cq, "Call completion handler\n");
1106				cq->base_cq.comp_handler(&cq->base_cq,
1107						cq->base_cq.cq_context);
1108			}
1109		} else {
1110			spin_unlock_irqrestore(&cq->lock, flags);
1111			rv = -ENOMEM;
1112			siw_cq_event(cq, IB_EVENT_CQ_ERR);
1113		}
1114	} else {
1115		/* recycle SQE */
1116		smp_store_mb(sqe->flags, 0);
1117	}
1118	return rv;
1119}
1120
1121int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
1122		     u32 inval_stag, enum siw_wc_status status)
1123{
1124	struct siw_cq *cq = qp->rcq;
1125	int rv = 0;
1126
1127	if (cq) {
1128		struct siw_cqe *cqe;
1129		u32 idx;
1130		unsigned long flags;
1131
1132		spin_lock_irqsave(&cq->lock, flags);
1133
1134		idx = cq->cq_put % cq->num_cqe;
1135		cqe = &cq->queue[idx];
1136
1137		if (!READ_ONCE(cqe->flags)) {
1138			bool notify;
1139			u8 cqe_flags = SIW_WQE_VALID;
1140
1141			cqe->id = rqe->id;
1142			cqe->opcode = SIW_OP_RECEIVE;
1143			cqe->status = status;
1144			cqe->imm_data = 0;
1145			cqe->bytes = bytes;
1146
1147			if (rdma_is_kernel_res(&cq->base_cq.res)) {
1148				cqe->base_qp = &qp->base_qp;
1149				if (inval_stag) {
1150					cqe_flags |= SIW_WQE_REM_INVAL;
1151					cqe->inval_stag = inval_stag;
1152				}
1153			} else {
1154				cqe->qp_id = qp_id(qp);
1155			}
1156			/* mark CQE valid for application */
1157			WRITE_ONCE(cqe->flags, cqe_flags);
1158			/* recycle RQE */
1159			smp_store_mb(rqe->flags, 0);
1160
1161			cq->cq_put++;
1162			notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);
1163
1164			spin_unlock_irqrestore(&cq->lock, flags);
1165
1166			if (notify) {
1167				siw_dbg_cq(cq, "Call completion handler\n");
1168				cq->base_cq.comp_handler(&cq->base_cq,
1169						cq->base_cq.cq_context);
1170			}
1171		} else {
1172			spin_unlock_irqrestore(&cq->lock, flags);
1173			rv = -ENOMEM;
1174			siw_cq_event(cq, IB_EVENT_CQ_ERR);
1175		}
1176	} else {
1177		/* recycle RQE */
1178		smp_store_mb(rqe->flags, 0);
1179	}
1180	return rv;
1181}
1182
1183/*
1184 * siw_sq_flush()
1185 *
1186 * Flush SQ and ORQ entries to CQ.
1187 *
1188 * Must be called with QP state write lock held.
1189 * Therefore, SQ and ORQ lock must not be taken.
1190 */
1191void siw_sq_flush(struct siw_qp *qp)
1192{
1193	struct siw_sqe *sqe;
1194	struct siw_wqe *wqe = tx_wqe(qp);
1195	int async_event = 0;
1196
1197	/*
1198	 * Start with completing any work currently on the ORQ
1199	 */
1200	while (qp->attrs.orq_size) {
1201		sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
1202		if (!READ_ONCE(sqe->flags))
1203			break;
1204
1205		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1206			break;
1207
1208		WRITE_ONCE(sqe->flags, 0);
1209		qp->orq_get++;
1210	}
1211	/*
1212	 * Flush an in-progress WQE if present
1213	 */
1214	if (wqe->wr_status != SIW_WR_IDLE) {
1215		siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
1216			   tx_type(wqe), wqe->wr_status);
1217
1218		siw_wqe_put_mem(wqe, tx_type(wqe));
1219
1220		if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
1221		    ((tx_type(wqe) != SIW_OP_READ &&
1222		      tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
1223		     wqe->wr_status == SIW_WR_QUEUED))
1224			/*
1225			 * An in-progress Read Request is already in
1226			 * the ORQ
1227			 */
1228			siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
1229					 SIW_WC_WR_FLUSH_ERR);
1230
1231		wqe->wr_status = SIW_WR_IDLE;
1232	}
1233	/*
1234	 * Flush the Send Queue
1235	 */
1236	while (qp->attrs.sq_size) {
1237		sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
1238		if (!READ_ONCE(sqe->flags))
1239			break;
1240
1241		async_event = 1;
1242		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1243			/*
1244			 * Shall IB_EVENT_SQ_DRAINED be supressed if work
1245			 * completion fails?
1246			 */
1247			break;
1248
1249		WRITE_ONCE(sqe->flags, 0);
1250		qp->sq_get++;
1251	}
1252	if (async_event)
1253		siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
1254}
1255
1256/*
1257 * siw_rq_flush()
1258 *
1259 * Flush recv queue entries to CQ. Also
1260 * takes care of pending active tagged and untagged
1261 * inbound transfers, which have target memory
1262 * referenced.
1263 *
1264 * Must be called with QP state write lock held.
1265 * Therefore, RQ lock must not be taken.
1266 */
1267void siw_rq_flush(struct siw_qp *qp)
1268{
1269	struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;
1270
1271	/*
1272	 * Flush an in-progress untagged operation if present
1273	 */
1274	if (wqe->wr_status != SIW_WR_IDLE) {
1275		siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
1276			   rx_type(wqe), wqe->wr_status);
1277
1278		siw_wqe_put_mem(wqe, rx_type(wqe));
1279
1280		if (rx_type(wqe) == SIW_OP_RECEIVE) {
1281			siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
1282					 0, SIW_WC_WR_FLUSH_ERR);
1283		} else if (rx_type(wqe) != SIW_OP_READ &&
1284			   rx_type(wqe) != SIW_OP_READ_RESPONSE &&
1285			   rx_type(wqe) != SIW_OP_WRITE) {
1286			siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
1287		}
1288		wqe->wr_status = SIW_WR_IDLE;
1289	}
1290	wqe = &qp->rx_tagged.wqe_active;
1291
1292	if (wqe->wr_status != SIW_WR_IDLE) {
1293		siw_wqe_put_mem(wqe, rx_type(wqe));
1294		wqe->wr_status = SIW_WR_IDLE;
1295	}
1296	/*
1297	 * Flush the Receive Queue
1298	 */
1299	while (qp->attrs.rq_size) {
1300		struct siw_rqe *rqe =
1301			&qp->recvq[qp->rq_get % qp->attrs.rq_size];
1302
1303		if (!READ_ONCE(rqe->flags))
1304			break;
1305
1306		if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1307			break;
1308
1309		WRITE_ONCE(rqe->flags, 0);
1310		qp->rq_get++;
1311	}
1312}
1313
1314int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
1315{
1316	int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
1317			  GFP_KERNEL);
1318
1319	if (!rv) {
1320		kref_init(&qp->ref);
1321		qp->sdev = sdev;
1322		siw_dbg_qp(qp, "new QP\n");
1323	}
1324	return rv;
1325}
1326
1327void siw_free_qp(struct kref *ref)
1328{
1329	struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
1330	struct siw_device *sdev = qp->sdev;
1331	unsigned long flags;
1332
1333	if (qp->cep)
1334		siw_cep_put(qp->cep);
1335
1336	found = xa_erase(&sdev->qp_xa, qp_id(qp));
1337	WARN_ON(found != qp);
1338	spin_lock_irqsave(&sdev->lock, flags);
1339	list_del(&qp->devq);
1340	spin_unlock_irqrestore(&sdev->lock, flags);
1341
1342	vfree(qp->sendq);
1343	vfree(qp->recvq);
1344	vfree(qp->irq);
1345	vfree(qp->orq);
1346
1347	siw_put_tx_cpu(qp->tx_cpu);
1348	complete(&qp->qp_free);
1349	atomic_dec(&sdev->num_qp);
1350}
1351