iw_cxgb_qp.c revision 183321

/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c 183321 2008-09-24 01:19:08Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/queue.h>

#include <netinet/in.h>

#include <contrib/rdma/ib_verbs.h>
#include <contrib/rdma/ib_umem.h>
#include <contrib/rdma/ib_user_verbs.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>

#define NO_SUPPORT -1

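/*
 * Build a T3 send WQE from an ib_send_wr (SEND or SEND_WITH_IMM): select
 * the RDMA opcode, copy the SGL in big-endian form, total up the payload
 * length, and return the WR size in 8-byte flits via *flit_cnt.
 */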
static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 * flit_cnt)
{
	int i;
	u32 plen;

	switch (wr->opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE;
		else
			wqe->send.rdmaop = T3_SEND;
		wqe->send.rem_stag = 0;
		break;
#if 0				/* Not currently supported */
	case TYPE_SEND_INVALIDATE:
	case TYPE_SEND_INVALIDATE_IMMEDIATE:
		wqe->send.rdmaop = T3_SEND_WITH_INV;
		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
		break;
	case TYPE_SEND_SE_INVALIDATE:
		wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
		wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
		break;
#endif
	default:
		break;
	}
	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->send.reserved[0] = 0;
	wqe->send.reserved[1] = 0;
	wqe->send.reserved[2] = 0;
	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		plen = 4;
		wqe->send.sgl[0].stag = wr->imm_data;
		wqe->send.sgl[0].len = 0;
		wqe->send.num_sgle = 0;
		*flit_cnt = 5;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen) {
				return (-EMSGSIZE);
			}
			plen += wr->sg_list[i].length;
			wqe->send.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->send.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
		}
		wqe->send.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 4 + ((wr->num_sge) << 1);
	}
	wqe->send.plen = htobe32(plen);
	return 0;
}

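/*
 * Build a T3 RDMA write WQE: fill in the sink STag and offset from
 * wr->wr.rdma, copy the SGL (or the 4 bytes of immediate data), and
 * return the WR size in 8-byte flits via *flit_cnt.
 */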
static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
				 u8 *flit_cnt)
{
	int i;
	u32 plen;

	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->write.rdmaop = T3_RDMA_WRITE;
	wqe->write.reserved[0] = 0;
	wqe->write.reserved[1] = 0;
	wqe->write.reserved[2] = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		plen = 4;
		wqe->write.sgl[0].stag = wr->imm_data;
		wqe->write.sgl[0].len = 0;
		wqe->write.num_sgle = 0;
		*flit_cnt = 6;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen) {
				return (-EMSGSIZE);
			}
			plen += wr->sg_list[i].length;
			wqe->write.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->write.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->write.sgl[i].to =
			    htobe64(wr->sg_list[i].addr);
		}
		wqe->write.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 5 + ((wr->num_sge) << 1);
	}
	wqe->write.plen = htobe32(plen);
	return 0;
}

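/*
 * Build a T3 RDMA read request WQE.  Only a single local SGE is
 * supported.
 */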
static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	if (wr->num_sge > 1)
		return (-EINVAL);
	wqe->read.rdmaop = T3_READ_REQ;
	wqe->read.reserved[0] = 0;
	wqe->read.reserved[1] = 0;
	wqe->read.reserved[2] = 0;
	wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
	wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
	wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
	wqe->read.local_len = htobe32(wr->sg_list[0].length);
	wqe->read.local_to = htobe64(wr->sg_list[0].addr);
	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
	return 0;
}

/*
 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
 */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
	int i;
	struct iwch_mr *mhp;
	u32 offset;
	for (i = 0; i < num_sgle; i++) {

		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
		if (!mhp) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (!mhp->attr.state) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (mhp->attr.zbva) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}

		if (sg_list[i].addr < mhp->attr.va_fbo) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) <
		    sg_list[i].addr) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) >
		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		offset = sg_list[i].addr - mhp->attr.va_fbo;
		offset += ((u32) mhp->attr.va_fbo) %
		          (1UL << (12 + mhp->attr.page_size));
		pbl_addr[i] = ((mhp->attr.pbl_addr -
			        rhp->rdev.rnic_info.pbl_base) >> 3) +
			      (offset >> (12 + mhp->attr.page_size));
		page_size[i] = mhp->attr.page_size;
	}
	return 0;
}

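/*
 * Build a T3 receive WQE from an ib_recv_wr, zeroing any unused SGL
 * entries up to T3_MAX_SGE.
 */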
static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i;
	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->recv.num_sgle = htobe32(wr->num_sge);
	for (i = 0; i < wr->num_sge; i++) {
		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
	}
	return 0;
}

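/*
 * Post a chain of send work requests to the SQ.  Each WR is built into
 * the next WQE slot and tracked in the software SQ for completion
 * processing; the doorbell is rung once at the end.  On failure *bad_wr
 * points at the offending WR.
 */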
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		      struct ib_send_wr **bad_wr)
{
	int err = 0;
	u8 t3_wr_flit_cnt = 0;
	enum t3_wr_opcode t3_wr_opcode = 0;
	enum t3_wr_flags t3_wr_flags;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
		  qhp->wq.sq_size_log2);
	if (num_wrs <= 0) {
		mtx_unlock(&qhp->lock);
		return (-ENOMEM);
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		t3_wr_flags = 0;
		if (wr->send_flags & IB_SEND_SOLICITED)
			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IB_SEND_FENCE)
			t3_wr_flags |= T3_READ_FENCE_FLAG;
		if (wr->send_flags & IB_SEND_SIGNALED)
			t3_wr_flags |= T3_COMPLETION_FLAG;
		sqp = qhp->wq.sq +
		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
		switch (wr->opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			t3_wr_opcode = T3_WR_SEND;
			err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			t3_wr_opcode = T3_WR_WRITE;
			err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_READ:
			t3_wr_opcode = T3_WR_READ;
			t3_wr_flags = 0; /* T3 reads are always signaled */
			err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
			if (err)
				break;
			sqp->read_len = wqe->read.local_len;
			if (!qhp->wq.oldest_read)
				qhp->wq.oldest_read = sqp;
			break;
		default:
			CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err) {
			*bad_wr = wr;
			break;
		}
		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
		sqp->wr_id = wr->wr_id;
		sqp->opcode = wr2opcode(t3_wr_opcode);
		sqp->sq_wptr = qhp->wq.sq_wptr;
		sqp->complete = 0;
		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, t3_wr_flit_cnt);
		CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
		     sqp->opcode);
		wr = wr->next;
		num_wrs--;
		++(qhp->wq.wptr);
		++(qhp->wq.sq_wptr);
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}

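/*
 * Post a chain of receive work requests to the RQ, recording each wr_id
 * in the software RQ so it can be returned at completion time, then ring
 * the doorbell.
 */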
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
			    qhp->wq.rq_size_log2) - 1;
	if (!wr) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	while (wr) {
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		if (num_wrs)
			err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
		else
			err = -ENOMEM;
		if (err) {
			*bad_wr = wr;
			break;
		}
		qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
			wr->wr_id;
		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, sizeof(struct t3_receive_wr) >> 3);
		CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
		     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
		++(qhp->wq.rq_wptr);
		++(qhp->wq.wptr);
		wr = wr->next;
		num_wrs--;
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
	return err;
}

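/*
 * Post a T3_BIND_MW work request binding a memory window to a range of a
 * memory region.  The bind's single SGE is translated to its PBL address
 * before the WQE is built.
 */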
int iwch_bind_mw(struct ib_qp *qp,
			     struct ib_mw *mw,
			     struct ib_mw_bind *mw_bind)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	struct iwch_qp *qhp;
	union t3_wr *wqe;
	u32 pbl_addr;
	u8 page_size;
	u32 num_wrs;
	struct ib_sge sgl;
	int err=0;
	enum t3_wr_flags t3_wr_flags;
	u32 idx;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(qp);
	mhp = to_iwch_mw(mw);
	rhp = qhp->rhp;

	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if ((num_wrs) <= 0) {
		mtx_unlock(&qhp->lock);
		return (-ENOMEM);
	}
	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
	CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
	     mw, mw_bind);
	wqe = (union t3_wr *) (qhp->wq.queue + idx);

	t3_wr_flags = 0;
	if (mw_bind->send_flags & IB_SEND_SIGNALED)
		t3_wr_flags = T3_COMPLETION_FLAG;

	sgl.addr = mw_bind->addr;
	sgl.lkey = mw_bind->mr->lkey;
	sgl.length = mw_bind->length;
	wqe->bind.reserved = 0;
	wqe->bind.type = T3_VA_BASED_TO;

	/* TBD: check perms */
	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
	wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
	wqe->bind.mw_stag = htobe32(mw->rkey);
	wqe->bind.mw_len = htobe32(mw_bind->length);
	wqe->bind.mw_va = htobe64(mw_bind->addr);
	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
	if (err) {
		mtx_unlock(&qhp->lock);
		return (err);
	}
	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
	sqp->wr_id = mw_bind->wr_id;
	sqp->opcode = T3_BIND_MW;
	sqp->sq_wptr = qhp->wq.sq_wptr;
	sqp->complete = 0;
	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
	wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
	wqe->bind.mr_pagesz = page_size;
	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
			        sizeof(struct t3_bind_mw_wr) >> 3);
	++(qhp->wq.wptr);
	++(qhp->wq.sq_wptr);
	mtx_unlock(&qhp->lock);

	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

	return err;
}

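/*
 * Map a CQE error status to the RDMAP/DDP/MPA layer and error code
 * carried in a TERMINATE message.  Without a response message this
 * defaults to an RDMAP local catastrophic error.
 */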
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
				    u8 *layer_type, u8 *ecode)
{
	int status = TPT_ERR_INTERNAL_ERR;
	int tagged = 0;
	int opcode = -1;
	int rqtype = 0;
	int send_inv = 0;

	if (rsp_msg) {
		status = CQE_STATUS(rsp_msg->cqe);
		opcode = CQE_OPCODE(rsp_msg->cqe);
		rqtype = RQ_TYPE(rsp_msg->cqe);
		send_inv = (opcode == T3_SEND_WITH_INV) ||
		           (opcode == T3_SEND_WITH_SE_INV);
		tagged = (opcode == T3_RDMA_WRITE) ||
			 (rqtype && (opcode == T3_READ_RESP));
	}

	switch (status) {
	case TPT_ERR_STAG:
		if (send_inv) {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
			*ecode = RDMAP_CANT_INV_STAG;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_INV_STAG;
		}
		break;
	case TPT_ERR_PDID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if ((opcode == T3_SEND_WITH_INV) ||
		    (opcode == T3_SEND_WITH_SE_INV))
			*ecode = RDMAP_CANT_INV_STAG;
		else
			*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_QPID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_ACCESS:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_ACC_VIOL;
		break;
	case TPT_ERR_WRAP:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_TO_WRAP;
		break;
	case TPT_ERR_BOUND:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_BASE_BOUNDS;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_BASE_BOUNDS;
		}
		break;
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_CANT_INV_STAG;
		break;
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_OUT_OF_RQE:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_NOBUF;
		break;
	case TPT_ERR_PBL_ADDR_BOUND:
		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
		*ecode = DDPT_BASE_BOUNDS;
		break;
	case TPT_ERR_CRC:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_CRC_ERR;
		break;
	case TPT_ERR_MARKER:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_MARKER_ERR;
		break;
	case TPT_ERR_PDU_LEN_ERR:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_MSG_TOOBIG;
		break;
	case TPT_ERR_DDP_VERSION:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_INV_VERS;
		} else {
			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
			*ecode = DDPU_INV_VERS;
		}
		break;
	case TPT_ERR_RDMA_VERSION:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_VERS;
		break;
	case TPT_ERR_OPCODE:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_OPCODE;
		break;
	case TPT_ERR_DDP_QUEUE_NUM:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_QN;
		break;
	case TPT_ERR_MSN:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_IRD_OVERFLOW:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_RANGE;
		break;
	case TPT_ERR_TBIT:
		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_MO:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MO;
		break;
	default:
		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	}
}

/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
	union t3_wr *wqe;
	struct terminate_message *term;
	struct mbuf *m;

	CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
	m = m_gethdr(MT_DATA, M_NOWAIT);
	if (!m) {
		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	wqe = mtod(m, union t3_wr *);
	m->m_len = m->m_pkthdr.len = 40;
	memset(wqe, 0, 40);
	wqe->send.rdmaop = T3_TERMINATE;

	/* immediate data length */
	wqe->send.plen = htonl(4);

	/* immediate data starts here. */
	term = (struct terminate_message *)wqe->send.sgl;
	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
	wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
		V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

	m_set_priority(m, CPL_PRIORITY_DATA);
	m_set_sgl(m, NULL);
	m_set_sgllen(m, 0);
	return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
}

/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp)
{
	struct iwch_cq *rchp, *schp;
	int count;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
	/* take a ref on the qhp since we must release the lock */
	qhp->refcnt++;
	mtx_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&rchp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&rchp->cq);
	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
	cxio_flush_rq(&qhp->wq, &rchp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&rchp->lock);
	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&schp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&schp->cq);
	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
	cxio_flush_sq(&qhp->wq, &schp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&schp->lock);
	(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

	/* deref */
	mtx_lock(&qhp->lock);
	if (--qhp->refcnt == 0)
		wakeup(qhp);
}

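/*
 * User QPs (those with a uobject) are only marked in error; kernel QPs
 * are flushed synchronously via __flush_qp().
 */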
static void flush_qp(struct iwch_qp *qhp)
{
	if (qhp->ibqp.uobject)
		cxio_set_wq_in_error(&qhp->wq);
	else
		__flush_qp(qhp);
}


/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
}

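/*
 * Build a t3_rdma_init_attr from the QP and its endpoint (MPA attributes,
 * ORD/IRD, queue addresses) and issue it via cxio_rdma_init() to
 * initialize the hardware QP.
 */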
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
				enum iwch_qp_attr_mask mask,
				struct iwch_qp_attributes *attrs)
{
	struct t3_rdma_init_attr init_attr;
	int ret;

	init_attr.tid = qhp->ep->hwtid;
	init_attr.qpid = qhp->wq.qpid;
	init_attr.pdid = qhp->attr.pd;
	init_attr.scqid = qhp->attr.scq;
	init_attr.rcqid = qhp->attr.rcq;
	init_attr.rq_addr = qhp->wq.rq_addr;
	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
		qhp->attr.mpa_attr.recv_marker_enabled |
		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
		(qhp->attr.mpa_attr.crc_enabled << 2);

	/*
	 * XXX - The IWCM doesn't quite handle getting these
	 * attrs set before going into RTS.  For now, just turn
	 * them on always...
	 */
#if 0
	init_attr.qpcaps = qhp->attr.enableRdmaRead |
		(qhp->attr.enableRdmaWrite << 1) |
		(qhp->attr.enableBind << 2) |
		(qhp->attr.enable_stag0_fastreg << 3) |
		(qhp->attr.enable_stag0_fastreg << 4);
#else
	init_attr.qpcaps = 0x1f;
#endif
	init_attr.tcp_emss = qhp->ep->emss;
	init_attr.ord = qhp->attr.max_ord;
	init_attr.ird = qhp->attr.max_ird;
	init_attr.qp_dma_addr = qhp->wq.dma_addr;
	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
	init_attr.irs = qhp->ep->rcv_seq;
	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
	     init_attr.rq_addr, init_attr.rq_size,
	     init_attr.flags, init_attr.qpcaps);
	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
	return ret;
}

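/*
 * The QP state machine.  Attribute changes are only applied in IDLE;
 * transitions among IDLE/RTS/CLOSING/TERMINATE/ERROR may post a
 * TERMINATE message, initiate a disconnect on the endpoint, flush the
 * QP, and/or drop the EP reference.  'internal' marks transitions driven
 * by the driver itself rather than the verbs consumer.
 */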
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
				enum iwch_qp_attr_mask mask,
				struct iwch_qp_attributes *attrs,
				int internal)
{
	int ret = 0;
	struct iwch_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct iwch_ep *ep = NULL;

	CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mtx_lock(&qhp->lock);

	/* Process attr changes if in IDLE */
	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & IWCH_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord >
			    rhp->attr.max_rdma_read_qp_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & IWCH_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird >
			    rhp->attr.max_rdma_reads_per_qp) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case IWCH_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_RTS:
			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			qhp->attr.state = IWCH_QP_STATE_RTS;

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP.  This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			get_ep(&qhp->ep->com);
			mtx_unlock(&qhp->lock);
			ret = rdma_init(rhp, qhp, mask, attrs);
			mtx_lock(&qhp->lock);
			if (ret)
				goto err;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_RTS:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_CLOSING:
			PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
			qhp->attr.state = IWCH_QP_STATE_CLOSING;
			if (!internal) {
				abort=0;
				disconnect = 1;
				ep = qhp->ep;
			}
			flush_qp(qhp);
			break;
		case IWCH_QP_STATE_TERMINATE:
			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
			if (qhp->ibqp.uobject)
				cxio_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			if (!internal) {
				abort=1;
				disconnect = 1;
				ep = qhp->ep;
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
			case IWCH_QP_STATE_IDLE:
				qhp->attr.state = IWCH_QP_STATE_IDLE;
				qhp->attr.llp_stream_handle = NULL;
				put_ep(&qhp->ep->com);
				qhp->ep = NULL;
				wakeup(qhp);
				break;
			case IWCH_QP_STATE_ERROR:
				disconnect=1;
				goto err;
			default:
				ret = -EINVAL;
				goto err;
		}
		break;
	case IWCH_QP_STATE_ERROR:
		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}

		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
			ret = -EINVAL;
			goto out;
		}
		qhp->attr.state = IWCH_QP_STATE_IDLE;
		memset(&qhp->attr, 0, sizeof(qhp->attr));
		break;
	case IWCH_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		log(LOG_ERR, "%s in a bad state %d\n",
		       __FUNCTION__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
	     qhp->wq.qpid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	ep = qhp->ep;
	qhp->ep = NULL;
	qhp->attr.state = IWCH_QP_STATE_ERROR;
	free=1;
	wakeup(qhp);
	PANIC_IF(!ep);
	flush_qp(qhp);
out:
	mtx_unlock(&qhp->lock);

	if (terminate)
		iwch_post_terminate(qhp, NULL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect)
		iwch_ep_disconnect(ep, abort, M_NOWAIT);

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		put_ep(&ep->com);

	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
	return ret;
}

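/*
 * Per-QP quiesce/resume helpers: pause or resume the endpoint's TID and
 * track the state in qhp->flags.
 */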
static int quiesce_qp(struct iwch_qp *qhp)
{
	mtx_lock(&qhp->lock);
	iwch_quiesce_tid(qhp->ep);
	qhp->flags |= QP_QUIESCED;
	mtx_unlock(&qhp->lock);
	return 0;
}

static int resume_qp(struct iwch_qp *qhp)
{
	mtx_lock(&qhp->lock);
	iwch_resume_tid(qhp->ep);
	qhp->flags &= ~QP_QUIESCED;
	mtx_unlock(&qhp->lock);
	return 0;
}

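/*
 * Quiesce every QP using this CQ (as send or receive CQ) that is not
 * already quiesced.
 */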
int iwch_quiesce_qps(struct iwch_cq *chp)
{
	int i;
	struct iwch_qp *qhp;

	for (i=0; i < T3_MAX_NUM_QP; i++) {
		qhp = get_qhp(chp->rhp, i);
		if (!qhp)
			continue;
		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
			quiesce_qp(qhp);
			continue;
		}
		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
			quiesce_qp(qhp);
	}
	return 0;
}

int iwch_resume_qps(struct iwch_cq *chp)
{
	int i;
	struct iwch_qp *qhp;

	for (i=0; i < T3_MAX_NUM_QP; i++) {
		qhp = get_qhp(chp->rhp, i);
		if (!qhp)
			continue;
		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
			resume_qp(qhp);
			continue;
		}
		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
			resume_qp(qhp);
	}
	return 0;
}