/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c 278886 2015-02-17 08:40:27Z hselasky $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/queue.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/toecore.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>

#define NO_SUPPORT -1

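/*
 * Build a T3 send work request from an ib_send_wr: pick the T3 rdma opcode
 * (solicited or not), copy the SGL while checking for payload length
 * overflow, and return the WR size in 8-byte flits via *flit_cnt.
 */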
static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 * flit_cnt)
{
	int i;
	u32 plen;

	switch (wr->opcode) {
	case IB_WR_SEND:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE;
		else
			wqe->send.rdmaop = T3_SEND;
		wqe->send.rem_stag = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
		else
			wqe->send.rdmaop = T3_SEND_WITH_INV;
		wqe->send.rem_stag = 0;
		break;
	default:
		return -EINVAL;
	}
	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->send.reserved[0] = 0;
	wqe->send.reserved[1] = 0;
	wqe->send.reserved[2] = 0;
	plen = 0;
	for (i = 0; i < wr->num_sge; i++) {
		if ((plen + wr->sg_list[i].length) < plen) {
			return (-EMSGSIZE);
		}
		plen += wr->sg_list[i].length;
		wqe->send.sgl[i].stag =
		    htobe32(wr->sg_list[i].lkey);
		wqe->send.sgl[i].len =
		    htobe32(wr->sg_list[i].length);
		wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
	}
	wqe->send.num_sgle = htobe32(wr->num_sge);
	*flit_cnt = 4 + ((wr->num_sge) << 1);
	wqe->send.plen = htobe32(plen);
	return 0;
}

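/*
 * Build a T3 RDMA write work request: fill in the sink stag/address and
 * either the immediate data or the local SGL, and return the flit count.
 */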
static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
				 u8 *flit_cnt)
{
	int i;
	u32 plen;

	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->write.rdmaop = T3_RDMA_WRITE;
	wqe->write.reserved[0] = 0;
	wqe->write.reserved[1] = 0;
	wqe->write.reserved[2] = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		plen = 4;
		wqe->write.sgl[0].stag = wr->ex.imm_data;
		wqe->write.sgl[0].len = 0;
		wqe->write.num_sgle = 0;
		*flit_cnt = 6;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen) {
				return (-EMSGSIZE);
			}
			plen += wr->sg_list[i].length;
			wqe->write.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->write.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->write.sgl[i].to =
			    htobe64(wr->sg_list[i].addr);
		}
		wqe->write.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 5 + ((wr->num_sge) << 1);
	}
	wqe->write.plen = htobe32(plen);
	return 0;
}

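/*
 * Build a T3 RDMA read request.  Only a single local SGE is supported.
 */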
static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
				u8 *flit_cnt)
{
	if (wr->num_sge > 1)
		return (-EINVAL);
	wqe->read.rdmaop = T3_READ_REQ;
	wqe->read.reserved[0] = 0;
	wqe->read.reserved[1] = 0;
	wqe->read.reserved[2] = 0;
	wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
	wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
	wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
	wqe->read.local_len = htobe32(wr->sg_list[0].length);
	wqe->read.local_to = htobe64(wr->sg_list[0].addr);
	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
	return 0;
}

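/*
 * Validate each SGE against its memory region (registered, non-zero-based,
 * within bounds) and compute the PBL address and page size the hardware
 * should use for it.
 */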
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
	int i;
	struct iwch_mr *mhp;
	u64 offset;
	for (i = 0; i < num_sgle; i++) {

		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
		if (!mhp) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (!mhp->attr.state) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (mhp->attr.zbva) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}

		if (sg_list[i].addr < mhp->attr.va_fbo) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) <
		    sg_list[i].addr) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) >
		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		offset = sg_list[i].addr - mhp->attr.va_fbo;
		offset += mhp->attr.va_fbo &
			  ((1UL << (12 + mhp->attr.page_size)) - 1);
		pbl_addr[i] = ((mhp->attr.pbl_addr -
			        rhp->rdev.rnic_info.pbl_base) >> 3) +
			      (offset >> (12 + mhp->attr.page_size));
		page_size[i] = mhp->attr.page_size;
	}
	return 0;
}

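/*
 * Build a T3 receive work request for an SGL that references real stags:
 * map the SGL to PBL addresses and record the wr_id in the software RQ
 * entry for this slot.
 */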
static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i, err = 0;
	u32 pbl_addr[T3_MAX_SGE];
	u8 page_size[T3_MAX_SGE];

	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);

	err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
	    page_size);
	if (err)
		return err;
	wqe->recv.pagesz[0] = page_size[0];
	wqe->recv.pagesz[1] = page_size[1];
	wqe->recv.pagesz[2] = page_size[2];
	wqe->recv.pagesz[3] = page_size[3];
	wqe->recv.num_sgle = htobe32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {
		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
				((1UL << (12 + page_size[i])) - 1));
		/* pbl_addr is the adapter's address in the PBL */
		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}

	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
			     qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
			     qhp->wq.rq_size_log2)].pbl_addr = 0;

	return 0;
}

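/*
 * Build a T3 receive work request for an SGL that uses the zero stag.  A
 * PBL large enough for the maximum case is allocated here and handed to
 * the uP via the WR.
 */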
static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i;
	u32 pbl_addr;
	u32 pbl_offset;

	/*
	 * The T3 HW requires the PBL in the HW recv descriptor to reference
	 * a PBL entry.  So we allocate the max needed PBL memory here and pass
	 * it to the uP in the recv WR.  The uP will build the PBL and set up
	 * the HW recv descriptor.
	 */
	pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
	if (!pbl_addr)
		return -ENOMEM;

	/*
	 * Compute the 8B aligned offset.
	 */
	pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;

	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {

		/*
		 * Use a 128MB page size. This and an imposed 128MB
		 * sge length limit allow us to require only a 2-entry HW
		 * PBL for each SGE.  This restriction is acceptable since
		 * it is not possible to allocate 128MB of contiguous
		 * DMA coherent memory!
		 */
		if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
			return -EINVAL;
		wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;

		/*
		 * T3 restricts a recv to all zero-stag or all non-zero-stag.
		 */
		if (wr->sg_list[i].lkey != 0)
			return -EINVAL;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
		wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
		pbl_offset += 2;
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.pagesz[i] = 0;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
			     qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
			     qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
	return 0;
}

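/*
 * Post a chain of send/write/read work requests on the QP's send queue and
 * ring the doorbell.  Fails if the QP is beyond RTS or the SQ is full.
 */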
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		      struct ib_send_wr **bad_wr)
{
	int err = 0;
	u8 t3_wr_flit_cnt = 0;
	enum t3_wr_opcode t3_wr_opcode = 0;
	enum t3_wr_flags t3_wr_flags;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
		  qhp->wq.sq_size_log2);
	if (num_wrs == 0) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		t3_wr_flags = 0;
		if (wr->send_flags & IB_SEND_SOLICITED)
			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IB_SEND_FENCE)
			t3_wr_flags |= T3_READ_FENCE_FLAG;
		if (wr->send_flags & IB_SEND_SIGNALED)
			t3_wr_flags |= T3_COMPLETION_FLAG;
		sqp = qhp->wq.sq +
		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
		switch (wr->opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			t3_wr_opcode = T3_WR_SEND;
			err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			t3_wr_opcode = T3_WR_WRITE;
			err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_READ:
			t3_wr_opcode = T3_WR_READ;
			t3_wr_flags = 0; /* T3 reads are always signaled */
			err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
			if (err)
				break;
			sqp->read_len = wqe->read.local_len;
			if (!qhp->wq.oldest_read)
				qhp->wq.oldest_read = sqp;
			break;
		default:
			CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err)
			break;

		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
		sqp->wr_id = wr->wr_id;
		sqp->opcode = wr2opcode(t3_wr_opcode);
		sqp->sq_wptr = qhp->wq.sq_wptr;
		sqp->complete = 0;
		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, t3_wr_flit_cnt);
		CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
		     sqp->opcode);
		wr = wr->next;
		num_wrs--;
		++(qhp->wq.wptr);
		++(qhp->wq.sq_wptr);
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
	if (err)
		*bad_wr = wr;
	return err;
}

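/*
 * Post a chain of receive work requests on the QP's receive queue and ring
 * the doorbell.
 */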
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
			    qhp->wq.rq_size_log2) - 1;
	if (!wr) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}

	while (wr) {
		if (wr->num_sge > T3_MAX_SGE) {
			err = -EINVAL;
			break;
		}

		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		if (num_wrs) {
			if (wr->sg_list[0].lkey)
				err = build_rdma_recv(qhp, wqe, wr);
			else
				err = build_zero_stag_recv(qhp, wqe, wr);
		} else
			err = -ENOMEM;
		if (err)
			break;

		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, sizeof(struct t3_receive_wr) >> 3);
		CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
		     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
		++(qhp->wq.rq_wptr);
		++(qhp->wq.wptr);
		wr = wr->next;
		num_wrs--;
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
	if (err)
		*bad_wr = wr;
	return err;
}

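/*
 * Post a memory-window bind work request on the QP's send queue.
 */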
int iwch_bind_mw(struct ib_qp *qp,
			     struct ib_mw *mw,
			     struct ib_mw_bind *mw_bind)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	struct iwch_qp *qhp;
	union t3_wr *wqe;
	u32 pbl_addr;
	u8 page_size;
	u32 num_wrs;
	struct ib_sge sgl;
	int err=0;
	enum t3_wr_flags t3_wr_flags;
	u32 idx;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(qp);
	mhp = to_iwch_mw(mw);
	rhp = qhp->rhp;

	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if ((num_wrs) == 0) {
		mtx_unlock(&qhp->lock);
		return (-ENOMEM);
	}
	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
	CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
	     mw, mw_bind);
	wqe = (union t3_wr *) (qhp->wq.queue + idx);

	t3_wr_flags = 0;
	if (mw_bind->send_flags & IB_SEND_SIGNALED)
		t3_wr_flags = T3_COMPLETION_FLAG;

	sgl.addr = mw_bind->bind_info.addr;
	sgl.lkey = mw_bind->bind_info.mr->lkey;
	sgl.length = mw_bind->bind_info.length;
	wqe->bind.reserved = 0;
	wqe->bind.type = T3_VA_BASED_TO;

	/* TBD: check perms */
	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags);
	wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey);
	wqe->bind.mw_stag = htobe32(mw->rkey);
	wqe->bind.mw_len = htobe32(mw_bind->bind_info.length);
	wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr);
	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
	if (err) {
		mtx_unlock(&qhp->lock);
		return (err);
	}
	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
	sqp->wr_id = mw_bind->wr_id;
	sqp->opcode = T3_BIND_MW;
	sqp->sq_wptr = qhp->wq.sq_wptr;
	sqp->complete = 0;
	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
	wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
	wqe->bind.mr_pagesz = page_size;
	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
			        sizeof(struct t3_bind_mw_wr) >> 3);
	++(qhp->wq.wptr);
	++(qhp->wq.sq_wptr);
	mtx_unlock(&qhp->lock);

	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

	return err;
}

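/*
 * Map the CQE error status from the response queue message onto the
 * RDMAP/DDP/MPA layer and error code carried in a TERMINATE message.
 */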
static void build_term_codes(struct respQ_msg_t *rsp_msg,
				    u8 *layer_type, u8 *ecode)
{
	int status = TPT_ERR_INTERNAL_ERR;
	int tagged = 0;
	int opcode = -1;
	int rqtype = 0;
	int send_inv = 0;

	if (rsp_msg) {
		status = CQE_STATUS(rsp_msg->cqe);
		opcode = CQE_OPCODE(rsp_msg->cqe);
		rqtype = RQ_TYPE(rsp_msg->cqe);
		send_inv = (opcode == T3_SEND_WITH_INV) ||
		           (opcode == T3_SEND_WITH_SE_INV);
		tagged = (opcode == T3_RDMA_WRITE) ||
			 (rqtype && (opcode == T3_READ_RESP));
	}

	switch (status) {
	case TPT_ERR_STAG:
		if (send_inv) {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
			*ecode = RDMAP_CANT_INV_STAG;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_INV_STAG;
		}
		break;
	case TPT_ERR_PDID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if ((opcode == T3_SEND_WITH_INV) ||
		    (opcode == T3_SEND_WITH_SE_INV))
			*ecode = RDMAP_CANT_INV_STAG;
		else
			*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_QPID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_ACCESS:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_ACC_VIOL;
		break;
	case TPT_ERR_WRAP:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_TO_WRAP;
		break;
	case TPT_ERR_BOUND:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_BASE_BOUNDS;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_BASE_BOUNDS;
		}
		break;
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_CANT_INV_STAG;
		break;
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_OUT_OF_RQE:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_NOBUF;
		break;
	case TPT_ERR_PBL_ADDR_BOUND:
		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
		*ecode = DDPT_BASE_BOUNDS;
		break;
	case TPT_ERR_CRC:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_CRC_ERR;
		break;
	case TPT_ERR_MARKER:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_MARKER_ERR;
		break;
	case TPT_ERR_PDU_LEN_ERR:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_MSG_TOOBIG;
		break;
	case TPT_ERR_DDP_VERSION:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_INV_VERS;
		} else {
			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
			*ecode = DDPU_INV_VERS;
		}
		break;
	case TPT_ERR_RDMA_VERSION:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_VERS;
		break;
	case TPT_ERR_OPCODE:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_OPCODE;
		break;
	case TPT_ERR_DDP_QUEUE_NUM:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_QN;
		break;
	case TPT_ERR_MSN:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_IRD_OVERFLOW:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_RANGE;
		break;
	case TPT_ERR_TBIT:
		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_MO:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MO;
		break;
	default:
		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	}
}

/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
	union t3_wr *wqe;
	struct terminate_message *term;
	struct mbuf *m;
	struct ofld_hdr *oh;

	CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	oh = mtod(m, struct ofld_hdr *);
	m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
	oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
	wqe = (void *)(oh + 1);
	memset(wqe, 0, 40);
	wqe->send.rdmaop = T3_TERMINATE;

	/* immediate data length */
	wqe->send.plen = htonl(4);

	/* immediate data starts here. */
	term = (struct terminate_message *)wqe->send.sgl;
	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
	wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
		V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

	return t3_offload_tx(qhp->rhp->rdev.adap, m);
}

/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
			struct iwch_cq *schp)
{
	int count;
	int flushed;

	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
	/* take a ref on the qhp since we must release the lock */
	qhp->refcnt++;
	mtx_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&rchp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&rchp->cq);
	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&rchp->lock);
	if (flushed)
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&schp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&schp->cq);
	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&schp->lock);
	if (flushed)
		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

	/* deref */
	mtx_lock(&qhp->lock);
	if (--qhp->refcnt == 0)
		wakeup(qhp);
}

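/*
 * Flush the QP's send and receive completion queues.  User QPs are simply
 * marked in error and their completion handlers invoked; kernel QPs are
 * flushed via __flush_qp().  Called with the qhp lock held.
 */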
static void flush_qp(struct iwch_qp *qhp)
{
	struct iwch_cq *rchp, *schp;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	if (qhp->ibqp.uobject) {
		cxio_set_wq_in_error(&qhp->wq);
		cxio_set_cq_in_error(&rchp->cq);
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
		if (schp != rchp) {
			cxio_set_cq_in_error(&schp->cq);
			(*schp->ibcq.comp_handler)(&schp->ibcq,
						schp->ibcq.cq_context);
		}
		return;
	}
	__flush_qp(qhp, rchp, schp);
}

/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
	union t3_wr *wqe = qhp->wq.queue;
	u16 count = 0;

	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
		count++;
		wqe++;
	}
	return count;
}

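/*
 * Gather everything the firmware needs to move the connection into RDMA
 * mode (queue addresses and sizes, MPA attributes, ORD/IRD, TCP parameters)
 * and hand it to cxio_rdma_init().
 */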
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
				enum iwch_qp_attr_mask mask,
				struct iwch_qp_attributes *attrs)
{
	struct t3_rdma_init_attr init_attr;
	int ret;
	struct socket *so = qhp->ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;

	init_attr.tid = qhp->ep->hwtid;
	init_attr.qpid = qhp->wq.qpid;
	init_attr.pdid = qhp->attr.pd;
	init_attr.scqid = qhp->attr.scq;
	init_attr.rcqid = qhp->attr.rcq;
	init_attr.rq_addr = qhp->wq.rq_addr;
	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
		qhp->attr.mpa_attr.recv_marker_enabled |
		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
		(qhp->attr.mpa_attr.crc_enabled << 2);

	init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
			   uP_RI_QP_RDMA_WRITE_ENABLE |
			   uP_RI_QP_BIND_ENABLE;
	if (!qhp->ibqp.uobject)
		init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
	init_attr.tcp_emss = qhp->ep->emss;
	init_attr.ord = qhp->attr.max_ord;
	init_attr.ird = qhp->attr.max_ird;
	init_attr.qp_dma_addr = qhp->wq.dma_addr;
	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
	init_attr.rqe_count = rqes_posted(qhp);
	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
	init_attr.rtr_type = 0;
	tp = intotcpcb(inp);
	toep = tp->t_toe;
	init_attr.chan = toep->tp_l2t->smt_idx;
	init_attr.irs = qhp->ep->rcv_seq;
	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
	     init_attr.rq_addr, init_attr.rq_size,
	     init_attr.flags, init_attr.qpcaps);
	ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
	return ret;
}

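/*
 * QP state machine.  Applies attribute changes and drives transitions
 * between IDLE, RTS, CLOSING, TERMINATE and ERROR, initiating RDMA-mode
 * setup, TERMINATE messages, connection teardown and queue flushes as
 * needed.
 */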
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
				enum iwch_qp_attr_mask mask,
				struct iwch_qp_attributes *attrs,
				int internal)
{
	int ret = 0;
	struct iwch_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct iwch_ep *ep = NULL;

	CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mtx_lock(&qhp->lock);

	/* Process attr changes if in IDLE */
	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & IWCH_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord >
			    rhp->attr.max_rdma_read_qp_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & IWCH_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird >
			    rhp->attr.max_rdma_reads_per_qp) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case IWCH_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_RTS:
			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			qhp->attr.state = IWCH_QP_STATE_RTS;

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP.  This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			get_ep(&qhp->ep->com);
			mtx_unlock(&qhp->lock);
			ret = rdma_init(rhp, qhp, mask, attrs);
			mtx_lock(&qhp->lock);
			if (ret)
				goto err;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_RTS:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_CLOSING:
			PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
			qhp->attr.state = IWCH_QP_STATE_CLOSING;
			if (!internal) {
				abort=0;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			break;
		case IWCH_QP_STATE_TERMINATE:
			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
			if (qhp->ibqp.uobject)
				cxio_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			if (!internal) {
				abort=1;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
			case IWCH_QP_STATE_IDLE:
				flush_qp(qhp);
				qhp->attr.state = IWCH_QP_STATE_IDLE;
				qhp->attr.llp_stream_handle = NULL;
				put_ep(&qhp->ep->com);
				qhp->ep = NULL;
				wakeup(qhp);
				break;
			case IWCH_QP_STATE_ERROR:
				goto err;
			default:
				ret = -EINVAL;
				goto err;
		}
		break;
	case IWCH_QP_STATE_ERROR:
		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}

		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
			ret = -EINVAL;
			goto out;
		}
		qhp->attr.state = IWCH_QP_STATE_IDLE;
		memset(&qhp->attr, 0, sizeof(qhp->attr));
		break;
	case IWCH_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		log(LOG_ERR, "%s in a bad state %d\n",
		       __FUNCTION__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
	     qhp->wq.qpid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	ep = qhp->ep;
	qhp->ep = NULL;
	qhp->attr.state = IWCH_QP_STATE_ERROR;
	free=1;
	wakeup(qhp);
	PANIC_IF(!ep);
	flush_qp(qhp);
out:
	mtx_unlock(&qhp->lock);

	if (terminate)
		iwch_post_terminate(qhp, NULL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP.  This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect) {
		iwch_ep_disconnect(ep, abort, M_NOWAIT);
		put_ep(&ep->com);
	}

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		put_ep(&ep->com);

	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
	return ret;
}
#endif