/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c 278886 2015-02-17 08:40:27Z hselasky $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/toecore.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>

#define NO_SUPPORT -1

static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
			   u8 *flit_cnt)
{
	int i;
	u32 plen;

	switch (wr->opcode) {
	case IB_WR_SEND:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE;
		else
			wqe->send.rdmaop = T3_SEND;
		wqe->send.rem_stag = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		if (wr->send_flags & IB_SEND_SOLICITED)
			wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
		else
			wqe->send.rdmaop = T3_SEND_WITH_INV;
		wqe->send.rem_stag = 0;
		break;
	default:
		return -EINVAL;
	}
	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->send.reserved[0] = 0;
	wqe->send.reserved[1] = 0;
	wqe->send.reserved[2] = 0;
	plen = 0;
	for (i = 0; i < wr->num_sge; i++) {
		if ((plen + wr->sg_list[i].length) < plen) {
			return (-EMSGSIZE);
		}
		plen += wr->sg_list[i].length;
		wqe->send.sgl[i].stag =
		    htobe32(wr->sg_list[i].lkey);
		wqe->send.sgl[i].len =
		    htobe32(wr->sg_list[i].length);
		wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
	}
	wqe->send.num_sgle = htobe32(wr->num_sge);
	*flit_cnt = 4 + ((wr->num_sge) << 1);
	wqe->send.plen = htobe32(plen);
	return 0;
}
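
/*
 * Added note (not in the original source): a WQE is built from 8-byte
 * "flits" (build_rdma_read() below derives its count via sizeof() >> 3),
 * and a SEND WQE is 4 header flits followed by two flits per SGE, which
 * is where "4 + (num_sge << 1)" comes from.  For example, a send with
 * two SGEs occupies 4 + 2*2 = 8 flits, i.e. 64 bytes of queue space.
 */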
static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
			    u8 *flit_cnt)
{
	int i;
	u32 plen;

	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);
	wqe->write.rdmaop = T3_RDMA_WRITE;
	wqe->write.reserved[0] = 0;
	wqe->write.reserved[1] = 0;
	wqe->write.reserved[2] = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);

	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		plen = 4;
		wqe->write.sgl[0].stag = wr->ex.imm_data;
		wqe->write.sgl[0].len = 0;
		wqe->write.num_sgle = 0;
		*flit_cnt = 6;
	} else {
		plen = 0;
		for (i = 0; i < wr->num_sge; i++) {
			if ((plen + wr->sg_list[i].length) < plen) {
				return (-EMSGSIZE);
			}
			plen += wr->sg_list[i].length;
			wqe->write.sgl[i].stag =
			    htobe32(wr->sg_list[i].lkey);
			wqe->write.sgl[i].len =
			    htobe32(wr->sg_list[i].length);
			wqe->write.sgl[i].to =
			    htobe64(wr->sg_list[i].addr);
		}
		wqe->write.num_sgle = htobe32(wr->num_sge);
		*flit_cnt = 5 + ((wr->num_sge) << 1);
	}
	wqe->write.plen = htobe32(plen);
	return 0;
}

static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
			   u8 *flit_cnt)
{
	if (wr->num_sge > 1)
		return (-EINVAL);
	wqe->read.rdmaop = T3_READ_REQ;
	wqe->read.reserved[0] = 0;
	wqe->read.reserved[1] = 0;
	wqe->read.reserved[2] = 0;
	wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
	wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
	wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
	wqe->read.local_len = htobe32(wr->sg_list[0].length);
	wqe->read.local_to = htobe64(wr->sg_list[0].addr);
	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
	return 0;
}
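
/*
 * Added note (not in the original source): iwch_sgl2pbl_map() below
 * validates each SGE against its memory region and converts the SGE
 * address into an index into the adapter's Page Buffer List (PBL).
 * Sketching the arithmetic with assumed values: for a 4KB region page
 * size (page_size == 0) and an MR whose first byte offset (va_fbo) is
 * 0x1000, an SGE address of 0x3800 yields offset = 0x2800, and the PBL
 * index (offset >> 12) lands two entries past the MR's first PBL entry.
 */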
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
			    u32 num_sgle, u32 *pbl_addr, u8 *page_size)
{
	int i;
	struct iwch_mr *mhp;
	u64 offset;
	for (i = 0; i < num_sgle; i++) {

		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
		if (!mhp) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (!mhp->attr.state) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}
		if (mhp->attr.zbva) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EIO);
		}

		if (sg_list[i].addr < mhp->attr.va_fbo) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) <
		    sg_list[i].addr) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		if (sg_list[i].addr + ((u64) sg_list[i].length) >
		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
			CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
			return (-EINVAL);
		}
		offset = sg_list[i].addr - mhp->attr.va_fbo;
		offset += mhp->attr.va_fbo &
		    ((1UL << (12 + mhp->attr.page_size)) - 1);
		pbl_addr[i] = ((mhp->attr.pbl_addr -
		    rhp->rdev.rnic_info.pbl_base) >> 3) +
		    (offset >> (12 + mhp->attr.page_size));
		page_size[i] = mhp->attr.page_size;
	}
	return 0;
}

static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
			   struct ib_recv_wr *wr)
{
	int i, err = 0;
	u32 pbl_addr[T3_MAX_SGE];
	u8 page_size[T3_MAX_SGE];

	if (wr->num_sge > T3_MAX_SGE)
		return (-EINVAL);

	err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
			       page_size);
	if (err)
		return err;
	wqe->recv.pagesz[0] = page_size[0];
	wqe->recv.pagesz[1] = page_size[1];
	wqe->recv.pagesz[2] = page_size[2];
	wqe->recv.pagesz[3] = page_size[3];
	wqe->recv.num_sgle = htobe32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {
		wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
		    ((1UL << (12 + page_size[i])) - 1));
		/* pbl_addr is the adapter's address in the PBL */
		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}

	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].pbl_addr = 0;

	return 0;
}
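
/*
 * Added note (not in the original source): in the recv WQE built above,
 * each SGE's "to" field carries only the offset within the region page
 * (the address masked by the page size), while pbl_addr[i] tells the
 * adapter where in the PBL to find the page frames themselves.
 */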
static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
				struct ib_recv_wr *wr)
{
	int i;
	u32 pbl_addr;
	u32 pbl_offset;

	/*
	 * The T3 HW requires the PBL in the HW recv descriptor to reference
	 * a PBL entry. So we allocate the max needed PBL memory here and pass
	 * it to the uP in the recv WR. The uP will build the PBL and setup
	 * the HW recv descriptor.
	 */
	pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
	if (!pbl_addr)
		return -ENOMEM;

	/*
	 * Compute the 8B aligned offset.
	 */
	pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;

	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);

	for (i = 0; i < wr->num_sge; i++) {

		/*
		 * Use a 128MB page size. This and an imposed 128MB
		 * sge length limit allow us to require only a 2-entry HW
		 * PBL for each SGE. This restriction is acceptable since
		 * it is not possible to allocate 128MB of contiguous
		 * DMA coherent memory!
		 */
		if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
			return -EINVAL;
		wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;

		/*
		 * T3 restricts a recv to all zero-stag or all non-zero-stag.
		 */
		if (wr->sg_list[i].lkey != 0)
			return -EINVAL;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
		wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
		wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
		pbl_offset += 2;
	}
	for (; i < T3_MAX_SGE; i++) {
		wqe->recv.pagesz[i] = 0;
		wqe->recv.sgl[i].stag = 0;
		wqe->recv.sgl[i].len = 0;
		wqe->recv.sgl[i].to = 0;
		wqe->recv.pbl_addr[i] = 0;
	}
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
		   qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
	return 0;
}

int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		   struct ib_send_wr **bad_wr)
{
	int err = 0;
	u8 t3_wr_flit_cnt = 0;
	enum t3_wr_opcode t3_wr_opcode = 0;
	enum t3_wr_flags t3_wr_flags;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if (num_wrs == 0) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			break;
		}
		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		t3_wr_flags = 0;
		if (wr->send_flags & IB_SEND_SOLICITED)
			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IB_SEND_FENCE)
			t3_wr_flags |= T3_READ_FENCE_FLAG;
		if (wr->send_flags & IB_SEND_SIGNALED)
			t3_wr_flags |= T3_COMPLETION_FLAG;
		sqp = qhp->wq.sq +
		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
		switch (wr->opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			t3_wr_opcode = T3_WR_SEND;
			err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			t3_wr_opcode = T3_WR_WRITE;
			err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
			break;
		case IB_WR_RDMA_READ:
			t3_wr_opcode = T3_WR_READ;
			t3_wr_flags = 0; /* T3 reads are always signaled */
			err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
			if (err)
				break;
			sqp->read_len = wqe->read.local_len;
			if (!qhp->wq.oldest_read)
				qhp->wq.oldest_read = sqp;
			break;
		default:
			CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err)
			break;

		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
		sqp->wr_id = wr->wr_id;
		sqp->opcode = wr2opcode(t3_wr_opcode);
		sqp->sq_wptr = qhp->wq.sq_wptr;
		sqp->complete = 0;
		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, t3_wr_flit_cnt);
		CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
		     sqp->opcode);
		wr = wr->next;
		num_wrs--;
		++(qhp->wq.wptr);
		++(qhp->wq.sq_wptr);
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
	if (err)
		*bad_wr = wr;
	return err;
}
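
/*
 * Usage sketch (added for illustration, not part of the original source;
 * the example_* names are hypothetical): posting one signaled SEND of a
 * single registered buffer through iwch_post_send().  Kept under #if 0
 * so it is never compiled into the driver.
 */
#if 0
static int
example_post_one_send(struct ib_qp *qp, u64 addr, u32 len, u32 lkey)
{
	struct ib_sge sge;
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof(wr));
	sge.addr = addr;		  /* VA within the registered MR */
	sge.length = len;
	sge.lkey = lkey;		  /* lkey of the registered MR */
	wr.wr_id = 1;			  /* cookie returned in the CQE */
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED; /* request a completion */
	wr.sg_list = &sge;
	wr.num_sge = 1;
	return (iwch_post_send(qp, &wr, &bad_wr));
}
#endif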
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct iwch_qp *qhp;
	u32 idx;
	union t3_wr *wqe;
	u32 num_wrs;

	qhp = to_iwch_qp(ibqp);
	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}
	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
			    qhp->wq.rq_size_log2) - 1;
	if (!wr) {
		mtx_unlock(&qhp->lock);
		err = -EINVAL;
		goto out;
	}

	while (wr) {
		if (wr->num_sge > T3_MAX_SGE) {
			err = -EINVAL;
			break;
		}

		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
		wqe = (union t3_wr *) (qhp->wq.queue + idx);
		if (num_wrs) {
			if (wr->sg_list[0].lkey)
				err = build_rdma_recv(qhp, wqe, wr);
			else
				err = build_zero_stag_recv(qhp, wqe, wr);
		} else
			err = -ENOMEM;
		if (err)
			break;

		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
			       0, sizeof(struct t3_receive_wr) >> 3);
		CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
		     "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
		++(qhp->wq.rq_wptr);
		++(qhp->wq.wptr);
		wr = wr->next;
		num_wrs--;
	}
	mtx_unlock(&qhp->lock);
	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
	if (err)
		*bad_wr = wr;
	return err;
}

int iwch_bind_mw(struct ib_qp *qp,
		 struct ib_mw *mw,
		 struct ib_mw_bind *mw_bind)
{
	struct iwch_dev *rhp;
	struct iwch_mw *mhp;
	struct iwch_qp *qhp;
	union t3_wr *wqe;
	u32 pbl_addr;
	u8 page_size;
	u32 num_wrs;
	struct ib_sge sgl;
	int err = 0;
	enum t3_wr_flags t3_wr_flags;
	u32 idx;
	struct t3_swsq *sqp;

	qhp = to_iwch_qp(qp);
	mhp = to_iwch_mw(mw);
	rhp = qhp->rhp;

	mtx_lock(&qhp->lock);
	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
		mtx_unlock(&qhp->lock);
		return (-EINVAL);
	}
	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
			    qhp->wq.sq_size_log2);
	if ((num_wrs) == 0) {
		mtx_unlock(&qhp->lock);
		return (-ENOMEM);
	}
	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
	CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
	     mw, mw_bind);
	wqe = (union t3_wr *) (qhp->wq.queue + idx);

	t3_wr_flags = 0;
	if (mw_bind->send_flags & IB_SEND_SIGNALED)
		t3_wr_flags = T3_COMPLETION_FLAG;

	sgl.addr = mw_bind->bind_info.addr;
	sgl.lkey = mw_bind->bind_info.mr->lkey;
	sgl.length = mw_bind->bind_info.length;
	wqe->bind.reserved = 0;
	wqe->bind.type = T3_VA_BASED_TO;

	/* TBD: check perms */
	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags);
	wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey);
	wqe->bind.mw_stag = htobe32(mw->rkey);
	wqe->bind.mw_len = htobe32(mw_bind->bind_info.length);
	wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr);
	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
	if (err) {
		mtx_unlock(&qhp->lock);
		return (err);
	}
	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
	sqp->wr_id = mw_bind->wr_id;
	sqp->opcode = T3_BIND_MW;
	sqp->sq_wptr = qhp->wq.sq_wptr;
	sqp->complete = 0;
	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
	wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
	wqe->bind.mr_pagesz = page_size;
	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
		       sizeof(struct t3_bind_mw_wr) >> 3);
	++(qhp->wq.wptr);
	++(qhp->wq.sq_wptr);
	mtx_unlock(&qhp->lock);

	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

	return err;
}
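
/*
 * Added note (not in the original source): build_term_codes() below maps
 * an adapter completion status onto the iWARP TERMINATE message's
 * layer/etype and error-code octets (RDMAP, DDP, or MPA layer), so the
 * peer learns which protocol layer failed and why.
 */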
static void build_term_codes(struct respQ_msg_t *rsp_msg,
			     u8 *layer_type, u8 *ecode)
{
	int status = TPT_ERR_INTERNAL_ERR;
	int tagged = 0;
	int opcode = -1;
	int rqtype = 0;
	int send_inv = 0;

	if (rsp_msg) {
		status = CQE_STATUS(rsp_msg->cqe);
		opcode = CQE_OPCODE(rsp_msg->cqe);
		rqtype = RQ_TYPE(rsp_msg->cqe);
		send_inv = (opcode == T3_SEND_WITH_INV) ||
			   (opcode == T3_SEND_WITH_SE_INV);
		tagged = (opcode == T3_RDMA_WRITE) ||
			 (rqtype && (opcode == T3_READ_RESP));
	}

	switch (status) {
	case TPT_ERR_STAG:
		if (send_inv) {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
			*ecode = RDMAP_CANT_INV_STAG;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_INV_STAG;
		}
		break;
	case TPT_ERR_PDID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if ((opcode == T3_SEND_WITH_INV) ||
		    (opcode == T3_SEND_WITH_SE_INV))
			*ecode = RDMAP_CANT_INV_STAG;
		else
			*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_QPID:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_STAG_NOT_ASSOC;
		break;
	case TPT_ERR_ACCESS:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_ACC_VIOL;
		break;
	case TPT_ERR_WRAP:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		*ecode = RDMAP_TO_WRAP;
		break;
	case TPT_ERR_BOUND:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_BASE_BOUNDS;
		} else {
			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			*ecode = RDMAP_BASE_BOUNDS;
		}
		break;
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_CANT_INV_STAG;
		break;
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_OUT_OF_RQE:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_NOBUF;
		break;
	case TPT_ERR_PBL_ADDR_BOUND:
		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
		*ecode = DDPT_BASE_BOUNDS;
		break;
	case TPT_ERR_CRC:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_CRC_ERR;
		break;
	case TPT_ERR_MARKER:
		*layer_type = LAYER_MPA|DDP_LLP;
		*ecode = MPA_MARKER_ERR;
		break;
	case TPT_ERR_PDU_LEN_ERR:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_MSG_TOOBIG;
		break;
	case TPT_ERR_DDP_VERSION:
		if (tagged) {
			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
			*ecode = DDPT_INV_VERS;
		} else {
			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
			*ecode = DDPU_INV_VERS;
		}
		break;
	case TPT_ERR_RDMA_VERSION:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_VERS;
		break;
	case TPT_ERR_OPCODE:
		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
		*ecode = RDMAP_INV_OPCODE;
		break;
	case TPT_ERR_DDP_QUEUE_NUM:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_QN;
		break;
	case TPT_ERR_MSN:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_IRD_OVERFLOW:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MSN_RANGE;
		break;
	case TPT_ERR_TBIT:
		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	case TPT_ERR_MO:
		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
		*ecode = DDPU_INV_MO;
		break;
	default:
		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
		*ecode = 0;
		break;
	}
}
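
/*
 * Added example (not in the original source): if the adapter reports
 * TPT_ERR_OUT_OF_RQE (the peer sent while no receive buffer was posted),
 * the TERMINATE carries LAYER_DDP|DDP_UNTAGGED_ERR with ecode
 * DDPU_INV_MSN_NOBUF, i.e. "untagged message, no buffer available".
 */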
/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
	union t3_wr *wqe;
	struct terminate_message *term;
	struct mbuf *m;
	struct ofld_hdr *oh;

	CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	oh = mtod(m, struct ofld_hdr *);
	m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
	oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
	wqe = (void *)(oh + 1);
	memset(wqe, 0, 40);
	wqe->send.rdmaop = T3_TERMINATE;

	/* immediate data length */
	wqe->send.plen = htonl(4);

	/* immediate data starts here. */
	term = (struct terminate_message *)wqe->send.sgl;
	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
	wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
	    V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
	wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));

	return t3_offload_tx(qhp->rhp->rdev.adap, m);
}

/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
		       struct iwch_cq *schp)
{
	int count;
	int flushed;

	CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
	/* take a ref on the qhp since we must release the lock */
	qhp->refcnt++;
	mtx_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&rchp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&rchp->cq);
	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&rchp->lock);
	if (flushed)
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

	/* locking hierarchy: cq lock first, then qp lock. */
	mtx_lock(&schp->lock);
	mtx_lock(&qhp->lock);
	cxio_flush_hw_cq(&schp->cq);
	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
	mtx_unlock(&qhp->lock);
	mtx_unlock(&schp->lock);
	if (flushed)
		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

	/* deref */
	mtx_lock(&qhp->lock);
	if (--qhp->refcnt == 0)
		wakeup(qhp);
}

static void flush_qp(struct iwch_qp *qhp)
{
	struct iwch_cq *rchp, *schp;

	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
	schp = get_chp(qhp->rhp, qhp->attr.scq);

	if (qhp->ibqp.uobject) {
		cxio_set_wq_in_error(&qhp->wq);
		cxio_set_cq_in_error(&rchp->cq);
		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
		if (schp != rchp) {
			cxio_set_cq_in_error(&schp->cq);
			(*schp->ibcq.comp_handler)(&schp->ibcq,
						   schp->ibcq.cq_context);
		}
		return;
	}
	__flush_qp(qhp, rchp, schp);
}
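
/*
 * Added note (not in the original source): for a userspace QP
 * (qhp->ibqp.uobject != NULL) the kernel cannot walk the user-mapped
 * work queue, so flush_qp() only marks the WQ/CQs in error and signals
 * the completion handlers; the actual flushing is left to the user
 * library.  Kernel QPs are flushed directly via __flush_qp().
 */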
/*
 * Return non-zero if at least one RECV was pre-posted.
 */
static int rqes_posted(struct iwch_qp *qhp)
{
	union t3_wr *wqe = qhp->wq.queue;
	u16 count = 0;
	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
		count++;
		wqe++;
	}
	return count;
}

static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
		     enum iwch_qp_attr_mask mask,
		     struct iwch_qp_attributes *attrs)
{
	struct t3_rdma_init_attr init_attr;
	int ret;
	struct socket *so = qhp->ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;

	init_attr.tid = qhp->ep->hwtid;
	init_attr.qpid = qhp->wq.qpid;
	init_attr.pdid = qhp->attr.pd;
	init_attr.scqid = qhp->attr.scq;
	init_attr.rcqid = qhp->attr.rcq;
	init_attr.rq_addr = qhp->wq.rq_addr;
	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
		qhp->attr.mpa_attr.recv_marker_enabled |
		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
		(qhp->attr.mpa_attr.crc_enabled << 2);

	init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
			   uP_RI_QP_RDMA_WRITE_ENABLE |
			   uP_RI_QP_BIND_ENABLE;
	if (!qhp->ibqp.uobject)
		init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
	init_attr.tcp_emss = qhp->ep->emss;
	init_attr.ord = qhp->attr.max_ord;
	init_attr.ird = qhp->attr.max_ird;
	init_attr.qp_dma_addr = qhp->wq.dma_addr;
	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
	init_attr.rqe_count = rqes_posted(qhp);
	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
	init_attr.rtr_type = 0;
	tp = intotcpcb(inp);
	toep = tp->t_toe;
	init_attr.chan = toep->tp_l2t->smt_idx;
	init_attr.irs = qhp->ep->rcv_seq;
	CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
	     "flags 0x%x qpcaps 0x%x", __FUNCTION__,
	     init_attr.rq_addr, init_attr.rq_size,
	     init_attr.flags, init_attr.qpcaps);
	ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
	CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
	return ret;
}
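
/*
 * Added overview (not in the original source, derived from the switch
 * below): iwch_modify_qp() implements the QP state machine.  The legal
 * transitions are roughly:
 *
 *	IDLE      -> RTS (via rdma_init), ERROR
 *	RTS       -> CLOSING, TERMINATE, ERROR
 *	CLOSING   -> IDLE, ERROR (internal transitions only)
 *	ERROR     -> IDLE (only once both work queues are empty)
 *	TERMINATE -> ERROR (internal transitions only)
 *
 * Any transition into ERROR disassociates the endpoint and flushes the QP.
 */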
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
		   enum iwch_qp_attr_mask mask,
		   struct iwch_qp_attributes *attrs,
		   int internal)
{
	int ret = 0;
	struct iwch_qp_attributes newattr = qhp->attr;
	int disconnect = 0;
	int terminate = 0;
	int abort = 0;
	int free = 0;
	struct iwch_ep *ep = NULL;

	CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

	mtx_lock(&qhp->lock);

	/* Process attr changes if in IDLE */
	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
			ret = -EIO;
			goto out;
		}
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
			newattr.enable_rdma_read = attrs->enable_rdma_read;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
			newattr.enable_rdma_write = attrs->enable_rdma_write;
		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
			newattr.enable_bind = attrs->enable_bind;
		if (mask & IWCH_QP_ATTR_MAX_ORD) {
			if (attrs->max_ord >
			    rhp->attr.max_rdma_read_qp_depth) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ord = attrs->max_ord;
		}
		if (mask & IWCH_QP_ATTR_MAX_IRD) {
			if (attrs->max_ird >
			    rhp->attr.max_rdma_reads_per_qp) {
				ret = -EINVAL;
				goto out;
			}
			newattr.max_ird = attrs->max_ird;
		}
		qhp->attr = newattr;
	}

	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
		goto out;
	if (qhp->attr.state == attrs->next_state)
		goto out;

	switch (qhp->attr.state) {
	case IWCH_QP_STATE_IDLE:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_RTS:
			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
				ret = -EINVAL;
				goto out;
			}
			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
				ret = -EINVAL;
				goto out;
			}
			qhp->attr.mpa_attr = attrs->mpa_attr;
			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
			qhp->ep = qhp->attr.llp_stream_handle;
			qhp->attr.state = IWCH_QP_STATE_RTS;

			/*
			 * Ref the endpoint here and deref when we
			 * disassociate the endpoint from the QP. This
			 * happens in CLOSING->IDLE transition or *->ERROR
			 * transition.
			 */
			get_ep(&qhp->ep->com);
			mtx_unlock(&qhp->lock);
			ret = rdma_init(rhp, qhp, mask, attrs);
			mtx_lock(&qhp->lock);
			if (ret)
				goto err;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			flush_qp(qhp);
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_RTS:
		switch (attrs->next_state) {
		case IWCH_QP_STATE_CLOSING:
			PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
			qhp->attr.state = IWCH_QP_STATE_CLOSING;
			if (!internal) {
				abort = 0;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			break;
		case IWCH_QP_STATE_TERMINATE:
			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
			if (qhp->ibqp.uobject)
				cxio_set_wq_in_error(&qhp->wq);
			if (!internal)
				terminate = 1;
			break;
		case IWCH_QP_STATE_ERROR:
			qhp->attr.state = IWCH_QP_STATE_ERROR;
			if (!internal) {
				abort = 1;
				disconnect = 1;
				ep = qhp->ep;
				get_ep(&ep->com);
			}
			goto err;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
		break;
	case IWCH_QP_STATE_CLOSING:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		switch (attrs->next_state) {
		case IWCH_QP_STATE_IDLE:
			flush_qp(qhp);
			qhp->attr.state = IWCH_QP_STATE_IDLE;
			qhp->attr.llp_stream_handle = NULL;
			put_ep(&qhp->ep->com);
			qhp->ep = NULL;
			wakeup(qhp);
			break;
		case IWCH_QP_STATE_ERROR:
			goto err;
		default:
			ret = -EINVAL;
			goto err;
		}
		break;
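	/*
	 * Added note (not in the original source): leaving ERROR for IDLE
	 * is only permitted once both the send and receive queues have
	 * been drained, which the Q_EMPTY() checks below enforce.
	 */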
	case IWCH_QP_STATE_ERROR:
		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
			ret = -EINVAL;
			goto out;
		}

		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
			ret = -EINVAL;
			goto out;
		}
		qhp->attr.state = IWCH_QP_STATE_IDLE;
		memset(&qhp->attr, 0, sizeof(qhp->attr));
		break;
	case IWCH_QP_STATE_TERMINATE:
		if (!internal) {
			ret = -EINVAL;
			goto out;
		}
		goto err;
		break;
	default:
		log(LOG_ERR, "%s in a bad state %d\n",
		    __FUNCTION__, qhp->attr.state);
		ret = -EINVAL;
		goto err;
		break;
	}
	goto out;
err:
	CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
	     qhp->wq.qpid);

	/* disassociate the LLP connection */
	qhp->attr.llp_stream_handle = NULL;
	ep = qhp->ep;
	qhp->ep = NULL;
	qhp->attr.state = IWCH_QP_STATE_ERROR;
	free = 1;
	wakeup(qhp);
	PANIC_IF(!ep);
	flush_qp(qhp);
out:
	mtx_unlock(&qhp->lock);

	if (terminate)
		iwch_post_terminate(qhp, NULL);

	/*
	 * If disconnect is 1, then we need to initiate a disconnect
	 * on the EP. This can be a normal close (RTS->CLOSING) or
	 * an abnormal close (RTS/CLOSING->ERROR).
	 */
	if (disconnect) {
		iwch_ep_disconnect(ep, abort, M_NOWAIT);
		put_ep(&ep->com);
	}

	/*
	 * If free is 1, then we've disassociated the EP from the QP
	 * and we need to dereference the EP.
	 */
	if (free)
		put_ep(&ep->com);

	CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
	return ret;
}
#endif