1/* 2 * IBM eServer eHCA Infiniband device driver for Linux on POWER 3 * 4 * post_send/recv, poll_cq, req_notify 5 * 6 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> 7 * Waleri Fomin <fomin@de.ibm.com> 8 * Joachim Fenkes <fenkes@de.ibm.com> 9 * Reinhard Ernst <rernst@de.ibm.com> 10 * 11 * Copyright (c) 2005 IBM Corporation 12 * 13 * All rights reserved. 14 * 15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB 16 * BSD. 17 * 18 * OpenIB BSD License 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions are met: 22 * 23 * Redistributions of source code must retain the above copyright notice, this 24 * list of conditions and the following disclaimer. 25 * 26 * Redistributions in binary form must reproduce the above copyright notice, 27 * this list of conditions and the following disclaimer in the documentation 28 * and/or other materials 29 * provided with the distribution. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 41 * POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 45#include <asm/system.h> 46#include "ehca_classes.h" 47#include "ehca_tools.h" 48#include "ehca_qes.h" 49#include "ehca_iverbs.h" 50#include "hcp_if.h" 51#include "hipz_fns.h" 52 53/* in RC traffic, insert an empty RDMA READ every this many packets */ 54#define ACK_CIRC_THRESHOLD 2000000 55 56static u64 replace_wr_id(u64 wr_id, u16 idx) 57{ 58 u64 ret; 59 60 ret = wr_id & ~QMAP_IDX_MASK; 61 ret |= idx & QMAP_IDX_MASK; 62 63 return ret; 64} 65 66static u16 get_app_wr_id(u64 wr_id) 67{ 68 return wr_id & QMAP_IDX_MASK; 69} 70 71static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, 72 struct ehca_wqe *wqe_p, 73 struct ib_recv_wr *recv_wr, 74 u32 rq_map_idx) 75{ 76 u8 cnt_ds; 77 if (unlikely((recv_wr->num_sge < 0) || 78 (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { 79 ehca_gen_err("Invalid number of WQE SGE. " 80 "num_sqe=%x max_nr_of_sg=%x", 81 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); 82 return -EINVAL; /* invalid SG list length */ 83 } 84 85 /* clear wqe header until sglist */ 86 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); 87 88 wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); 89 wqe_p->nr_of_data_seg = recv_wr->num_sge; 90 91 for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { 92 wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = 93 recv_wr->sg_list[cnt_ds].addr; 94 wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = 95 recv_wr->sg_list[cnt_ds].lkey; 96 wqe_p->u.all_rcv.sg_list[cnt_ds].length = 97 recv_wr->sg_list[cnt_ds].length; 98 } 99 100 if (ehca_debug_level >= 3) { 101 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", 102 ipz_rqueue); 103 ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); 104 } 105 106 return 0; 107} 108 109#if defined(DEBUG_GSI_SEND_WR) 110 111/* need ib_mad struct */ 112#include <rdma/ib_mad.h> 113 114static void trace_send_wr_ud(const struct ib_send_wr *send_wr) 115{ 116 int idx; 117 int j; 118 while (send_wr) { 119 struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; 120 struct ib_sge *sge = send_wr->sg_list; 121 ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " 122 "send_flags=%x opcode=%x", idx, send_wr->wr_id, 123 send_wr->num_sge, send_wr->send_flags, 124 send_wr->opcode); 125 if (mad_hdr) { 126 ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " 127 "mgmt_class=%x class_version=%x method=%x " 128 "status=%x class_specific=%x tid=%lx " 129 "attr_id=%x resv=%x attr_mod=%x", 130 idx, mad_hdr->base_version, 131 mad_hdr->mgmt_class, 132 mad_hdr->class_version, mad_hdr->method, 133 mad_hdr->status, mad_hdr->class_specific, 134 mad_hdr->tid, mad_hdr->attr_id, 135 mad_hdr->resv, 136 mad_hdr->attr_mod); 137 } 138 for (j = 0; j < send_wr->num_sge; j++) { 139 u8 *data = (u8 *)abs_to_virt(sge->addr); 140 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " 141 "lkey=%x", 142 idx, j, data, sge->length, sge->lkey); 143 /* assume length is n*16 */ 144 ehca_dmp(data, sge->length, "send_wr#%x sge#%x", 145 idx, j); 146 sge++; 147 } /* eof for j */ 148 idx++; 149 send_wr = send_wr->next; 150 } /* eof while send_wr */ 151} 152 153#endif /* DEBUG_GSI_SEND_WR */ 154 155static inline int ehca_write_swqe(struct ehca_qp *qp, 156 struct ehca_wqe *wqe_p, 157 const struct ib_send_wr *send_wr, 158 u32 sq_map_idx, 159 int hidden) 160{ 161 u32 idx; 162 u64 dma_length; 163 struct ehca_av *my_av; 164 u32 remote_qkey = send_wr->wr.ud.remote_qkey; 165 struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; 166 167 if (unlikely((send_wr->num_sge < 0) || 168 (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { 169 ehca_gen_err("Invalid number of WQE SGE. " 170 "num_sqe=%x max_nr_of_sg=%x", 171 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); 172 return -EINVAL; /* invalid SG list length */ 173 } 174 175 /* clear wqe header until sglist */ 176 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); 177 178 wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); 179 180 qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); 181 qmap_entry->reported = 0; 182 qmap_entry->cqe_req = 0; 183 184 switch (send_wr->opcode) { 185 case IB_WR_SEND: 186 case IB_WR_SEND_WITH_IMM: 187 wqe_p->optype = WQE_OPTYPE_SEND; 188 break; 189 case IB_WR_RDMA_WRITE: 190 case IB_WR_RDMA_WRITE_WITH_IMM: 191 wqe_p->optype = WQE_OPTYPE_RDMAWRITE; 192 break; 193 case IB_WR_RDMA_READ: 194 wqe_p->optype = WQE_OPTYPE_RDMAREAD; 195 break; 196 default: 197 ehca_gen_err("Invalid opcode=%x", send_wr->opcode); 198 return -EINVAL; /* invalid opcode */ 199 } 200 201 wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; 202 203 wqe_p->wr_flag = 0; 204 205 if ((send_wr->send_flags & IB_SEND_SIGNALED || 206 qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) 207 && !hidden) { 208 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; 209 qmap_entry->cqe_req = 1; 210 } 211 212 if (send_wr->opcode == IB_WR_SEND_WITH_IMM || 213 send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { 214 /* this might not work as long as HW does not support it */ 215 wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); 216 wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; 217 } 218 219 wqe_p->nr_of_data_seg = send_wr->num_sge; 220 221 switch (qp->qp_type) { 222 case IB_QPT_SMI: 223 case IB_QPT_GSI: 224 /* no break is intential here */ 225 case IB_QPT_UD: 226 /* IB 1.2 spec C10-15 compliance */ 227 if (send_wr->wr.ud.remote_qkey & 0x80000000) 228 remote_qkey = qp->qkey; 229 230 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; 231 wqe_p->local_ee_context_qkey = remote_qkey; 232 if (unlikely(!send_wr->wr.ud.ah)) { 233 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); 234 return -EINVAL; 235 } 236 if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { 237 ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); 238 return -EINVAL; 239 } 240 my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); 241 wqe_p->u.ud_av.ud_av = my_av->av; 242 243 /* 244 * omitted check of IB_SEND_INLINE 245 * since HW does not support it 246 */ 247 for (idx = 0; idx < send_wr->num_sge; idx++) { 248 wqe_p->u.ud_av.sg_list[idx].vaddr = 249 send_wr->sg_list[idx].addr; 250 wqe_p->u.ud_av.sg_list[idx].lkey = 251 send_wr->sg_list[idx].lkey; 252 wqe_p->u.ud_av.sg_list[idx].length = 253 send_wr->sg_list[idx].length; 254 } /* eof for idx */ 255 if (qp->qp_type == IB_QPT_SMI || 256 qp->qp_type == IB_QPT_GSI) 257 wqe_p->u.ud_av.ud_av.pmtu = 1; 258 if (qp->qp_type == IB_QPT_GSI) { 259 wqe_p->pkeyi = send_wr->wr.ud.pkey_index; 260#ifdef DEBUG_GSI_SEND_WR 261 trace_send_wr_ud(send_wr); 262#endif /* DEBUG_GSI_SEND_WR */ 263 } 264 break; 265 266 case IB_QPT_UC: 267 if (send_wr->send_flags & IB_SEND_FENCE) 268 wqe_p->wr_flag |= WQE_WRFLAG_FENCE; 269 /* no break is intentional here */ 270 case IB_QPT_RC: 271 /* TODO: atomic not implemented */ 272 wqe_p->u.nud.remote_virtual_address = 273 send_wr->wr.rdma.remote_addr; 274 wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; 275 276 /* 277 * omitted checking of IB_SEND_INLINE 278 * since HW does not support it 279 */ 280 dma_length = 0; 281 for (idx = 0; idx < send_wr->num_sge; idx++) { 282 wqe_p->u.nud.sg_list[idx].vaddr = 283 send_wr->sg_list[idx].addr; 284 wqe_p->u.nud.sg_list[idx].lkey = 285 send_wr->sg_list[idx].lkey; 286 wqe_p->u.nud.sg_list[idx].length = 287 send_wr->sg_list[idx].length; 288 dma_length += send_wr->sg_list[idx].length; 289 } /* eof idx */ 290 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; 291 292 /* unsolicited ack circumvention */ 293 if (send_wr->opcode == IB_WR_RDMA_READ) { 294 /* on RDMA read, switch on and reset counters */ 295 qp->message_count = qp->packet_count = 0; 296 qp->unsol_ack_circ = 1; 297 } else 298 /* else estimate #packets */ 299 qp->packet_count += (dma_length >> qp->mtu_shift) + 1; 300 301 break; 302 303 default: 304 ehca_gen_err("Invalid qptype=%x", qp->qp_type); 305 return -EINVAL; 306 } 307 308 if (ehca_debug_level >= 3) { 309 ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); 310 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); 311 } 312 return 0; 313} 314 315/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ 316static inline void map_ib_wc_status(u32 cqe_status, 317 enum ib_wc_status *wc_status) 318{ 319 if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { 320 switch (cqe_status & 0x3F) { 321 case 0x01: 322 case 0x21: 323 *wc_status = IB_WC_LOC_LEN_ERR; 324 break; 325 case 0x02: 326 case 0x22: 327 *wc_status = IB_WC_LOC_QP_OP_ERR; 328 break; 329 case 0x03: 330 case 0x23: 331 *wc_status = IB_WC_LOC_EEC_OP_ERR; 332 break; 333 case 0x04: 334 case 0x24: 335 *wc_status = IB_WC_LOC_PROT_ERR; 336 break; 337 case 0x05: 338 case 0x25: 339 *wc_status = IB_WC_WR_FLUSH_ERR; 340 break; 341 case 0x06: 342 *wc_status = IB_WC_MW_BIND_ERR; 343 break; 344 case 0x07: /* remote error - look into bits 20:24 */ 345 switch ((cqe_status 346 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { 347 case 0x0: 348 /* 349 * PSN Sequence Error! 350 * couldn't find a matching status! 351 */ 352 *wc_status = IB_WC_GENERAL_ERR; 353 break; 354 case 0x1: 355 *wc_status = IB_WC_REM_INV_REQ_ERR; 356 break; 357 case 0x2: 358 *wc_status = IB_WC_REM_ACCESS_ERR; 359 break; 360 case 0x3: 361 *wc_status = IB_WC_REM_OP_ERR; 362 break; 363 case 0x4: 364 *wc_status = IB_WC_REM_INV_RD_REQ_ERR; 365 break; 366 } 367 break; 368 case 0x08: 369 *wc_status = IB_WC_RETRY_EXC_ERR; 370 break; 371 case 0x09: 372 *wc_status = IB_WC_RNR_RETRY_EXC_ERR; 373 break; 374 case 0x0A: 375 case 0x2D: 376 *wc_status = IB_WC_REM_ABORT_ERR; 377 break; 378 case 0x0B: 379 case 0x2E: 380 *wc_status = IB_WC_INV_EECN_ERR; 381 break; 382 case 0x0C: 383 case 0x2F: 384 *wc_status = IB_WC_INV_EEC_STATE_ERR; 385 break; 386 case 0x0D: 387 *wc_status = IB_WC_BAD_RESP_ERR; 388 break; 389 case 0x10: 390 /* WQE purged */ 391 *wc_status = IB_WC_WR_FLUSH_ERR; 392 break; 393 default: 394 *wc_status = IB_WC_FATAL_ERR; 395 396 } 397 } else 398 *wc_status = IB_WC_SUCCESS; 399} 400 401static inline int post_one_send(struct ehca_qp *my_qp, 402 struct ib_send_wr *cur_send_wr, 403 int hidden) 404{ 405 struct ehca_wqe *wqe_p; 406 int ret; 407 u32 sq_map_idx; 408 u64 start_offset = my_qp->ipz_squeue.current_q_offset; 409 410 /* get pointer next to free WQE */ 411 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); 412 if (unlikely(!wqe_p)) { 413 /* too many posted work requests: queue overflow */ 414 ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " 415 "qp_num=%x", my_qp->ib_qp.qp_num); 416 return -ENOMEM; 417 } 418 419 /* 420 * Get the index of the WQE in the send queue. The same index is used 421 * for writing into the sq_map. 422 */ 423 sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; 424 425 /* write a SEND WQE into the QUEUE */ 426 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); 427 /* 428 * if something failed, 429 * reset the free entry pointer to the start value 430 */ 431 if (unlikely(ret)) { 432 my_qp->ipz_squeue.current_q_offset = start_offset; 433 ehca_err(my_qp->ib_qp.device, "Could not write WQE " 434 "qp_num=%x", my_qp->ib_qp.qp_num); 435 return -EINVAL; 436 } 437 438 return 0; 439} 440 441int ehca_post_send(struct ib_qp *qp, 442 struct ib_send_wr *send_wr, 443 struct ib_send_wr **bad_send_wr) 444{ 445 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); 446 int wqe_cnt = 0; 447 int ret = 0; 448 unsigned long flags; 449 450 /* Reject WR if QP is in RESET, INIT or RTR state */ 451 if (unlikely(my_qp->state < IB_QPS_RTS)) { 452 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", 453 my_qp->state, qp->qp_num); 454 ret = -EINVAL; 455 goto out; 456 } 457 458 /* LOCK the QUEUE */ 459 spin_lock_irqsave(&my_qp->spinlock_s, flags); 460 461 /* Send an empty extra RDMA read if: 462 * 1) there has been an RDMA read on this connection before 463 * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets 464 * 3) we can be sure that any previous extra RDMA read has been 465 * processed so we don't overflow the SQ 466 */ 467 if (unlikely(my_qp->unsol_ack_circ && 468 my_qp->packet_count > ACK_CIRC_THRESHOLD && 469 my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { 470 /* insert an empty RDMA READ to fix up the remote QP state */ 471 struct ib_send_wr circ_wr; 472 memset(&circ_wr, 0, sizeof(circ_wr)); 473 circ_wr.opcode = IB_WR_RDMA_READ; 474 post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ 475 wqe_cnt++; 476 ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); 477 my_qp->message_count = my_qp->packet_count = 0; 478 } 479 480 /* loop processes list of send reqs */ 481 while (send_wr) { 482 ret = post_one_send(my_qp, send_wr, 0); 483 if (unlikely(ret)) { 484 goto post_send_exit0; 485 } 486 wqe_cnt++; 487 send_wr = send_wr->next; 488 } 489 490post_send_exit0: 491 iosync(); /* serialize GAL register access */ 492 hipz_update_sqa(my_qp, wqe_cnt); 493 if (unlikely(ret || ehca_debug_level >= 2)) 494 ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", 495 my_qp, qp->qp_num, wqe_cnt, ret); 496 my_qp->message_count += wqe_cnt; 497 spin_unlock_irqrestore(&my_qp->spinlock_s, flags); 498 499out: 500 if (ret) 501 *bad_send_wr = send_wr; 502 return ret; 503} 504 505static int internal_post_recv(struct ehca_qp *my_qp, 506 struct ib_device *dev, 507 struct ib_recv_wr *recv_wr, 508 struct ib_recv_wr **bad_recv_wr) 509{ 510 struct ehca_wqe *wqe_p; 511 int wqe_cnt = 0; 512 int ret = 0; 513 u32 rq_map_idx; 514 unsigned long flags; 515 struct ehca_qmap_entry *qmap_entry; 516 517 if (unlikely(!HAS_RQ(my_qp))) { 518 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", 519 my_qp, my_qp->real_qp_num, my_qp->ext_type); 520 ret = -ENODEV; 521 goto out; 522 } 523 524 /* LOCK the QUEUE */ 525 spin_lock_irqsave(&my_qp->spinlock_r, flags); 526 527 /* loop processes list of recv reqs */ 528 while (recv_wr) { 529 u64 start_offset = my_qp->ipz_rqueue.current_q_offset; 530 /* get pointer next to free WQE */ 531 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); 532 if (unlikely(!wqe_p)) { 533 /* too many posted work requests: queue overflow */ 534 ret = -ENOMEM; 535 ehca_err(dev, "Too many posted WQEs " 536 "qp_num=%x", my_qp->real_qp_num); 537 goto post_recv_exit0; 538 } 539 /* 540 * Get the index of the WQE in the recv queue. The same index 541 * is used for writing into the rq_map. 542 */ 543 rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; 544 545 /* write a RECV WQE into the QUEUE */ 546 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, 547 rq_map_idx); 548 /* 549 * if something failed, 550 * reset the free entry pointer to the start value 551 */ 552 if (unlikely(ret)) { 553 my_qp->ipz_rqueue.current_q_offset = start_offset; 554 ret = -EINVAL; 555 ehca_err(dev, "Could not write WQE " 556 "qp_num=%x", my_qp->real_qp_num); 557 goto post_recv_exit0; 558 } 559 560 qmap_entry = &my_qp->rq_map.map[rq_map_idx]; 561 qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); 562 qmap_entry->reported = 0; 563 qmap_entry->cqe_req = 1; 564 565 wqe_cnt++; 566 recv_wr = recv_wr->next; 567 } /* eof for recv_wr */ 568 569post_recv_exit0: 570 iosync(); /* serialize GAL register access */ 571 hipz_update_rqa(my_qp, wqe_cnt); 572 if (unlikely(ret || ehca_debug_level >= 2)) 573 ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", 574 my_qp, my_qp->real_qp_num, wqe_cnt, ret); 575 spin_unlock_irqrestore(&my_qp->spinlock_r, flags); 576 577out: 578 if (ret) 579 *bad_recv_wr = recv_wr; 580 581 return ret; 582} 583 584int ehca_post_recv(struct ib_qp *qp, 585 struct ib_recv_wr *recv_wr, 586 struct ib_recv_wr **bad_recv_wr) 587{ 588 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); 589 590 /* Reject WR if QP is in RESET state */ 591 if (unlikely(my_qp->state == IB_QPS_RESET)) { 592 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", 593 my_qp->state, qp->qp_num); 594 *bad_recv_wr = recv_wr; 595 return -EINVAL; 596 } 597 598 return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); 599} 600 601int ehca_post_srq_recv(struct ib_srq *srq, 602 struct ib_recv_wr *recv_wr, 603 struct ib_recv_wr **bad_recv_wr) 604{ 605 return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), 606 srq->device, recv_wr, bad_recv_wr); 607} 608 609/* 610 * ib_wc_opcode table converts ehca wc opcode to ib 611 * Since we use zero to indicate invalid opcode, the actual ib opcode must 612 * be decremented!!! 613 */ 614static const u8 ib_wc_opcode[255] = { 615 [0x01] = IB_WC_RECV+1, 616 [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, 617 [0x04] = IB_WC_BIND_MW+1, 618 [0x08] = IB_WC_FETCH_ADD+1, 619 [0x10] = IB_WC_COMP_SWAP+1, 620 [0x20] = IB_WC_RDMA_WRITE+1, 621 [0x40] = IB_WC_RDMA_READ+1, 622 [0x80] = IB_WC_SEND+1 623}; 624 625/* internal function to poll one entry of cq */ 626static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) 627{ 628 int ret = 0, qmap_tail_idx; 629 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 630 struct ehca_cqe *cqe; 631 struct ehca_qp *my_qp; 632 struct ehca_qmap_entry *qmap_entry; 633 struct ehca_queue_map *qmap; 634 int cqe_count = 0, is_error; 635 636repoll: 637 cqe = (struct ehca_cqe *) 638 ipz_qeit_get_inc_valid(&my_cq->ipz_queue); 639 if (!cqe) { 640 ret = -EAGAIN; 641 if (ehca_debug_level >= 3) 642 ehca_dbg(cq->device, "Completion queue is empty " 643 "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); 644 goto poll_cq_one_exit0; 645 } 646 647 /* prevents loads being reordered across this point */ 648 rmb(); 649 650 cqe_count++; 651 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { 652 struct ehca_qp *qp; 653 int purgeflag; 654 unsigned long flags; 655 656 qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); 657 if (!qp) { 658 ehca_err(cq->device, "cq_num=%x qp_num=%x " 659 "could not find qp -> ignore cqe", 660 my_cq->cq_number, cqe->local_qp_number); 661 ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", 662 my_cq->cq_number, cqe->local_qp_number); 663 /* ignore this purged cqe */ 664 goto repoll; 665 } 666 spin_lock_irqsave(&qp->spinlock_s, flags); 667 purgeflag = qp->sqerr_purgeflag; 668 spin_unlock_irqrestore(&qp->spinlock_s, flags); 669 670 if (purgeflag) { 671 ehca_dbg(cq->device, 672 "Got CQE with purged bit qp_num=%x src_qp=%x", 673 cqe->local_qp_number, cqe->remote_qp_number); 674 if (ehca_debug_level >= 2) 675 ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", 676 cqe->local_qp_number, 677 cqe->remote_qp_number); 678 /* 679 * ignore this to avoid double cqes of bad wqe 680 * that caused sqe and turn off purge flag 681 */ 682 qp->sqerr_purgeflag = 0; 683 goto repoll; 684 } 685 } 686 687 is_error = cqe->status & WC_STATUS_ERROR_BIT; 688 689 /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ 690 if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { 691 ehca_dbg(cq->device, 692 "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", 693 is_error ? "ERROR " : "", my_cq, my_cq->cq_number); 694 ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", 695 my_cq, my_cq->cq_number); 696 ehca_dbg(cq->device, 697 "ehca_cq=%p cq_num=%x -------------------------", 698 my_cq, my_cq->cq_number); 699 } 700 701 read_lock(&ehca_qp_idr_lock); 702 my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); 703 read_unlock(&ehca_qp_idr_lock); 704 if (!my_qp) 705 goto repoll; 706 wc->qp = &my_qp->ib_qp; 707 708 qmap_tail_idx = get_app_wr_id(cqe->work_request_id); 709 if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) 710 /* We got a send completion. */ 711 qmap = &my_qp->sq_map; 712 else 713 /* We got a receive completion. */ 714 qmap = &my_qp->rq_map; 715 716 /* advance the tail pointer */ 717 qmap->tail = qmap_tail_idx; 718 719 if (is_error) { 720 /* 721 * set left_to_poll to 0 because in error state, we will not 722 * get any additional CQEs 723 */ 724 my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, 725 my_qp->sq_map.entries); 726 my_qp->sq_map.left_to_poll = 0; 727 ehca_add_to_err_list(my_qp, 1); 728 729 my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, 730 my_qp->rq_map.entries); 731 my_qp->rq_map.left_to_poll = 0; 732 if (HAS_RQ(my_qp)) 733 ehca_add_to_err_list(my_qp, 0); 734 } 735 736 qmap_entry = &qmap->map[qmap_tail_idx]; 737 if (qmap_entry->reported) { 738 ehca_warn(cq->device, "Double cqe on qp_num=%#x", 739 my_qp->real_qp_num); 740 /* found a double cqe, discard it and read next one */ 741 goto repoll; 742 } 743 744 wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); 745 qmap_entry->reported = 1; 746 747 /* if left_to_poll is decremented to 0, add the QP to the error list */ 748 if (qmap->left_to_poll > 0) { 749 qmap->left_to_poll--; 750 if ((my_qp->sq_map.left_to_poll == 0) && 751 (my_qp->rq_map.left_to_poll == 0)) { 752 ehca_add_to_err_list(my_qp, 1); 753 if (HAS_RQ(my_qp)) 754 ehca_add_to_err_list(my_qp, 0); 755 } 756 } 757 758 /* eval ib_wc_opcode */ 759 wc->opcode = ib_wc_opcode[cqe->optype]-1; 760 if (unlikely(wc->opcode == -1)) { 761 ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " 762 "ehca_cq=%p cq_num=%x", 763 cqe->optype, cqe->status, my_cq, my_cq->cq_number); 764 /* dump cqe for other infos */ 765 ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", 766 my_cq, my_cq->cq_number); 767 /* update also queue adder to throw away this entry!!! */ 768 goto repoll; 769 } 770 771 /* eval ib_wc_status */ 772 if (unlikely(is_error)) { 773 /* complete with errors */ 774 map_ib_wc_status(cqe->status, &wc->status); 775 wc->vendor_err = wc->status; 776 } else 777 wc->status = IB_WC_SUCCESS; 778 779 wc->byte_len = cqe->nr_bytes_transferred; 780 wc->pkey_index = cqe->pkey_index; 781 wc->slid = cqe->rlid; 782 wc->dlid_path_bits = cqe->dlid; 783 wc->src_qp = cqe->remote_qp_number; 784 /* 785 * HW has "Immed data present" and "GRH present" in bits 6 and 5. 786 * SW defines those in bits 1 and 0, so we can just shift and mask. 787 */ 788 wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; 789 wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); 790 wc->sl = cqe->service_level; 791 792poll_cq_one_exit0: 793 if (cqe_count > 0) 794 hipz_update_feca(my_cq, cqe_count); 795 796 return ret; 797} 798 799static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, 800 struct ib_wc *wc, int num_entries, 801 struct ipz_queue *ipz_queue, int on_sq) 802{ 803 int nr = 0; 804 struct ehca_wqe *wqe; 805 u64 offset; 806 struct ehca_queue_map *qmap; 807 struct ehca_qmap_entry *qmap_entry; 808 809 if (on_sq) 810 qmap = &my_qp->sq_map; 811 else 812 qmap = &my_qp->rq_map; 813 814 qmap_entry = &qmap->map[qmap->next_wqe_idx]; 815 816 while ((nr < num_entries) && (qmap_entry->reported == 0)) { 817 /* generate flush CQE */ 818 819 memset(wc, 0, sizeof(*wc)); 820 821 offset = qmap->next_wqe_idx * ipz_queue->qe_size; 822 wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); 823 if (!wqe) { 824 ehca_err(cq->device, "Invalid wqe offset=%#llx on " 825 "qp_num=%#x", offset, my_qp->real_qp_num); 826 return nr; 827 } 828 829 wc->wr_id = replace_wr_id(wqe->work_request_id, 830 qmap_entry->app_wr_id); 831 832 if (on_sq) { 833 switch (wqe->optype) { 834 case WQE_OPTYPE_SEND: 835 wc->opcode = IB_WC_SEND; 836 break; 837 case WQE_OPTYPE_RDMAWRITE: 838 wc->opcode = IB_WC_RDMA_WRITE; 839 break; 840 case WQE_OPTYPE_RDMAREAD: 841 wc->opcode = IB_WC_RDMA_READ; 842 break; 843 default: 844 ehca_err(cq->device, "Invalid optype=%x", 845 wqe->optype); 846 return nr; 847 } 848 } else 849 wc->opcode = IB_WC_RECV; 850 851 if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { 852 wc->ex.imm_data = wqe->immediate_data; 853 wc->wc_flags |= IB_WC_WITH_IMM; 854 } 855 856 wc->status = IB_WC_WR_FLUSH_ERR; 857 858 wc->qp = &my_qp->ib_qp; 859 860 /* mark as reported and advance next_wqe pointer */ 861 qmap_entry->reported = 1; 862 qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, 863 qmap->entries); 864 qmap_entry = &qmap->map[qmap->next_wqe_idx]; 865 866 wc++; nr++; 867 } 868 869 return nr; 870 871} 872 873int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) 874{ 875 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 876 int nr; 877 struct ehca_qp *err_qp; 878 struct ib_wc *current_wc = wc; 879 int ret = 0; 880 unsigned long flags; 881 int entries_left = num_entries; 882 883 if (num_entries < 1) { 884 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " 885 "cq_num=%x", num_entries, my_cq, my_cq->cq_number); 886 ret = -EINVAL; 887 goto poll_cq_exit0; 888 } 889 890 spin_lock_irqsave(&my_cq->spinlock, flags); 891 892 /* generate flush cqes for send queues */ 893 list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { 894 nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, 895 &err_qp->ipz_squeue, 1); 896 entries_left -= nr; 897 current_wc += nr; 898 899 if (entries_left == 0) 900 break; 901 } 902 903 /* generate flush cqes for receive queues */ 904 list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { 905 nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, 906 &err_qp->ipz_rqueue, 0); 907 entries_left -= nr; 908 current_wc += nr; 909 910 if (entries_left == 0) 911 break; 912 } 913 914 for (nr = 0; nr < entries_left; nr++) { 915 ret = ehca_poll_cq_one(cq, current_wc); 916 if (ret) 917 break; 918 current_wc++; 919 } /* eof for nr */ 920 entries_left -= nr; 921 922 spin_unlock_irqrestore(&my_cq->spinlock, flags); 923 if (ret == -EAGAIN || !ret) 924 ret = num_entries - entries_left; 925 926poll_cq_exit0: 927 return ret; 928} 929 930int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) 931{ 932 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 933 int ret = 0; 934 935 switch (notify_flags & IB_CQ_SOLICITED_MASK) { 936 case IB_CQ_SOLICITED: 937 hipz_set_cqx_n0(my_cq, 1); 938 break; 939 case IB_CQ_NEXT_COMP: 940 hipz_set_cqx_n1(my_cq, 1); 941 break; 942 default: 943 return -EINVAL; 944 } 945 946 if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { 947 unsigned long spl_flags; 948 spin_lock_irqsave(&my_cq->spinlock, spl_flags); 949 ret = ipz_qeit_is_valid(&my_cq->ipz_queue); 950 spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); 951 } 952 953 return ret; 954} 955