/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>

#include "mlx4.h"
#include "doorbell.h"
#include "wqe.h"

static const uint32_t mlx4_ib_opcode[] = {
	[IBV_WR_SEND]			= MLX4_OPCODE_SEND,
	[IBV_WR_SEND_WITH_IMM]		= MLX4_OPCODE_SEND_IMM,
	[IBV_WR_RDMA_WRITE]		= MLX4_OPCODE_RDMA_WRITE,
	[IBV_WR_RDMA_WRITE_WITH_IMM]	= MLX4_OPCODE_RDMA_WRITE_IMM,
	[IBV_WR_RDMA_READ]		= MLX4_OPCODE_RDMA_READ,
	[IBV_WR_ATOMIC_CMP_AND_SWP]	= MLX4_OPCODE_ATOMIC_CS,
	[IBV_WR_ATOMIC_FETCH_AND_ADD]	= MLX4_OPCODE_ATOMIC_FA,
	[IBV_WR_LOCAL_INV]		= MLX4_OPCODE_LOCAL_INVAL,
	[IBV_WR_BIND_MW]		= MLX4_OPCODE_BIND_MW,
	[IBV_WR_SEND_WITH_INV]		= MLX4_OPCODE_SEND_INVAL,
};

static void *get_recv_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
}
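
/*
 * Worked example for the helpers above (the numbers are assumptions
 * for illustration, not fixed by this file): wqe_shift is the log2 of
 * the power-of-two WQE stride, so with sq.wqe_shift == 6 every send
 * WQE is 64 bytes and get_send_wqe(qp, n) returns
 * buf.buf + sq.offset + n * 64.
 */
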
/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_qp *qp, int n)
{
	uint32_t *wqe = get_send_wqe(qp, n);
	int i;
	int ds = (((struct mlx4_wqe_ctrl_seg *)wqe)->fence_size & 0x3f) << 2;

	for (i = 16; i < ds; i += 16)
		wqe[i] = 0xffffffff;
}

void mlx4_init_qp_indices(struct mlx4_qp *qp)
{
	qp->sq.head = 0;
	qp->sq.tail = 0;
	qp->rq.head = 0;
	qp->rq.tail = 0;
}

void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp)
{
	struct mlx4_wqe_ctrl_seg *ctrl;
	int i;

	for (i = 0; i < qp->sq.wqe_cnt; ++i) {
		ctrl = get_send_wqe(qp, i);
		ctrl->owner_opcode = htobe32(1 << 31);
		ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);

		stamp_send_wqe(qp, i);
	}
}

static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if (cur + nreq < wq->max_post)
		return 0;

	pthread_spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	pthread_spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}
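
/*
 * Worked example for wq_overflow() above (numbers assumed for
 * illustration): head and tail are free-running counters, so the
 * occupancy is the unsigned difference head - tail, which stays
 * correct across 32-bit wraparound -- with head == 0x00000002 and
 * tail == 0xfffffffe the subtraction yields 4 outstanding WQEs.
 * The re-read under cq->lock synchronizes with a racing poller
 * that may be advancing tail as completions are reaped.
 */
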
static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ibv_send_wr *wr)
{
	int acc = wr->bind_mw.bind_info.mw_access_flags;

	bseg->flags1 = 0;
	if (acc & IBV_ACCESS_REMOTE_ATOMIC)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_ATOMIC);
	if (acc & IBV_ACCESS_REMOTE_WRITE)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_WRITE);
	if (acc & IBV_ACCESS_REMOTE_READ)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_READ);

	bseg->flags2 = 0;
	if (((struct ibv_mw *)(wr->bind_mw.mw))->type == IBV_MW_TYPE_2)
		bseg->flags2 |= htobe32(MLX4_WQE_BIND_TYPE_2);
	if (acc & IBV_ACCESS_ZERO_BASED)
		bseg->flags2 |= htobe32(MLX4_WQE_BIND_ZERO_BASED);

	bseg->new_rkey = htobe32(wr->bind_mw.rkey);
	bseg->lkey = htobe32(wr->bind_mw.bind_info.mr->lkey);
	bseg->addr = htobe64((uint64_t) wr->bind_mw.bind_info.addr);
	bseg->length = htobe64(wr->bind_mw.bind_info.length);
}

static inline void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg,
				     uint32_t rkey)
{
	iseg->mem_key = htobe32(rkey);

	iseg->reserved1 = 0;
	iseg->reserved2 = 0;
	iseg->reserved3[0] = 0;
	iseg->reserved3[1] = 0;
}

static inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
				 uint64_t remote_addr, uint32_t rkey)
{
	rseg->raddr    = htobe64(remote_addr);
	rseg->rkey     = htobe32(rkey);
	rseg->reserved = 0;
}

static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ibv_send_wr *wr)
{
	if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
		aseg->swap_add = htobe64(wr->wr.atomic.swap);
		aseg->compare  = htobe64(wr->wr.atomic.compare_add);
	} else {
		aseg->swap_add = htobe64(wr->wr.atomic.compare_add);
		aseg->compare  = 0;
	}
}

static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
			     struct ibv_send_wr *wr)
{
	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
	dseg->dqpn = htobe32(wr->wr.ud.remote_qpn);
	dseg->qkey = htobe32(wr->wr.ud.remote_qkey);
	dseg->vlan = htobe16(to_mah(wr->wr.ud.ah)->vlan);
	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
}

static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->byte_count = htobe32(sg->length);
	dseg->lkey       = htobe32(sg->lkey);
	dseg->addr       = htobe64(sg->addr);
}
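
/*
 * Note the asymmetry with set_data_seg() below: __set_data_seg() is
 * used on the receive path and writes byte_count directly, since an
 * RQ WQE is only consumed by the HCA once the receive doorbell record
 * has advanced past it; the write barrier in set_data_seg() addresses
 * a send-queue prefetch hazard that does not arise here.
 */
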
static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->lkey = htobe32(sg->lkey);
	dseg->addr = htobe64(sg->addr);

	/*
	 * Need a barrier here before writing the byte_count field to
	 * make sure that all the data is visible before the
	 * byte_count field is set.  Otherwise, if the segment begins
	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but stale
	 * data, and end up sending the wrong data.
	 */
	udma_to_device_barrier();

	if (likely(sg->length))
		dseg->byte_count = htobe32(sg->length);
	else
		dseg->byte_count = htobe32(0x80000000);
}

int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		   struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl = NULL;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size = 0;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = EINVAL;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->srcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htobe32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htobe32(MLX4_WQE_CTRL_SOLICIT) : 0) |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe  += sizeof *ctrl;
		size  = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC_SEND:
			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
			/* fall through */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				if (!wr->num_sge)
					inl = 1;
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;

			case IBV_WR_LOCAL_INV:
				ctrl->srcrb_flags |=
					htobe32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_local_inv_seg(wqe, wr->imm_data);
				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
				break;

			case IBV_WR_BIND_MW:
				ctrl->srcrb_flags |=
					htobe32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_bind_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_bind_seg);
				size += sizeof (struct mlx4_wqe_bind_seg) / 16;
				break;

			case IBV_WR_SEND_WITH_INV:
				ctrl->imm = htobe32(wr->imm_data);
				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

			if (wr->send_flags & IBV_SEND_IP_CSUM) {
				if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_UD_OVER_IB)) {
					ret = EINVAL;
					*bad_wr = wr;
					goto out;
				}
				ctrl->srcrb_flags |=
					htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM |
						MLX4_WQE_CTRL_TCP_UDP_CSUM);
			}
			break;

		case IBV_QPT_RAW_PACKET:
			/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
			 * to indicate that no ICRC should be calculated */
			ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_SOLICIT);
			if (wr->send_flags & IBV_SEND_IP_CSUM) {
				if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_RAW_OVER_ETH)) {
					ret = EINVAL;
					*bad_wr = wr;
					goto out;
				}
				ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM |
							     MLX4_WQE_CTRL_TCP_UDP_CSUM);
			}
			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = ENOMEM;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;
					udma_to_device_barrier(); /* see comment below */
					seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				udma_to_device_barrier();
				seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof *seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

			for (i = wr->num_sge - 1; i >= 0; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		udma_to_device_barrier();

		ctrl->owner_opcode = htobe32(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htobe32(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

	if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htobe32((qp->sq.head & 0xffff) << 8);

		ctrl->bf_qpn |= qp->doorbell_qpn;
		++qp->sq.head;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		mmio_wc_spinlock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		/* Flush before toggling bf_offset to be latency oriented */
		mmio_flush_writes();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();

		mmio_writel((unsigned long)(ctx->uar + MLX4_SEND_DOORBELL),
			    qp->doorbell_qpn);
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}

int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		   struct ibv_recv_wr **bad_wr)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	struct mlx4_wqe_data_seg *scat;
	int ret = 0;
	int nreq;
	int ind;
	int i;

	pthread_spin_lock(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->rq.max_gs) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);

		for (i = 0; i < wr->num_sge; ++i)
			__set_data_seg(scat + i, wr->sg_list + i);

		if (i < qp->rq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = htobe32(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (nreq) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();

		*qp->db = htobe32(qp->rq.head & 0xffff);
	}

	pthread_spin_unlock(&qp->rq.lock);

	return ret;
}
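
#if 0
/*
 * Minimal usage sketch (not compiled): how an application reaches
 * mlx4_post_send() through the libibverbs entry point.  The qp, mr,
 * buf and len parameters are assumed to have been set up by the
 * caller; error handling is elided.
 */
static int example_post_one_send(struct ibv_qp *qp, struct ibv_mr *mr,
				 void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr	= (uintptr_t) buf,
		.length	= len,
		.lkey	= mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id		= 1,
		.sg_list	= &sge,
		.num_sge	= 1,
		.opcode		= IBV_WR_SEND,
		.send_flags	= IBV_SEND_SIGNALED,
	};
	struct ibv_send_wr *bad_wr;

	/* Returns 0 on success; on failure bad_wr points at the first
	 * work request that could not be posted. */
	return ibv_post_send(qp, &wr, &bad_wr);
}
#endif
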
static int num_inline_segs(int data, enum ibv_qp_type type)
{
	/*
	 * Inline data segments are not allowed to cross 64 byte
	 * boundaries.  For UD QPs, the data segments always start
	 * aligned to 64 bytes (16 byte control segment + 48 byte
	 * datagram segment); for other QPs, there will be a 16 byte
	 * control segment and possibly a 16 byte remote address
	 * segment, so in the worst case there will be only 32 bytes
	 * available for the first data segment.
	 */
	if (type == IBV_QPT_UD)
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_datagram_seg)) %
			MLX4_INLINE_ALIGN;
	else
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_raddr_seg)) %
			MLX4_INLINE_ALIGN;

	return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
		(MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
}
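
/*
 * Worked example for num_inline_segs(), assuming MLX4_INLINE_ALIGN is
 * 64 and a 4 byte inline segment header: each 64 byte chunk carries at
 * most 60 bytes of payload, and on an RC QP the first chunk may lose a
 * further 32 bytes to the control and remote address segments.  So 128
 * bytes of inline data need (128 + 32 + 64 - 4 - 1) / (64 - 4) = 3
 * inline segments (28 + 60 + 40 bytes of payload).
 */
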
void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
			   struct mlx4_qp *qp)
{
	int size;
	int max_sq_sge;

	max_sq_sge = align(cap->max_inline_data +
			   num_inline_segs(cap->max_inline_data, type) *
			   sizeof (struct mlx4_wqe_inline_seg),
			   sizeof (struct mlx4_wqe_data_seg)) /
		sizeof (struct mlx4_wqe_data_seg);
	if (max_sq_sge < cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
	switch (type) {
	case IBV_QPT_UD:
		size += sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_UC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		break;

	case IBV_QPT_XRC_SEND:
	case IBV_QPT_RC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mlx4_wqe_atomic_seg) +
			    sizeof (struct mlx4_wqe_raddr_seg) +
			    sizeof (struct mlx4_wqe_data_seg)))
			size = (sizeof (struct mlx4_wqe_atomic_seg) +
				sizeof (struct mlx4_wqe_raddr_seg) +
				sizeof (struct mlx4_wqe_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mlx4_wqe_bind_seg))
		size = sizeof (struct mlx4_wqe_bind_seg);

	size += sizeof (struct mlx4_wqe_ctrl_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */
}

int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
		      enum ibv_qp_type type, struct mlx4_qp *qp)
{
	qp->rq.max_gs = cap->max_recv_sge;

	if (qp->sq.wqe_cnt) {
		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
		if (!qp->sq.wrid)
			return -1;
	}

	if (qp->rq.wqe_cnt) {
		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
		if (!qp->rq.wrid) {
			free(qp->sq.wrid);
			return -1;
		}
	}

	for (qp->rq.wqe_shift = 4;
	     1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
	     qp->rq.wqe_shift++)
		; /* nothing */

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	if (qp->buf_size) {
		if (mlx4_alloc_buf(&qp->buf,
				   align(qp->buf_size, to_mdev(context->device)->page_size),
				   to_mdev(context->device)->page_size)) {
			free(qp->sq.wrid);
			free(qp->rq.wrid);
			return -1;
		}

		memset(qp->buf.buf, 0, qp->buf_size);
	} else {
		qp->buf.buf = NULL;
	}

	return 0;
}
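
/*
 * Example layout from mlx4_alloc_qp_buf() above (sizes assumed for
 * illustration): with rq.wqe_shift == 6 (64 byte RQ WQEs) and
 * sq.wqe_shift == 7 (128 byte SQ WQEs), the queue with the larger
 * stride is placed first in the buffer, so sq.offset == 0 and
 * rq.offset == sq.wqe_cnt * 128.
 */
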
void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type)
{
	int wqe_size;

	wqe_size = (1 << qp->sq.wqe_shift) - sizeof (struct mlx4_wqe_ctrl_seg);
	switch (type) {
	case IBV_QPT_UD:
		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_XRC_SEND:
	case IBV_QPT_UC:
	case IBV_QPT_RC:
		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
		break;

	default:
		break;
	}

	qp->sq.max_gs	  = wqe_size / sizeof (struct mlx4_wqe_data_seg);
	cap->max_send_sge = qp->sq.max_gs;
	qp->sq.max_post	  = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_wr  = qp->sq.max_post;

	/*
	 * Inline data segments can't cross a 64 byte boundary.  So
	 * subtract off one segment header for each 64-byte chunk,
	 * taking into account the fact that wqe_size will be 32 mod
	 * 64 for non-UD QPs.
	 */
	qp->max_inline_data = wqe_size -
		sizeof (struct mlx4_wqe_inline_seg) *
		(align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
	cap->max_inline_data = qp->max_inline_data;
}

struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}

int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
						   sizeof (struct mlx4_qp *));
		if (!ctx->qp_table[tind].table)
			return -1;
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
	return 0;
}

void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!--ctx->qp_table[tind].refcnt)
		free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
}
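
/*
 * Worked example for the two-level QP table above (parameters assumed
 * for illustration): with num_qps == 65536, qp_table_shift == 8 and
 * qp_table_mask == 0xff, QPN 0x012345 masks to 0x2345, giving
 * top-level index tind == 0x23 and second-level index 0x45.  Each
 * top-level slot lazily allocates its second-level array on first
 * store and frees it when its refcnt drops to zero.
 */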