/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/bitops.h>

 #include <rdma/ib_cache.h>
 */
#include <rdma/ib_pack.h>
/*
 #include <rdma/ib_addr.h>
 #include <rdma/ib_mad.h>
 */
#include <linux/mlx4/qp.h>
/*
 #include <linux/mlx4/driver.h>
 #include <linux/io.h>
 */
#include <linux/err.h>
#include <linux/log2.h>
#include <linux/gfp.h>
#include <linux/compiler.h>

#include <netinet/in.h>
#include <asm/byteorder.h>

#include <debug.h>
/*
 #ifndef __linux__
 #define asm __asm
 #endif
 */
#include "mlx4_ib.h"
#include "user.h"

enum {
    MLX4_IB_ACK_REQ_FREQ = 8,
};

enum {
    MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
    MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
    MLX4_IB_LINK_TYPE_IB = 0,
    MLX4_IB_LINK_TYPE_ETH = 1
};

enum {
    /*
     * Largest possible UD header: send with GRH and immediate
     * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
     * tag.  (LRH would only use 8 bytes, so Ethernet is the
     * biggest case.)
     */
    MLX4_IB_UD_HEADER_SIZE = 82,
    MLX4_IB_LSO_HEADER_SPARE = 128,
};

enum {
    MLX4_IB_IBOE_ETHERTYPE = 0x8915
};

struct mlx4_ib_sqp {
    struct mlx4_ib_qp qp;
    int pkey_index;
    u32 qkey;
    u32 send_psn;
    struct ib_ud_header ud_header;
    u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
    MLX4_IB_MIN_SQ_STRIDE = 6,
    MLX4_IB_CACHE_LINE_SIZE = 64,
};

enum {
    MLX4_RAW_QP_MTU = 7,
    MLX4_RAW_QP_MSGMAX = 31,
};

static const __be32 mlx4_ib_opcode[] = {
    [IB_WR_SEND]                        = cpu_to_be32(MLX4_OPCODE_SEND),
    [IB_WR_LSO]                         = cpu_to_be32(MLX4_OPCODE_LSO),
    [IB_WR_SEND_WITH_IMM]               = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
    [IB_WR_RDMA_WRITE]                  = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
    [IB_WR_RDMA_WRITE_WITH_IMM]         = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
    [IB_WR_RDMA_READ]                   = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
    [IB_WR_ATOMIC_CMP_AND_SWP]          = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
    [IB_WR_ATOMIC_FETCH_AND_ADD]        = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
    [IB_WR_SEND_WITH_INV]               = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
    [IB_WR_LOCAL_INV]                   = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
    [IB_WR_FAST_REG_MR]                 = cpu_to_be32(MLX4_OPCODE_FMR),
    [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]   = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
    [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
};

#ifndef wc_wmb
#if defined(__i386__)
#define wc_wmb() __asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
#elif defined(__x86_64__)
#define wc_wmb() __asm volatile("sfence" ::: "memory")
#elif defined(__ia64__)
#define wc_wmb() __asm volatile("fwb" ::: "memory")
#else
#define wc_wmb() wmb()
#endif
#endif

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) {
    return container_of(mqp, struct mlx4_ib_sqp, qp);
}
/*
 static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) {
     if (!mlx4_is_master(dev->dev))
         return 0;

     return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn
         && qp->mqp.qpn
             < dev->dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX;
 }
 */
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) {
    int proxy_sqp = 0;
    int real_sqp = 0;
    int i;

    /* PPF or Native -- real SQP */
    real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev))
            && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn
            && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
    if (real_sqp)
        return 1;

    /* VF or PF -- proxy SQP */
    if (mlx4_is_mfunc(dev->dev)) {
        for (i = 0; i < dev->dev->caps.num_ports; i++) {
            if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]
                    || qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
                proxy_sqp = 1;
                break;
            }
        }
    }
    return proxy_sqp;
}

/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) {
    int proxy_qp0 = 0;
    int real_qp0 = 0;
    int i;

    /* PPF or Native -- real QP0 */
    real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev))
            && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn
            && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
    if (real_qp0)
        return 1;

    /* VF or PF -- proxy QP0 */
    if (mlx4_is_mfunc(dev->dev)) {
        for (i = 0; i < dev->dev->caps.num_ports; i++) {
            if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
                proxy_qp0 = 1;
                break;
            }
        }
    }
    return proxy_qp0;
}
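/*
 * Worked example of the special-QP numbering the two checks above rely
 * on: if phys_caps.base_sqpn were 0x40 on a two-port HCA, QPNs
 * 0x40/0x41 would be the real QP0s of ports 1/2 and 0x42/0x43 the real
 * QP1s (GSI).  Hence is_qp0() checks base_sqpn..base_sqpn + 1 while
 * is_sqp() checks base_sqpn..base_sqpn + 3.
 */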
static void *get_wqe(struct mlx4_ib_qp *qp, int offset) {
    return mlx4_buf_offset(&qp->buf, offset);
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) {
    return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) {
    return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with
 *     0x7FFFFFFF | (invalid_ownership_value << 31).
 *
 * When the max work request size is less than or equal to the WQE
 * basic block size, as an optimization, we can stamp all WQEs with
 * 0xffffffff, and skip the very first chunk of each WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) {
    __be32 *wqe;
    int i;
    int s;
    int ind;
    void *buf;
    __be32 stamp;
    struct mlx4_wqe_ctrl_seg *ctrl;

    if (qp->sq_max_wqes_per_wr > 1) {
        s = roundup(size, 1U << qp->sq.wqe_shift);
        for (i = 0; i < s; i += 64) {
            ind = (i >> qp->sq.wqe_shift) + n;
            stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
                    cpu_to_be32(0xffffffff);
            buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
            wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
            *wqe = stamp;
        }
    } else {
        ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
        s = (ctrl->fence_size & 0x3f) << 4;
        for (i = 64; i < s; i += 64) {
            wqe = buf + i;
            *wqe = cpu_to_be32(0xffffffff);
        }
    }
}
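/*
 * Stamping example: with sq.wqe_shift == 7 (128-byte WQEs) and no WQE
 * shrinking (sq_max_wqes_per_wr == 1), stamp_send_wqe() reads the WQE
 * size from the control segment (fence_size & 0x3f, in 16-byte units)
 * and writes 0xffffffff only at offset 64 -- the first 64-byte chunk
 * is skipped, since the hardware ownership bit in the control segment
 * already protects it.  In shrinking mode the stamp value alternates
 * between 0xffffffff and 0x7fffffff depending on which pass over the
 * ring (ind & sq.wqe_cnt) owns the slot.
 */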
static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) {
    struct mlx4_wqe_ctrl_seg *ctrl;
    struct mlx4_wqe_inline_seg *inl;
    void *wqe;
    int s;

    ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
    s = sizeof(struct mlx4_wqe_ctrl_seg);

    if (qp->ibqp.qp_type == IB_QPT_UD) {
        struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
        struct mlx4_av *av = (struct mlx4_av *) dgram->av;
        memset(dgram, 0, sizeof *dgram);
        av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
        s += sizeof(struct mlx4_wqe_datagram_seg);
    }

    /* Pad the remainder of the WQE with an inline data segment. */
    if (size > s) {
        inl = wqe + s;
        inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl));
    }
    ctrl->srcrb_flags = 0;
    ctrl->fence_size = size / 16;

    /*
     * Make sure the descriptor is fully written before setting the
     * ownership bit (because HW can start executing as soon as we do).
     */
    wmb();

    ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC)
            | (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0);

    stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
}

/*
 * Post a NOP WQE to prevent wrap-around in the middle of a WR.
 */
static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) {
    unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
    if (unlikely(s < qp->sq_max_wqes_per_wr)) {
        post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
        ind += s;
    }
    return ind;
}

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) {
    struct ib_event event;
    struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

    if (type == MLX4_EVENT_TYPE_PATH_MIG)
        to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

    if (ibqp->event_handler) {
        event.device = ibqp->device;
        event.element.qp = ibqp;
        switch (type) {
        case MLX4_EVENT_TYPE_PATH_MIG:
            event.event = IB_EVENT_PATH_MIG;
            break;
        case MLX4_EVENT_TYPE_COMM_EST:
            event.event = IB_EVENT_COMM_EST;
            break;
        case MLX4_EVENT_TYPE_SQ_DRAINED:
            event.event = IB_EVENT_SQ_DRAINED;
            break;
        case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
            event.event = IB_EVENT_QP_LAST_WQE_REACHED;
            break;
        case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
            event.event = IB_EVENT_QP_FATAL;
            break;
        case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
            event.event = IB_EVENT_PATH_MIG_ERR;
            break;
        case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
            event.event = IB_EVENT_QP_REQ_ERR;
            break;
        case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
            event.event = IB_EVENT_QP_ACCESS_ERR;
            break;
        default:
            MLX4_WARN("Unexpected event type %d on QP %06x\n", type, qp->qpn);
            return;
        }

        ibqp->event_handler(&event, ibqp->qp_context);
    }
}
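/*
 * Worked example for send_wqe_overhead() below, assuming the usual
 * mlx4 segment sizes (16-byte control, 48-byte datagram, 16-byte
 * remote-address and 32-byte masked-atomic segments): a plain UD WQE
 * carries 16 + 48 = 64 bytes of overhead (plus
 * MLX4_IB_LSO_HEADER_SPARE when LSO is enabled), while an RC WQE
 * reserves 16 + 32 + 16 = 64 bytes so that any masked-atomic request
 * still fits.
 */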
static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) {
    /*
     * UD WQEs must have a datagram segment.
     * RC and UC WQEs might have a remote address segment.
     * MLX WQEs need two extra inline data segments (for the UD
     * header and space for the ICRC).
     */
    switch (type) {
    case MLX4_IB_QPT_UD:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + sizeof(struct mlx4_wqe_datagram_seg)
                + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
    case MLX4_IB_QPT_PROXY_SMI_OWNER:
    case MLX4_IB_QPT_PROXY_SMI:
    case MLX4_IB_QPT_PROXY_GSI:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + sizeof(struct mlx4_wqe_datagram_seg) + 64;
    case MLX4_IB_QPT_TUN_SMI_OWNER:
    case MLX4_IB_QPT_TUN_GSI:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + sizeof(struct mlx4_wqe_datagram_seg);

    case MLX4_IB_QPT_UC:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + sizeof(struct mlx4_wqe_raddr_seg);
    case MLX4_IB_QPT_RC:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + sizeof(struct mlx4_wqe_masked_atomic_seg)
                + sizeof(struct mlx4_wqe_raddr_seg);
    case MLX4_IB_QPT_SMI:
    case MLX4_IB_QPT_GSI:
        return sizeof(struct mlx4_wqe_ctrl_seg)
                + ALIGN(MLX4_IB_UD_HEADER_SIZE
                        + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, MLX4_INLINE_ALIGN)
                                * sizeof(struct mlx4_wqe_inline_seg),
                        sizeof(struct mlx4_wqe_data_seg))
                + ALIGN(4 + sizeof(struct mlx4_wqe_inline_seg),
                        sizeof(struct mlx4_wqe_data_seg));
    default:
        return sizeof(struct mlx4_wqe_ctrl_seg);
    }
}

static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
        int is_user, int has_rq, struct mlx4_ib_qp *qp) {
    /* Sanity check RQ size before proceeding */
    if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE
            || cap->max_recv_sge
                    > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
        return -EINVAL;

    if (!has_rq) {
        if (cap->max_recv_wr)
            return -EINVAL;

        qp->rq.wqe_cnt = qp->rq.max_gs = 0;
    } else {
        /* HW requires >= 1 RQ entry with >= 1 gather entry */
        if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
            return -EINVAL;

        qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
        qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
        qp->rq.wqe_shift = ilog2(
                qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg));
    }

    /* leave userspace return values as they were, so as not to break ABI */
    if (is_user) {
        cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
        cap->max_recv_sge = qp->rq.max_gs;
    } else {
        cap->max_recv_wr = qp->rq.max_post = min(
                dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
        cap->max_recv_sge = min(qp->rq.max_gs,
                min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg));
    }

    return 0;
}
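/*
 * Example: a kernel caller asking for max_recv_wr = 100 and
 * max_recv_sge = 3 ends up with rq.wqe_cnt = 128 and rq.max_gs = 4
 * (both rounded up to powers of two), and rq.wqe_shift =
 * ilog2(4 * sizeof(struct mlx4_wqe_data_seg)) = ilog2(64) = 6 for the
 * usual 16-byte data segment, i.e. 64-byte receive WQEs.
 */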
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
        enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) {
    int s;

    /* Sanity check SQ size before proceeding */
    if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE)
            || cap->max_send_sge
                    > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)
            || cap->max_inline_data + send_wqe_overhead(type, qp->flags)
                    + sizeof(struct mlx4_wqe_inline_seg)
                    > dev->dev->caps.max_sq_desc_sz)
        return -EINVAL;

    /*
     * For MLX transport we need 2 extra S/G entries:
     * one for the header and one for the checksum at the end.
     */
    if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI
            || type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
            && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
        return -EINVAL;

    s = max(cap->max_send_sge * sizeof(struct mlx4_wqe_data_seg),
            cap->max_inline_data + sizeof(struct mlx4_wqe_inline_seg))
            + send_wqe_overhead(type, qp->flags);

    if (s > dev->dev->caps.max_sq_desc_sz)
        return -EINVAL;

    /*
     * Hermon supports shrinking WQEs, such that a single work
     * request can include multiple units of 1 << wqe_shift.  This
     * way, work requests can differ in size, and do not have to
     * be a power of 2 in size, saving memory and speeding up send
     * WR posting.  Unfortunately, if we do this then the
     * wqe_index field in CQEs can't be used to look up the WR ID
     * anymore, so we do this only if selective signaling is off.
     *
     * Further, on 32-bit platforms, we can't use vmap() to make
     * the QP buffer virtually contiguous.  Thus we have to use
     * constant-sized WRs to make sure a WR is always fully within
     * a single page-sized chunk.
     *
     * Finally, we use NOP work requests to pad the end of the
     * work queue, to avoid wrap-around in the middle of a WR.  We
     * set the NEC bit to avoid getting completions with error for
     * these NOP WRs, but since NEC is only supported starting
     * with firmware 2.2.232, we use constant-sized WRs for older
     * firmware.
     *
     * And, since MLX QPs only support SEND, we use constant-sized
     * WRs in this case.
     *
     * We look for the smallest value of wqe_shift such that the
     * resulting number of wqes does not exceed device
     * capabilities.
     *
     * We set WQE size to at least 64 bytes, this way stamping
     * invalidates each WQE.
     */
    if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits
            && BITS_PER_LONG == 64 && type != MLX4_IB_QPT_SMI
            && type != MLX4_IB_QPT_GSI
            && !(type
                    & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI
                            | MLX4_IB_QPT_PROXY_GSI
                            | MLX4_IB_QPT_TUN_SMI_OWNER)))
        qp->sq.wqe_shift = ilog2(64);
    else
        qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));

    for (;;) {
        qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);

        /*
         * We need to leave 2 KB + 1 WR of headroom in the SQ to
         * allow HW to prefetch.
         */
        qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
        qp->sq.wqe_cnt = roundup_pow_of_two(
                cap->max_send_wr * qp->sq_max_wqes_per_wr + qp->sq_spare_wqes);

        if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
            break;

        if (qp->sq_max_wqes_per_wr <= 1)
            return -EINVAL;

        ++qp->sq.wqe_shift;
    }

    qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
            (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift))
            - send_wqe_overhead(type, qp->flags))
            / sizeof(struct mlx4_wqe_data_seg);

    qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift)
            + (qp->sq.wqe_cnt << qp->sq.wqe_shift);
    if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
        qp->rq.offset = 0;
        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
    } else {
        qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
        qp->sq.offset = 0;
    }

    cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes)
            / qp->sq_max_wqes_per_wr;
    cap->max_send_sge = min(qp->sq.max_gs,
            min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg));
    qp->max_inline_data = cap->max_inline_data;

    return 0;
}
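/*
 * Example of the wqe_shift search above: for an RC QP with s = 200
 * bytes per WR and shrinking enabled, the loop starts at wqe_shift = 6
 * (64-byte basic blocks), giving sq_max_wqes_per_wr =
 * DIV_ROUND_UP(200, 64) = 4 and sq_spare_wqes = (2048 >> 6) + 4 = 36.
 * A request for max_send_wr = 256 then needs
 * roundup_pow_of_two(256 * 4 + 36) = 2048 basic blocks; only if that
 * exceeded caps.max_wqes would wqe_shift grow to 7, 8, and so on.
 */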
/*
 static int set_user_sq_size(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
         struct mlx4_ib_create_qp *ucmd) {
     Sanity check SQ size before proceeding
     if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes
             || ucmd->log_sq_stride
                     > ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz))
             || ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
         return -EINVAL;

     qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
     qp->sq.wqe_shift = ucmd->log_sq_stride;

     qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift)
             + (qp->sq.wqe_cnt << qp->sq.wqe_shift);

     return 0;
 }
 */
static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) {
    int i;

    qp->sqp_proxy_rcv = malloc(sizeof(struct mlx4_ib_buf) * qp->rq.wqe_cnt);
    if (!qp->sqp_proxy_rcv)
        return -ENOMEM;
    for (i = 0; i < qp->rq.wqe_cnt; i++) {
        qp->sqp_proxy_rcv[i].addr = dma_alloc(
                sizeof(struct mlx4_ib_proxy_sqp_hdr),
                &qp->sqp_proxy_rcv[i].map);

        /*qp->sqp_proxy_rcv[i].addr = malloc(
                sizeof(struct mlx4_ib_proxy_sqp_hdr));
        if (!qp->sqp_proxy_rcv[i].addr)
            goto err;
        qp->sqp_proxy_rcv[i].map = ib_dma_map_single(dev,
                qp->sqp_proxy_rcv[i].addr,
                sizeof(struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE);*/
    }
    return 0;

    /*TODO: cleanup*/
    /*err:*//*while (i > 0) {
        --i;
        ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
                sizeof(struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE);
        free(qp->sqp_proxy_rcv[i].addr);
    }*/
    /*free(qp->sqp_proxy_rcv);
    qp->sqp_proxy_rcv = NULL;
    return -ENOMEM;*/
}
/*
 static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) {
     int i;

     for (i = 0; i < qp->rq.wqe_cnt; i++) {
         ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
                 sizeof(struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE);
         free(qp->sqp_proxy_rcv[i].addr);
     }
     free(qp->sqp_proxy_rcv);
 }
 */
static int qp_has_rq(struct ib_qp_init_attr *attr) {
    if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
        return 0;

    return !attr->srq;
}
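/*
 * Example: an RC QP created with init_attr->srq set posts all receives
 * to the SRQ, so qp_has_rq() returns 0 and set_rq_size() leaves
 * rq.wqe_cnt == 0; the same holds for both XRC QP types, which never
 * own a receive queue.
 */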
/*
 #ifdef __linux__
 static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
         struct ib_qp_init_attr *attr, int *qpn) {
     struct mlx4_ib_qpg_data *qpg_data;
     int tss_num, rss_num;
     int tss_align_num, rss_align_num;
     int tss_base, rss_base = 0;
     int err;

     Parent is part of the TSS range (in SW TSS ARP is sent via parent)
     tss_num = 1 + attr->parent_attrib.tss_child_count;
     tss_align_num = roundup_pow_of_two(tss_num);
     rss_num = attr->parent_attrib.rss_child_count;
     rss_align_num = roundup_pow_of_two(rss_num);

     if (rss_num > 1) {
         RSS is requested
         if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
             return -ENOSYS;
         if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
             return -EINVAL;
         We must work with power of two
         attr->parent_attrib.rss_child_count = rss_align_num;
     }

     qpg_data = calloc(1, sizeof *qpg_data);
     if (!qpg_data)
         return -ENOMEM;

     if (pqp->flags & MLX4_IB_QP_NETIF)
         err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base);
     else
         err = mlx4_qp_reserve_range(dev->dev, tss_align_num, tss_align_num,
                 &tss_base, 1);
     if (err)
         goto err1;

     if (tss_num > 1) {
         u32 alloc = BITS_TO_LONGS(tss_align_num) * sizeof(long);
         qpg_data->tss_bitmap = calloc(1, alloc);
         if (qpg_data->tss_bitmap == NULL) {
             err = -ENOMEM;
             goto err2;
         }
         bitmap_fill(qpg_data->tss_bitmap, tss_num);
         Note parent takes first index
         clear_bit(0, qpg_data->tss_bitmap);
     }

     if (rss_num > 1) {
         u32 alloc = BITS_TO_LONGS(rss_align_num) * sizeof(long);
         err = mlx4_qp_reserve_range(dev->dev, rss_align_num, 1, &rss_base, 0);
         if (err)
             goto err3;
         qpg_data->rss_bitmap = calloc(1, alloc);
         if (qpg_data->rss_bitmap == NULL) {
             err = -ENOMEM;
             goto err4;
         }
         bitmap_fill(qpg_data->rss_bitmap, rss_align_num);
     }

     qpg_data->tss_child_count = attr->parent_attrib.tss_child_count;
     qpg_data->rss_child_count = attr->parent_attrib.rss_child_count;
     qpg_data->qpg_parent = pqp;
     qpg_data->qpg_tss_mask_sz = ilog2(tss_align_num);
     qpg_data->tss_qpn_base = tss_base;
     qpg_data->rss_qpn_base = rss_base;

     pqp->qpg_data = qpg_data;
     *qpn = tss_base;

     return 0;

 err4:
     mlx4_qp_release_range(dev->dev, rss_base, rss_align_num);

 err3:
     if (tss_num > 1)
         free(qpg_data->tss_bitmap);

 err2:
     if (pqp->flags & MLX4_IB_QP_NETIF)
         mlx4_ib_steer_qp_free(dev, tss_base, tss_align_num);
     else
         mlx4_qp_release_range(dev->dev, tss_base, tss_align_num);

 err1:
     free(qpg_data);
     return err;
 }

 static void free_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp) {
     struct mlx4_ib_qpg_data *qpg_data = pqp->qpg_data;
     int align_num;

     if (qpg_data->tss_child_count > 1)
         free(qpg_data->tss_bitmap);

     align_num = roundup_pow_of_two(1 + qpg_data->tss_child_count);
     if (pqp->flags & MLX4_IB_QP_NETIF)
         mlx4_ib_steer_qp_free(dev, qpg_data->tss_qpn_base, align_num);
     else
         mlx4_qp_release_range(dev->dev, qpg_data->tss_qpn_base, align_num);

     if (qpg_data->rss_child_count > 1) {
         free(qpg_data->rss_bitmap);
         align_num = roundup_pow_of_two(qpg_data->rss_child_count);
         mlx4_qp_release_range(dev->dev, qpg_data->rss_qpn_base, align_num);
     }

     free(qpg_data);
 }

 static int alloc_qpg_qpn(struct ib_qp_init_attr *init_attr,
         struct mlx4_ib_qp *pqp, int *qpn) {
     struct mlx4_ib_qp *mqp = to_mqp(init_attr->qpg_parent);
     struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;
     u32 idx, old;

     switch (init_attr->qpg_type) {
     case IB_QPG_CHILD_TX:
         if (qpg_data->tss_child_count == 0)
             return -EINVAL;
         do {
             Parent took index 0
             idx = find_first_bit(qpg_data->tss_bitmap,
                     qpg_data->tss_child_count + 1);
             if (idx >= qpg_data->tss_child_count + 1)
                 return -ENOMEM;
             old = test_and_clear_bit(idx, qpg_data->tss_bitmap);
         } while (old == 0);
         idx += qpg_data->tss_qpn_base;
         break;
     case IB_QPG_CHILD_RX:
         if (qpg_data->rss_child_count == 0)
             return -EINVAL;
         do {
             idx = find_first_bit(qpg_data->rss_bitmap,
                     qpg_data->rss_child_count);
             if (idx >= qpg_data->rss_child_count)
                 return -ENOMEM;
             old = test_and_clear_bit(idx, qpg_data->rss_bitmap);
         } while (old == 0);
         idx += qpg_data->rss_qpn_base;
         break;
     default:
         return -EINVAL;
     }

     pqp->qpg_data = qpg_data;
     *qpn = idx;

     return 0;
 }

 static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn) {
     struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;

     switch (mqp->qpg_type) {
     case IB_QPG_CHILD_TX:
         Do range check
         qpn -= qpg_data->tss_qpn_base;
         set_bit(qpn, qpg_data->tss_bitmap);
         break;
     case IB_QPG_CHILD_RX:
         qpn -= qpg_data->rss_qpn_base;
         set_bit(qpn, qpg_data->rss_bitmap);
         break;
     default:
         error
         MLX4_WARN("wrong qpg type (%d)\n", mqp->qpg_type);
         break;
     }
 }
 #endif
 */
static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
        struct ib_qp_init_attr *attr, int *qpn) {
    int err = 0;

    switch (attr->qpg_type) {
    case IB_QPG_NONE:
        /*
         * Raw packet QPNs must be aligned to 8 bits.
         * If not, the WQE BlueFlame setup flow wrongly causes
         * VLAN insertion.
         */
        if (attr->qp_type == IB_QPT_RAW_PACKET) {
            err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1);
        } else {
            if (qp->flags & MLX4_IB_QP_NETIF)
                err = mlx4_ib_steer_qp_alloc(dev, 1, qpn);
            else
                err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 0);
        }
        break;
    case IB_QPG_PARENT:
#ifdef __linux__
        err = init_qpg_parent(dev, qp, attr, qpn);
#endif
        break;
    case IB_QPG_CHILD_TX:
    case IB_QPG_CHILD_RX:
#ifdef __linux__
        err = alloc_qpg_qpn(attr, qp, qpn);
#endif
        break;
    default:
        qp->qpg_type = IB_QPG_NONE;
        err = -EINVAL;
        break;
    }
    if (err)
        return err;
    qp->qpg_type = attr->qpg_type;
    return 0;
}
/*
 static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
         enum ib_qpg_type qpg_type, int qpn) {
     switch (qpg_type) {
     case IB_QPG_NONE:
         if (qp->flags & MLX4_IB_QP_NETIF)
             mlx4_ib_steer_qp_free(dev, qpn, 1);
         else
             mlx4_qp_release_range(dev->dev, qpn, 1);
         break;
     case IB_QPG_PARENT:
 #ifdef __linux__
         free_qpg_parent(dev, qp);
 #endif
         break;
     case IB_QPG_CHILD_TX:
     case IB_QPG_CHILD_RX:
 #ifdef __linux__
         free_qpg_qpn(qp, qpn);
 #endif
         break;
     default:
         break;
     }
 }

 Revert allocation on create_qp_common
 static void unalloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
         struct ib_qp_init_attr *attr, int qpn) {
     free_qpn_common(dev, qp, attr->qpg_type, qpn);
 }

 static void release_qpn_common(struct mlx4_ib_dev *dev,
         struct mlx4_ib_qp *qp) {
     free_qpn_common(dev, qp, qp->qpg_type, qp->mqp.qpn);
 }
 */
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn,
        struct mlx4_ib_qp **caller_qp) {
    int qpn;
    int err;
    struct mlx4_ib_sqp *sqp;
    struct mlx4_ib_qp *qp;
    enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;

#ifndef __linux__
    init_attr->qpg_type = IB_QPG_NONE;
#endif

    /* When tunneling special qps, we use a plain UD qp */
    if (sqpn) {
        if (mlx4_is_mfunc(dev->dev)
                && (!mlx4_is_master(dev->dev)
                        || !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
            if (init_attr->qp_type == IB_QPT_GSI)
                qp_type = MLX4_IB_QPT_PROXY_GSI;
            else if (mlx4_is_master(dev->dev))
                qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
            else
                qp_type = MLX4_IB_QPT_PROXY_SMI;
        }
        qpn = sqpn;
        /* add extra sg entry for tunneling */
        init_attr->cap.max_recv_sge++;
    } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
        struct mlx4_ib_qp_tunnel_init_attr *tnl_init = container_of(init_attr,
                struct mlx4_ib_qp_tunnel_init_attr, init_attr);
        if ((tnl_init->proxy_qp_type != IB_QPT_SMI
                && tnl_init->proxy_qp_type != IB_QPT_GSI)
                || !mlx4_is_master(dev->dev))
            return -EINVAL;
        if (tnl_init->proxy_qp_type == IB_QPT_GSI)
            qp_type = MLX4_IB_QPT_TUN_GSI;
        else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
            qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
        else
            qp_type = MLX4_IB_QPT_TUN_SMI;
        /*
         * We are definitely in the PPF here, since we are creating
         * tunnel QPs.  base_tunnel_sqpn is therefore valid.
         */
        qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
                + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
        sqpn = qpn;
    }
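    /*
     * Tunnel QPN layout example: each slave owns a block of 8 QPNs
     * starting at base_tunnel_sqpn + 8 * slave, and within the block
     * the offset is proxy_qp_type * 2 + port - 1 (IB_QPT_SMI == 0,
     * IB_QPT_GSI == 1 in the ib_verbs enum).  E.g. slave 2, GSI,
     * port 1 maps to base_tunnel_sqpn + 16 + 2 + 0 =
     * base_tunnel_sqpn + 18.
     */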
    if (!*caller_qp) {
        if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI
                || (qp_type
                        & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER
                                | MLX4_IB_QPT_PROXY_GSI
                                | MLX4_IB_QPT_TUN_SMI_OWNER))) {
            sqp = calloc(1, sizeof(struct mlx4_ib_sqp));
            if (!sqp)
                return -ENOMEM;
            qp = &sqp->qp;
            qp->pri.vid = qp->alt.vid = 0xFFFF;
        } else {
            qp = calloc(1, sizeof(struct mlx4_ib_qp));
            if (!qp)
                return -ENOMEM;
            qp->pri.vid = qp->alt.vid = 0xFFFF;
        }
    } else
        qp = *caller_qp;

    qp->mlx4_ib_qp_type = qp_type;

    /*mutex_init(&qp->mutex);
    spin_lock_init(&qp->sq.lock);
    spin_lock_init(&qp->rq.lock);*/
    INIT_LIST_HEAD(&qp->gid_list);
    INIT_LIST_HEAD(&qp->steering_rules);
    INIT_LIST_HEAD(&qp->rules_list);

    qp->state = IB_QPS_RESET;
    if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
        qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);

    err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
            qp_has_rq(init_attr), qp);
    if (err)
        goto err;

    if (pd->uobject) {
        assert(!"NYI");
        /*struct mlx4_ib_create_qp ucmd;
        int shift;
        int n;

        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
            err = -EFAULT;
            goto err;
        }

        qp->sq_no_prefetch = ucmd.sq_no_prefetch;

        err = set_user_sq_size(dev, qp, &ucmd);
        if (err)
            goto err;

        qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
                qp->buf_size, 0, 0);
        if (IS_ERR(qp->umem)) {
            err = PTR_ERR(qp->umem);
            goto err;
        }

        n = ib_umem_page_count(qp->umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
        err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);

        if (err)
            goto err_buf;

        err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
        if (err)
            goto err_mtt;

        if (qp_has_rq(init_attr)) {
            err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
                    ucmd.db_addr, &qp->db);
            if (err)
                goto err_mtt;
        }*/
    } else {
        qp->sq_no_prefetch = 0;

        if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
            qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;

        if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
            qp->flags |= MLX4_IB_QP_LSO;

        if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP
                && dev->dev->caps.steering_mode
                        == MLX4_STEERING_MODE_DEVICE_MANAGED
                && !mlx4_is_mfunc(dev->dev))
            qp->flags |= MLX4_IB_QP_NETIF;

        err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
        if (err)
            goto err;

        if (qp_has_rq(init_attr)) {
            err = mlx4_db_alloc(dev->dev, &qp->db, 0);
            if (err)
                goto err;

            *qp->db.db = 0;
        }

        if (qp->max_inline_data) {
            err = mlx4_bf_alloc(dev->dev, &qp->bf, 0);
            if (err) {
                MLX4_DEBUG("failed to allocate blue flame"
                        " register (%d)", err);
                qp->bf.uar = &dev->priv_uar;
            }
        } else
            qp->bf.uar = &dev->priv_uar;

        if (mlx4_buf_alloc(dev->dev, qp->buf_size, BASE_PAGE_SIZE * 2,
                &qp->buf)) {
            err = -ENOMEM;
            goto err_db;
        }

        err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
                &qp->mtt);
        if (err)
            goto err_buf;

        err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
        if (err)
            goto err_mtt;

        qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(u64));
        qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(u64));

        if (!qp->sq.wrid || !qp->rq.wrid) {
            err = -ENOMEM;
            goto err_wrid;
        }
    }

    if (sqpn) {
        if (qp->mlx4_ib_qp_type
                & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI
                        | MLX4_IB_QPT_PROXY_GSI)) {
            if (alloc_proxy_bufs(pd->device, qp)) {
                err = -ENOMEM;
                goto err_wrid;
            }
        }
    } else {
        err = alloc_qpn_common(dev, qp, init_attr, &qpn);
        if (err)
            goto err_proxy;
    }

    err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
    if (err)
        goto err_qpn;

    if (init_attr->qp_type == IB_QPT_XRC_TGT)
        qp->mqp.qpn |= (1 << 23);

    /*
     * Hardware wants QPN written in big-endian order (after
     * shifting) for send doorbell.  Precompute this value to save
     * a little bit when posting sends.
     */
    qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
    qp->mqp.event = mlx4_ib_qp_event;
    if (!*caller_qp)
        *caller_qp = qp;
    return 0;

err_qpn:
    /*unalloc_qpn_common(dev, qp, init_attr, qpn);*/

err_proxy:
    /*if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
        free_proxy_bufs(pd->device, qp);*/
err_wrid:
    /*if (pd->uobject) {
        if (qp_has_rq(init_attr))
            mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
    } else {*/
    free(qp->sq.wrid);
    free(qp->rq.wrid);
    /*}*/

err_mtt:
    /*mlx4_mtt_cleanup(dev->dev, &qp->mtt);*/

err_buf:
    /*if (pd->uobject)
        ib_umem_release(qp->umem);
    else
        mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);*/

err_db:
    /*if (!pd->uobject && qp_has_rq(init_attr))
        mlx4_db_free(dev->dev, &qp->db);

    if (qp->max_inline_data)
        mlx4_bf_free(dev->dev, &qp->bf);*/

err:
    if (!*caller_qp)
        free(qp);
    return err;
}

static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) {
    switch (state) {
    case IB_QPS_RESET:
        return MLX4_QP_STATE_RST;
    case IB_QPS_INIT:
        return MLX4_QP_STATE_INIT;
    case IB_QPS_RTR:
        return MLX4_QP_STATE_RTR;
    case IB_QPS_RTS:
        return MLX4_QP_STATE_RTS;
    case IB_QPS_SQD:
        return MLX4_QP_STATE_SQD;
    case IB_QPS_SQE:
        return MLX4_QP_STATE_SQER;
    case IB_QPS_ERR:
        return MLX4_QP_STATE_ERR;
    default:
        return -1;
    }
}
/*
 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
         struct mlx4_ib_cq *recv_cq)
         __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
     if (send_cq == recv_cq) {
         spin_lock_irq(&send_cq->lock);
         (void) __acquire(&recv_cq->lock);
     } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
         spin_lock_irq(&send_cq->lock);
         spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
     } else {
         spin_lock_irq(&recv_cq->lock);
         spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
     }
 }

 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
         struct mlx4_ib_cq *recv_cq)
         __releases(&send_cq->lock) __releases(&recv_cq->lock)
 {
     if (send_cq == recv_cq) {
         (void) __release(&recv_cq->lock);
         spin_unlock_irq(&send_cq->lock);
     } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
         spin_unlock(&recv_cq->lock);
         spin_unlock_irq(&send_cq->lock);
     } else {
         spin_unlock(&send_cq->lock);
         spin_unlock_irq(&recv_cq->lock);
     }
 }

 static void del_gid_entries(struct mlx4_ib_qp *qp) {
     struct mlx4_ib_gid_entry *ge, *tmp;

     list_for_each_entry_safe(ge, tmp, &qp->gid_list, list)
     {
         list_del(&ge->list);
         free(ge);
     }
 }
 */
static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) {
    if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
        return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
    else
        return to_mpd(qp->ibqp.pd);
}

static void get_cqs(struct mlx4_ib_qp *qp, struct mlx4_ib_cq **send_cq,
        struct mlx4_ib_cq **recv_cq) {
    switch (qp->ibqp.qp_type) {
    case IB_QPT_XRC_TGT:
        *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
        *recv_cq = *send_cq;
        break;
    case IB_QPT_XRC_INI:
        *send_cq = to_mcq(qp->ibqp.send_cq);
        *recv_cq = *send_cq;
        break;
    default:
        *send_cq = to_mcq(qp->ibqp.send_cq);
        *recv_cq = to_mcq(qp->ibqp.recv_cq);
        break;
    }
}
/*
 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
         int is_user) {
     struct mlx4_ib_cq *send_cq, *recv_cq;

     if (qp->state != IB_QPS_RESET) {
         if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
                 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
             MLX4_WARN("modify QP %06x to RESET failed.\n", qp->mqp.qpn);
         if (qp->pri.smac) {
             mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
             qp->pri.smac = 0;
         }
         if (qp->alt.smac) {
             mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
             qp->alt.smac = 0;
         }
         if (qp->pri.vid < 0x1000) {
             mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
             qp->pri.vid = 0xFFFF;
             qp->pri.candidate_vid = 0xFFFF;
             qp->pri.update_vid = 0;
         }
         if (qp->alt.vid < 0x1000) {
             mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
             qp->alt.vid = 0xFFFF;
             qp->alt.candidate_vid = 0xFFFF;
             qp->alt.update_vid = 0;
         }
     }

     get_cqs(qp, &send_cq, &recv_cq);

     mlx4_ib_lock_cqs(send_cq, recv_cq);

     if (!is_user) {
         __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
                 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
         if (send_cq != recv_cq)
             __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
     }

     mlx4_qp_remove(dev->dev, &qp->mqp);

     mlx4_ib_unlock_cqs(send_cq, recv_cq);

     mlx4_qp_free(dev->dev, &qp->mqp);

     if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
         release_qpn_common(dev, qp);

     mlx4_mtt_cleanup(dev->dev, &qp->mtt);

     if (is_user) {
         if (qp->rq.wqe_cnt)
             mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
                     &qp->db);
         ib_umem_release(qp->umem);
     } else {
         free(qp->sq.wrid);
         free(qp->rq.wrid);
         if (qp->mlx4_ib_qp_type
                 & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI
                         | MLX4_IB_QPT_PROXY_GSI))
             free_proxy_bufs(&dev->ib_dev, qp);
         mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
         if (qp->max_inline_data)
             mlx4_bf_free(dev->dev, &qp->bf);

         if (qp->rq.wqe_cnt)
             mlx4_db_free(dev->dev, &qp->db);
     }

     del_gid_entries(qp);
 }
 */
static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) {
    /* Native or PPF */
    if (!mlx4_is_mfunc(dev->dev)
            || (mlx4_is_master(dev->dev)
                    && attr->create_flags & MLX4_IB_SRIOV_SQP)) {
        return dev->dev->phys_caps.base_sqpn
                + (attr->qp_type == IB_QPT_SMI ? 0 : 2) + attr->port_num - 1;
    }
    /* PF or VF -- creating proxies */
    if (attr->qp_type == IB_QPT_SMI)
        return dev->dev->caps.qp0_proxy[attr->port_num - 1];
    else
        return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
/*
 #ifdef __linux__
 static int check_qpg_attr(struct mlx4_ib_dev *dev,
         struct ib_qp_init_attr *attr) {
     if (attr->qpg_type == IB_QPG_NONE)
         return 0;

     if (attr->qp_type != IB_QPT_UD)
         return -EINVAL;

     if (attr->qpg_type == IB_QPG_PARENT) {
         if (attr->parent_attrib.tss_child_count == 1)
             return -EINVAL; Doesn't make sense
         if (attr->parent_attrib.rss_child_count == 1)
             return -EINVAL; Doesn't make sense
         if ((attr->parent_attrib.tss_child_count == 0)
                 && (attr->parent_attrib.rss_child_count == 0))
             Should be called with IB_QPG_NONE
             return -EINVAL;
         if (attr->parent_attrib.rss_child_count > 1) {
             int rss_align_num;
             if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
                 return -ENOSYS;
             rss_align_num = roundup_pow_of_two(
                     attr->parent_attrib.rss_child_count);
             if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
                 return -EINVAL;
         }
     } else {
         struct mlx4_ib_qpg_data *qpg_data;
         if (attr->qpg_parent == NULL)
             return -EINVAL;
         if (IS_ERR(attr->qpg_parent))
             return -EINVAL;
         qpg_data = to_mqp(attr->qpg_parent)->qpg_data;
         if (qpg_data == NULL)
             return -EINVAL;
         if (attr->qpg_type == IB_QPG_CHILD_TX && !qpg_data->tss_child_count)
             return -EINVAL;
         if (attr->qpg_type == IB_QPG_CHILD_RX && !qpg_data->rss_child_count)
             return -EINVAL;
     }
     return 0;
 }
 #endif
 */
#define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \
        & ~(IB_QP_CREATE_RESERVED_START - 1))

static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(
        enum ib_qp_create_flags ib_qp_flags) {
    enum mlx4_ib_qp_flags mlx4_ib_qp_flags = 0;

    if (ib_qp_flags & IB_QP_CREATE_IPOIB_UD_LSO)
        mlx4_ib_qp_flags |= MLX4_IB_QP_LSO;

    if (ib_qp_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
        mlx4_ib_qp_flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;

    if (ib_qp_flags & IB_QP_CREATE_NETIF_QP)
        mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF;

    /* reserved flags */
    mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK);

    return mlx4_ib_qp_flags;
}
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        struct ib_qp_init_attr *init_attr, struct ib_udata *udata) {
    struct mlx4_ib_qp *qp = NULL;
    int err;
    u16 xrcdn = 0;
    enum mlx4_ib_qp_flags mlx4_qp_flags = to_mlx4_ib_qp_flags(
            init_attr->create_flags);
    struct ib_device *device;

    /* see ib_core::ib_create_qp for the same handling */
    device = pd ? pd->device : init_attr->xrcd->device;

    /*
     * We only support LSO, vendor flag1, and multicast loopback
     * blocking, and only for kernel UD QPs.
     */
    if (mlx4_qp_flags
            & ~(MLX4_IB_QP_LSO | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK
                    | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP
                    | MLX4_IB_QP_NETIF))
        return ERR_PTR(-EINVAL);

    if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
        if (init_attr->qp_type != IB_QPT_UD)
            return ERR_PTR(-EINVAL);
    }

    if (init_attr->create_flags
            && (udata
                    || ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP)
                            && init_attr->qp_type != IB_QPT_UD)
                    || ((mlx4_qp_flags & MLX4_IB_SRIOV_SQP)
                            && init_attr->qp_type > IB_QPT_GSI)))
        return ERR_PTR(-EINVAL);

#ifdef __linux__
    assert(!"NYI");
    /*err = check_qpg_attr(to_mdev(device), init_attr);
    if (err)
        return ERR_PTR(err);*/
#endif

    switch (init_attr->qp_type) {
    case IB_QPT_XRC_TGT:
        assert(!"NYI");
        /*pd = to_mxrcd(init_attr->xrcd)->pd;
        xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
        init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;*/
        /* fall through */
    case IB_QPT_XRC_INI:
        assert(!"NYI");
        /*if (!(to_mdev(device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
            return ERR_PTR(-ENOSYS);
        init_attr->recv_cq = init_attr->send_cq;*/
        /* fall through */
    case IB_QPT_RC:
    case IB_QPT_UC:
    case IB_QPT_RAW_PACKET:
        qp = calloc(1, sizeof *qp);
        if (!qp)
            return ERR_PTR(-ENOMEM);
        qp->pri.vid = qp->alt.vid = 0xFFFF;
        /* fall through */
    case IB_QPT_UD: {
        err = create_qp_common(to_mdev(device), pd, init_attr, udata, 0, &qp);
        if (err) {
            free(qp);
            return ERR_PTR(err);
        }

        qp->ibqp.qp_num = qp->mqp.qpn;
        qp->xrcdn = xrcdn;

        break;
    }
    case IB_QPT_SMI:
    case IB_QPT_GSI: {
        /* Userspace is not allowed to create special QPs: */
        if (udata)
            return ERR_PTR(-EINVAL);

        err = create_qp_common(to_mdev(device), pd, init_attr, udata,
                get_sqp_num(to_mdev(device), init_attr), &qp);
        if (err)
            return ERR_PTR(err);

        qp->port = init_attr->port_num;
        qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

        break;
    }
    default:
        /* Don't support raw QPs */
        return ERR_PTR(-EINVAL);
    }

    return &qp->ibqp;
}
/*
 int mlx4_ib_destroy_qp(struct ib_qp *qp) {
     struct mlx4_ib_dev *dev = to_mdev(qp->device);
     struct mlx4_ib_qp *mqp = to_mqp(qp);
     struct mlx4_ib_pd *pd;

     if (is_qp0(dev, mqp))
         mlx4_CLOSE_PORT(dev->dev, mqp->port);

     pd = get_pd(mqp);
     destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);

     if (is_sqp(dev, mqp))
         free(to_msqp(mqp));
     else
         free(mqp);

     return 0;
 }
 */
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) {
    switch (type) {
    case MLX4_IB_QPT_RC:
        return MLX4_QP_ST_RC;
    case MLX4_IB_QPT_UC:
        return MLX4_QP_ST_UC;
    case MLX4_IB_QPT_UD:
        return MLX4_QP_ST_UD;
    case MLX4_IB_QPT_XRC_INI:
    case MLX4_IB_QPT_XRC_TGT:
        return MLX4_QP_ST_XRC;
    case MLX4_IB_QPT_SMI:
    case MLX4_IB_QPT_GSI:
    case MLX4_IB_QPT_RAW_PACKET:
        return MLX4_QP_ST_MLX;

    case MLX4_IB_QPT_PROXY_SMI_OWNER:
    case MLX4_IB_QPT_TUN_SMI_OWNER:
        return (mlx4_is_mfunc(dev->dev) ? MLX4_QP_ST_MLX : -1);
    case MLX4_IB_QPT_PROXY_SMI:
    case MLX4_IB_QPT_TUN_SMI:
    case MLX4_IB_QPT_PROXY_GSI:
    case MLX4_IB_QPT_TUN_GSI:
        return (mlx4_is_mfunc(dev->dev) ? MLX4_QP_ST_UD : -1);
    default:
        return -1;
    }
}

static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp,
        const struct ib_qp_attr *attr, int attr_mask) {
    u8 dest_rd_atomic;
    u32 access_flags;
    u32 hw_access_flags = 0;

    if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
        dest_rd_atomic = attr->max_dest_rd_atomic;
    else
        dest_rd_atomic = qp->resp_depth;

    if (attr_mask & IB_QP_ACCESS_FLAGS)
        access_flags = attr->qp_access_flags;
    else
        access_flags = qp->atomic_rd_en;

    if (!dest_rd_atomic)
        access_flags &= IB_ACCESS_REMOTE_WRITE;

    if (access_flags & IB_ACCESS_REMOTE_READ)
        hw_access_flags |= MLX4_QP_BIT_RRE;
    if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
        hw_access_flags |= MLX4_QP_BIT_RAE;
    if (access_flags & IB_ACCESS_REMOTE_WRITE)
        hw_access_flags |= MLX4_QP_BIT_RWE;

    return cpu_to_be32(hw_access_flags);
}

static void store_sqp_attrs(struct mlx4_ib_sqp *sqp,
        const struct ib_qp_attr *attr, int attr_mask) {
    if (attr_mask & IB_QP_PKEY_INDEX)
        sqp->pkey_index = attr->pkey_index;
    if (attr_mask & IB_QP_QKEY)
        sqp->qkey = attr->qkey;
    if (attr_mask & IB_QP_SQ_PSN)
        sqp->send_psn = attr->sq_psn;
}

static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) {
    path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
        struct mlx4_ib_qp *qp, struct mlx4_qp_path *path, u8 port,
        int is_primary) {
    /*struct net_device *ndev;
    int err;*/
    int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port)
            == IB_LINK_LAYER_ETHERNET;
    /*u8 mac[6];
    int is_mcast;
    u16 vlan_tag;
    int vidx;
    int smac_index;
    u64 u64_mac;
    u8 *smac;
    struct mlx4_roce_smac_vlan_info *smac_info;*/

    path->grh_mylmc = ah->src_path_bits & 0x7f;
    path->rlid = cpu_to_be16(ah->dlid);
    if (ah->static_rate) {
        path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
        while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET
                && !(1 << path->static_rate
                        & dev->dev->caps.stat_rate_support))
            --path->static_rate;
    } else
        path->static_rate = 0;

    if (ah->ah_flags & IB_AH_GRH) {
        if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
            MLX4_ERR("sgid_index (%u) too large. max is %d\n",
                    ah->grh.sgid_index,
                    dev->dev->caps.gid_table_len[port] - 1);
            return -1;
        }

        path->grh_mylmc |= 1 << 7;
        path->mgid_index = ah->grh.sgid_index;
        path->hop_limit = ah->grh.hop_limit;
        path->tclass_flowlabel = cpu_to_be32(
                (ah->grh.traffic_class << 20) | (ah->grh.flow_label));
        memcpy(path->rgid, ah->grh.dgid.raw, 16);
    }

    if (is_eth) {
        assert(!"NYI");
        /*if (!(ah->ah_flags & IB_AH_GRH))
            return -1;

        path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6)
                | ((ah->sl & 7) << 3);

        if (is_primary)
            smac_info = &qp->pri;
        else
            smac_info = &qp->alt;

        vlan_tag = rdma_get_vlan_id(
                &dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
        if (vlan_tag < 0x1000) {
            if (smac_info->vid < 0x1000) {
                both valid vlan ids
                if (smac_info->vid != vlan_tag) {
                    different VIDs. unreg old and reg new
                    err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
                    if (err)
                        return err;
                    smac_info->candidate_vid = vlan_tag;
                    smac_info->candidate_vlan_index = vidx;
                    smac_info->candidate_vlan_port = port;
                    smac_info->update_vid = 1;
                    path->vlan_index = vidx;
                    path->fl = 1 << 6;
                } else {
                    path->vlan_index = smac_info->vlan_index;
                    path->fl = 1 << 6;
                }
            } else {
                no current vlan tag in qp
                err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
                if (err)
                    return err;
                smac_info->candidate_vid = vlan_tag;
                smac_info->candidate_vlan_index = vidx;
                smac_info->candidate_vlan_port = port;
                smac_info->update_vid = 1;
                path->vlan_index = vidx;
                path->fl = 1 << 6;
            }
        } else {
            have current vlan tag. unregister it at modify-qp success
            if (smac_info->vid < 0x1000) {
                smac_info->candidate_vid = 0xFFFF;
                smac_info->update_vid = 1;
            }
        }

        err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
        if (err)
            return err;

        get smac_index for RoCE use.
        If no smac was yet assigned, register one.
        If one was already assigned, but the new mac differs,
        unregister the old one and register the new one.

        spin_lock(&dev->iboe.lock);
        ndev = dev->iboe.netdevs[port - 1];
        if (ndev) {
 #ifdef __linux__
            smac = ndev->dev_addr; fixme: cache this value
 #else
            smac = IF_LLADDR(ndev); fixme: cache this value
 #endif

            u64_mac = mlx4_mac_to_u64(smac);
        } else
            u64_mac = dev->dev->caps.def_mac[port];
        spin_unlock(&dev->iboe.lock);

        if (!smac_info->smac || smac_info->smac != u64_mac) {
            register candidate now, unreg if needed, after success
            smac_index = mlx4_register_mac(dev->dev, port, u64_mac);
            if (smac_index >= 0) {
                smac_info->candidate_smac_index = smac_index;
                smac_info->candidate_smac = u64_mac;
                smac_info->candidate_smac_port = port;
            } else
                return -EINVAL;
        } else
            smac_index = smac_info->smac_index;

        memcpy(path->dmac, mac, 6);
        path->ackto = MLX4_IB_LINK_TYPE_ETH;
        put MAC table smac index for IBoE
        path->grh_mylmc = (u8)(smac_index) | 0x80;*/

    } else
        path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6)
                | ((ah->sl & 0xf) << 2);

    return 0;
}
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) {
    struct mlx4_ib_gid_entry *ge, *tmp;

    list_for_each_entry_safe(ge, tmp, &qp->gid_list, list)
    {
        assert(!"NYI");
        /*if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
            ge->added = 1;
            ge->port = qp->port;
        }*/
    }
}
/*
 static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
         struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) {
     struct net_device *ndev;
     u64 u64_mac;
     u8 *smac;
     int smac_index;

     ndev = dev->iboe.netdevs[qp->port - 1];
     if (ndev) {
 #ifdef __linux__
         smac = ndev->dev_addr; fixme: cache this value
 #else
         smac = IF_LLADDR(ndev); fixme: cache this value
 #endif
         u64_mac = mlx4_mac_to_u64(smac);
     } else
         u64_mac = dev->dev->caps.def_mac[qp->port];

     context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE
             | ((qp->port - 1) << 6);
     if (!qp->pri.smac) {
         smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
         if (smac_index >= 0) {
             qp->pri.candidate_smac_index = smac_index;
             qp->pri.candidate_smac = u64_mac;
             qp->pri.candidate_smac_port = qp->port;
             context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
         } else
             return -ENOENT;
     }
     return 0;
 }
 */
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        const struct ib_qp_attr *attr, int attr_mask,
        enum ib_qp_state cur_state, enum ib_qp_state new_state) {
    struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
    struct mlx4_ib_qp *qp = to_mqp(ibqp);
    struct mlx4_ib_pd *pd;
    struct mlx4_ib_cq *send_cq, *recv_cq;
    struct mlx4_qp_context *context;
    enum mlx4_qp_optpar optpar = 0;
    int sqd_event;
    int steer_qp = 0;
    int err = -EINVAL;
    int is_eth = -1;

    context = calloc(1, sizeof *context);
    if (!context)
        return -ENOMEM;

    context->flags = cpu_to_be32(
            (to_mlx4_state(new_state) << 28)
                    | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));

    if (!(attr_mask & IB_QP_PATH_MIG_STATE))
        context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
    else {
        optpar |= MLX4_QP_OPTPAR_PM_STATE;
        switch (attr->path_mig_state) {
        case IB_MIG_MIGRATED:
            context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
            break;
        case IB_MIG_REARM:
            context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
            break;
        case IB_MIG_ARMED:
            context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
            break;
        }
    }
    if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
        context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
    else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
        context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
    else if (ibqp->qp_type == IB_QPT_UD) {
        if (qp->flags & MLX4_IB_QP_LSO)
            context->mtu_msgmax = (IB_MTU_4096 << 5)
                    | ilog2(dev->dev->caps.max_gso_sz);
        else
            context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
    } else if (attr_mask & IB_QP_PATH_MTU) {
        if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
            MLX4_ERR("path MTU (%u) is invalid\n", attr->path_mtu);
            goto out;
        }
        context->mtu_msgmax = (attr->path_mtu << 5)
                | ilog2(dev->dev->caps.max_msg_sz);
    }

    if (qp->rq.wqe_cnt)
        context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
    context->rq_size_stride |= qp->rq.wqe_shift - 4;

    if (qp->sq.wqe_cnt)
        context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
    context->sq_size_stride |= qp->sq.wqe_shift - 4;

    if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
        context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
        context->xrcd = cpu_to_be32((u32) qp->xrcdn);
        context->param3 |= cpu_to_be32(1 << 30);
    }

    if (qp->ibqp.uobject)
        context->usr_page = cpu_to_be32(
                to_mucontext(ibqp->uobject->context)->uar.index);
    else
        context->usr_page = cpu_to_be32(qp->bf.uar->index);

    if (attr_mask & IB_QP_DEST_QPN)
        context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

    if (attr_mask & IB_QP_PORT) {
        if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD
                && !(attr_mask & IB_QP_AV)) {
            mlx4_set_sched(&context->pri_path, attr->port_num);
            optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
        }
    }

    if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
        if (dev->counters[qp->port - 1] != -1) {
            context->pri_path.counter_index = dev->counters[qp->port - 1];
            optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
        } else
            context->pri_path.counter_index = 0xff;

        if (qp->flags & MLX4_IB_QP_NETIF
                && (qp->qpg_type == IB_QPG_NONE
                        || qp->qpg_type == IB_QPG_PARENT)) {
            mlx4_ib_steer_qp_reg(dev, qp, 1);
            steer_qp = 1;
        }
    }

    if (attr_mask & IB_QP_PKEY_INDEX) {
        if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
            context->pri_path.disable_pkey_check = 0x40;
        context->pri_path.pkey_index = attr->pkey_index;
        optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
    }

    if (attr_mask & IB_QP_AV) {
        if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path,
                attr_mask & IB_QP_PORT ?
                        attr->port_num : qp->port, 1))
            goto out;

        optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH
                | MLX4_QP_OPTPAR_SCHED_QUEUE);
    }

    if (attr_mask & IB_QP_TIMEOUT) {
        context->pri_path.ackto |= attr->timeout << 3;
        optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
    }

    if (attr_mask & IB_QP_ALT_PATH) {
        if (attr->alt_port_num == 0
                || attr->alt_port_num > dev->dev->caps.num_ports)
            goto out;

        if (attr->alt_pkey_index
                >= dev->dev->caps.pkey_table_len[attr->alt_port_num])
            goto out;

        if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path,
                attr->alt_port_num, 0))
            goto out;

        context->alt_path.pkey_index = attr->alt_pkey_index;
        context->alt_path.ackto = attr->alt_timeout << 3;
        optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
    }

    pd = get_pd(qp);
    get_cqs(qp, &send_cq, &recv_cq);
    context->pd = cpu_to_be32(pd->pdn);
    context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
    context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
    context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);

    /* Set "fast registration enabled" for all kernel QPs */
    if (!qp->ibqp.uobject)
        context->params1 |= cpu_to_be32(1 << 11);

    if (attr_mask & IB_QP_RNR_RETRY) {
        context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
        optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
    }

    if (attr_mask & IB_QP_RETRY_CNT) {
        context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
        optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
    }

    if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
        if (attr->max_rd_atomic)
            context->params1 |=
                    cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
        optpar |= MLX4_QP_OPTPAR_SRA_MAX;
    }

    if (attr_mask & IB_QP_SQ_PSN)
        context->next_send_psn = cpu_to_be32(attr->sq_psn);

    if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
        if (attr->max_dest_rd_atomic)
            context->params2 |= cpu_to_be32(
                    fls(attr->max_dest_rd_atomic - 1) << 21);
        optpar |= MLX4_QP_OPTPAR_RRA_MAX;
    }

    if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
        context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
        optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE
                | MLX4_QP_OPTPAR_RAE;
    }

    if (attr_mask & IB_M_EXT_CLASS_1)
        context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);

    /* for now we also enable SQE on send */
    if (attr_mask & IB_M_EXT_CLASS_2) {
        context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_SQ);
        context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);
    }

    if (attr_mask & IB_M_EXT_CLASS_3)
        context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ);

    if (ibqp->srq)
        context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);

    if (attr_mask & IB_QP_MIN_RNR_TIMER) {
        context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
        optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
    }
    if (attr_mask & IB_QP_RQ_PSN)
        context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

    /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
    if (attr_mask & IB_QP_QKEY) {
        if (qp->mlx4_ib_qp_type
                & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
            context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
        else {
            if (mlx4_is_mfunc(dev->dev)
                    && !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
                    && (attr->qkey & MLX4_RESERVED_QKEY_MASK)
                            == MLX4_RESERVED_QKEY_BASE) {
MLX4_ERR("Cannot use reserved QKEY" 1939 " 0x%x (range 0xffff0000..0xffffffff" 1940 " is reserved)\n", attr->qkey); 1941 err = -EINVAL; 1942 goto out; 1943 } 1944 context->qkey = cpu_to_be32(attr->qkey); 1945 } 1946 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1947 } 1948 1949 if (ibqp->srq) 1950 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); 1951 1952 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1953 context->db_rec_addr = cpu_to_be64(qp->db.dma); 1954 1955 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR 1956 && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI 1957 || ibqp->qp_type == IB_QPT_UD 1958 || ibqp->qp_type == IB_QPT_RAW_PACKET)) { 1959 context->pri_path.sched_queue = (qp->port - 1) << 6; 1960 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI 1961 || qp->mlx4_ib_qp_type 1962 & (MLX4_IB_QPT_PROXY_SMI_OWNER 1963 | MLX4_IB_QPT_TUN_SMI_OWNER)) { 1964 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1965 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) 1966 context->pri_path.fl = 0x80; 1967 } else { 1968 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) 1969 context->pri_path.fl = 0x80; 1970 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1971 } 1972 is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) 1973 == IB_LINK_LAYER_ETHERNET; 1974 if (is_eth) { 1975 assert(!"NYI"); 1976 /*if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI 1977 || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) 1978 context->pri_path.feup = 1 << 7; don't fsm 1979 handle smac_index 1980 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD 1981 || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI 1982 || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { 1983 err = handle_eth_ud_smac_index(dev, qp, context); 1984 if (err) 1985 return -EINVAL; 1986 }*/ 1987 } 1988 } 1989 1990 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD 1991 && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY 1992 && attr->en_sqd_async_notify) 1993 sqd_event = 1; 1994 else 1995 sqd_event = 0; 1996 1997 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1998 context->rlkey |= (1 << 4); 1999 2000 if ((attr_mask & IB_QP_GROUP_RSS) && (qp->qpg_data->rss_child_count > 1)) { 2001 struct mlx4_ib_qpg_data *qpg_data = qp->qpg_data; 2002 void *rss_context_base = &context->pri_path; 2003 struct mlx4_rss_context *rss_context = 2004 (struct mlx4_rss_context *) (rss_context_base 2005 + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH); 2006 2007 context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); 2008 2009 /*This should be tbl_sz_base_qpn*/ 2010 rss_context->base_qpn = cpu_to_be32( 2011 qpg_data->rss_qpn_base 2012 | (ilog2(qpg_data->rss_child_count) << 24)); 2013 rss_context->default_qpn = cpu_to_be32(qpg_data->rss_qpn_base); 2014 /*This should be flags_hash_fn*/ 2015 rss_context->flags = MLX4_RSS_TCP_IPV6 | MLX4_RSS_TCP_IPV4; 2016 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS) { 2017 rss_context->base_qpn_udp = rss_context->default_qpn; 2018 rss_context->flags |= MLX4_RSS_IPV6 | MLX4_RSS_IPV4 2019 | MLX4_RSS_UDP_IPV6 | MLX4_RSS_UDP_IPV4; 2020 } 2021 if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) { 2022 static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, 2023 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, 0x593D56D9, 2024 0xF3253C06, 0x2ADC1FFC }; 2025 rss_context->hash_fn = MLX4_RSS_HASH_TOP; 2026 memcpy(rss_context->rss_key, rsskey, sizeof(rss_context->rss_key)); 2027 } else { 2028 rss_context->hash_fn = MLX4_RSS_HASH_XOR; 2029 memset(rss_context->rss_key, 0, 
    /** Before passing a kernel QP to the HW, make sure that the
     * ownership bits of the send queue are set and the SQ
     * headroom is stamped so that the hardware doesn't start
     * processing stale work requests.*/

    if (!ibqp->uobject && cur_state == IB_QPS_RESET
            && new_state == IB_QPS_INIT) {
        struct mlx4_wqe_ctrl_seg *ctrl;
        int i;

        for (i = 0; i < qp->sq.wqe_cnt; ++i) {
            ctrl = get_send_wqe(qp, i);
            ctrl->owner_opcode = cpu_to_be32(1U << 31);
            if (qp->sq_max_wqes_per_wr == 1)
                ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);

            stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
        }
    }
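    /** Illustrative sketch (not driver code): the owner bit (bit 31 of
     * owner_opcode) alternates on every pass over the send queue, so
     * presetting it to 1 here marks every WQE "not yet posted" for the
     * first pass. When a WQE is posted later, the bit is derived from
     * the free-running producer index, as in mlx4_ib_post_send():
     *
     *     owner = (ind & qp->sq.wqe_cnt) ? cpu_to_be32(1U << 31) : 0;
     *
     * wqe_cnt is a power of two, so this expression flips each time
     * the index wraps around the ring.*/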
    err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
            to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp);
    if (err)
        goto out;

    qp->state = new_state;

    if (attr_mask & IB_QP_ACCESS_FLAGS)
        qp->atomic_rd_en = attr->qp_access_flags;
    if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
        qp->resp_depth = attr->max_dest_rd_atomic;
    if (attr_mask & IB_QP_PORT) {
        qp->port = attr->port_num;
        update_mcg_macs(dev, qp);
    }
    if (attr_mask & IB_QP_ALT_PATH)
        qp->alt_port = attr->alt_port_num;

    if (is_sqp(dev, qp))
        store_sqp_attrs(to_msqp(qp), attr, attr_mask);

    /*Set 'ignore_cq_overrun' bits for collectives offload*/
    if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
        if (attr_mask & (IB_M_EXT_CLASS_2 | IB_M_EXT_CLASS_3)) {
            err = mlx4_ib_ignore_overrun_cq(ibqp->send_cq);
            if (err) {
                MLX4_ERR("Failed to set ignore CQ "
                        "overrun for QP 0x%x's send CQ\n", ibqp->qp_num);
                goto out;
            }

            if (ibqp->recv_cq != ibqp->send_cq) {
                err = mlx4_ib_ignore_overrun_cq(ibqp->recv_cq);
                if (err) {
                    MLX4_ERR("Failed to set ignore "
                            "CQ overrun for QP 0x%x's recv "
                            "CQ\n", ibqp->qp_num);
                    goto out;
                }
            }
        }
    }

    /** If we moved QP0 to RTR, bring the IB link up; if we moved
     * QP0 to RESET or ERROR, bring the link back down.*/

    if (is_qp0(dev, qp)) {
        if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
            if (mlx4_INIT_PORT(dev->dev, qp->port))
                MLX4_WARN("INIT_PORT failed for port %d\n", qp->port);

        if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR
                && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
            assert(!"NYI");
        /*mlx4_CLOSE_PORT(dev->dev, qp->port);*/
    }

    /** If we moved a kernel QP to RESET, clean up all old CQ
     * entries and reinitialize the QP.*/

    if (new_state == IB_QPS_RESET) {
        assert(!"NYI");
        /*if (!ibqp->uobject) {
         mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
         ibqp->srq ? to_msrq(ibqp->srq) : NULL);
         if (send_cq != recv_cq)
         mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);

         qp->rq.head = 0;
         qp->rq.tail = 0;
         qp->sq.head = 0;
         qp->sq.tail = 0;
         qp->sq_next_wqe = 0;
         if (qp->rq.wqe_cnt)
         *qp->db.db = 0;

         if (qp->flags & MLX4_IB_QP_NETIF
         && (qp->qpg_type == IB_QPG_NONE
         || qp->qpg_type == IB_QPG_PARENT))
         mlx4_ib_steer_qp_reg(dev, qp, 0);
         }
         if (qp->pri.smac) {
         mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
         qp->pri.smac = 0;
         }
         if (qp->alt.smac) {
         mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
         qp->alt.smac = 0;
         }
         if (qp->pri.vid < 0x1000) {
         mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
         qp->pri.vid = 0xFFFF;
         qp->pri.candidate_vid = 0xFFFF;
         qp->pri.update_vid = 0;
         }

         if (qp->alt.vid < 0x1000) {
         mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
         qp->alt.vid = 0xFFFF;
         qp->alt.candidate_vid = 0xFFFF;
         qp->alt.update_vid = 0;
         }*/
    }

    /*TODO: cleanup*/
    out: /*if (err && steer_qp)
     mlx4_ib_steer_qp_reg(dev, qp, 0);
     free(context);
     if (qp->pri.candidate_smac) {
     if (err)
     mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port,
     qp->pri.candidate_smac);
     else {
     if (qp->pri.smac) {
     mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
     }
     qp->pri.smac = qp->pri.candidate_smac;
     qp->pri.smac_index = qp->pri.candidate_smac_index;
     qp->pri.smac_port = qp->pri.candidate_smac_port;
     }
     qp->pri.candidate_smac = 0;
     qp->pri.candidate_smac_index = 0;
     qp->pri.candidate_smac_port = 0;
     }
     if (qp->alt.candidate_smac) {
     if (err)
     mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port,
     qp->alt.candidate_smac);
     else {
     if (qp->alt.smac) {
     mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
     }
     qp->alt.smac = qp->alt.candidate_smac;
     qp->alt.smac_index = qp->alt.candidate_smac_index;
     qp->alt.smac_port = qp->alt.candidate_smac_port;
     }
     qp->alt.candidate_smac = 0;
     qp->alt.candidate_smac_index = 0;
     qp->alt.candidate_smac_port = 0;
     }

     if (qp->pri.update_vid) {
     if (err) {
     if (qp->pri.candidate_vid < 0x1000)
     mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
     qp->pri.candidate_vid);
     } else {
     if (qp->pri.vid < 0x1000)
     mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
     qp->pri.vid = qp->pri.candidate_vid;
     qp->pri.vlan_port = qp->pri.candidate_vlan_port;
     qp->pri.vlan_index = qp->pri.candidate_vlan_index;
     }
     qp->pri.candidate_vid = 0xFFFF;
     qp->pri.update_vid = 0;
     }

     if (qp->alt.update_vid) {
     if (err) {
     if (qp->alt.candidate_vid < 0x1000)
     mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
     qp->alt.candidate_vid);
     } else {
     if (qp->alt.vid < 0x1000)
     mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
     qp->alt.vid = qp->alt.candidate_vid;
     qp->alt.vlan_port = qp->alt.candidate_vlan_port;
     qp->alt.vlan_index = qp->alt.candidate_vlan_index;
     }
     qp->alt.candidate_vid = 0xFFFF;
     qp->alt.update_vid = 0;
     }*/

    return err;
}
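/** Illustrative sketch (not driver code): a verbs consumer reaches the
 * code above once per state transition, walking the standard
 * RESET -> INIT -> RTR -> RTS ladder. The masks below are the usual
 * IB-spec minimum for an RC QP; the concrete values are assumptions
 * for the example only:
 *
 *     struct ib_qp_attr a = { 0 };
 *
 *     a.qp_state = IB_QPS_INIT; a.port_num = 1; a.pkey_index = 0;
 *     modify(qp, &a, IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX
 *             | IB_QP_ACCESS_FLAGS);
 *
 *     a.qp_state = IB_QPS_RTR;   plus path MTU, AV, dest QPN, RQ PSN,
 *     modify(...);               max_dest_rd_atomic, min RNR timer
 *
 *     a.qp_state = IB_QPS_RTS;   plus timeout, retry counts, SQ PSN,
 *     modify(...);               max_rd_atomic
 *
 * Each call lands in __mlx4_ib_modify_qp(), which translates the
 * attributes into a single mlx4_qp_modify() firmware command.*/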
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        int attr_mask, struct ib_udata *udata) {
    struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
    struct mlx4_ib_qp *qp = to_mqp(ibqp);
    enum ib_qp_state cur_state, new_state;
    int err = -EINVAL;

    /*mutex_lock(&qp->mutex);*/

    cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
    new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

    if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
            attr_mask & ~IB_M_QP_MOD_VEND_MASK)) {
        MLX4_DEBUG("qpn 0x%x: invalid attribute mask specified "
                "for transition %d to %d. qp_type %d,"
                " attr_mask 0x%x\n", ibqp->qp_num, cur_state, new_state,
                ibqp->qp_type, attr_mask);
        goto out;
    }

    if ((attr_mask & IB_M_QP_MOD_VEND_MASK) && !dev->dev->caps.sync_qp) {
        MLX4_ERR("extended verbs are not supported by %s\n", dev->ib_dev.name);
        goto out;
    }

    if ((attr_mask & IB_QP_PORT)
            && (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
        MLX4_DEBUG("qpn 0x%x: invalid port number (%d) specified "
                "for transition %d to %d. qp_type %d\n", ibqp->qp_num,
                attr->port_num, cur_state, new_state, ibqp->qp_type);
        goto out;
    }

    if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET)
            && (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num)
                != IB_LINK_LAYER_ETHERNET))
        goto out;

    if (attr_mask & IB_QP_PKEY_INDEX) {
        int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
        if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
            MLX4_DEBUG("qpn 0x%x: invalid pkey index (%d) specified "
                    "for transition %d to %d. qp_type %d\n", ibqp->qp_num,
                    attr->pkey_index, cur_state, new_state, ibqp->qp_type);
            goto out;
        }
    }

    if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC
            && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
        MLX4_DEBUG("qpn 0x%x: max_rd_atomic (%d) too large. "
                "Transition %d to %d. qp_type %d\n", ibqp->qp_num,
                attr->max_rd_atomic, cur_state, new_state, ibqp->qp_type);
        goto out;
    }
qp_type %d\n", ibqp->qp_num, 2292 attr->max_dest_rd_atomic, cur_state, new_state, ibqp->qp_type); 2293 goto out; 2294 } 2295 2296 if (cur_state == new_state && cur_state == IB_QPS_RESET) { 2297 err = 0; 2298 goto out; 2299 } 2300 2301 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); 2302 2303 out: /*mutex_unlock(&qp->mutex);*/ 2304 return err; 2305} 2306/* 2307 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, 2308 struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { 2309 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); 2310 struct ib_device *ib_dev = &mdev->ib_dev; 2311 struct mlx4_wqe_mlx_seg *mlx = wqe; 2312 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2313 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 2314 u16 pkey; 2315 u32 qkey; 2316 int send_size; 2317 int header_size; 2318 int spc; 2319 int i; 2320 2321 if (wr->opcode != IB_WR_SEND) 2322 return -EINVAL; 2323 2324 send_size = 0; 2325 2326 for (i = 0; i < wr->num_sge; ++i) 2327 send_size += wr->sg_list[i].length; 2328 2329 for proxy-qp0 sends, need to add in size of tunnel header 2330 for tunnel-qp0 sends, tunnel header is already in s/g list 2331 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) 2332 send_size += sizeof(struct mlx4_ib_tunnel_header); 2333 2334 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); 2335 2336 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { 2337 sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) 2338 >> 28; 2339 sqp->ud_header.lrh.destination_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2340 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2341 } 2342 2343 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 2344 2345 force loopback 2346 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); 2347 mlx->rlid = sqp->ud_header.lrh.destination_lid; 2348 2349 sqp->ud_header.lrh.virtual_lane = 0; 2350 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 2351 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); 2352 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 2353 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) 2354 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 2355 else 2356 sqp->ud_header.bth.destination_qpn = cpu_to_be32( 2357 mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); 2358 2359 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); 2360 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) 2361 return -EINVAL; 2362 sqp->ud_header.deth.qkey = cpu_to_be32(qkey); 2363 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); 2364 2365 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; 2366 sqp->ud_header.immediate_present = 0; 2367 2368 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); 2369 2370 2371 * Inline data segments may not cross a 64 byte boundary. If 2372 * our UD header is bigger than the space available up to the 2373 * next 64 byte boundary in the WQE, use two inline data 2374 * segments to hold the UD header. 
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        void *wqe, unsigned *mlx_seg_len) {
    /*struct ib_device *ib_dev = sqp->qp.ibqp.device;*/
    struct mlx4_wqe_mlx_seg *mlx = wqe;
    /*struct mlx4_wqe_ctrl_seg *ctrl = wqe;*/
    struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
    struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
    /*union ib_gid sgid;
     u16 pkey;*/
    int send_size;
    int header_size;
    int spc;
    int i;
    int is_eth;
    int is_vlan = 0;
    int is_grh;
    /*u16 vlan = 0;
     int err = 0;*/

    send_size = 0;
    for (i = 0; i < wr->num_sge; ++i)
        send_size += wr->sg_list[i].length;

    is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port)
            == IB_LINK_LAYER_ETHERNET;
    is_grh = mlx4_ib_ah_grh_present(ah);
    if (is_eth) {
        assert(!"NYI");
        /*if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
         When multi-function is enabled, the ib_core gid
         indexes don't necessarily match the hw ones, so
         we must use our own cache
         err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
         be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index,
         &sgid.raw[0]);
         if (err)
         return err;
         } else {
         err = ib_get_cached_gid(ib_dev,
         be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index,
         &sgid);
         if (err)
         return err;
         }

         vlan = rdma_get_vlan_id(&sgid);
         is_vlan = vlan < 0x1000;*/
    }
    ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0,
            &sqp->ud_header);

    if (!is_eth) {
        sqp->ud_header.lrh.service_level = be32_to_cpu(
                ah->av.ib.sl_tclass_flowlabel) >> 28;
        sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
        sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
    }

    if (is_grh) {
        assert(!"NYI");
        /*sqp->ud_header.grh.traffic_class = (be32_to_cpu(
         ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
         sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel
         & cpu_to_be32(0xfffff);
         sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
         if (is_eth)
         memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
         else {
         if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
         When multi-function is enabled, the ib_core gid
         indexes don't necessarily match the hw ones, so
         we must use our own cache
         sqp->ud_header.grh.source_gid.global.subnet_prefix = to_mdev(
         ib_dev)->sriov.demux[sqp->qp.port - 1].subnet_prefix;
         sqp->ud_header.grh.source_gid.global.interface_id =
         to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].guid_cache[ah->av.ib.gid_index];
         } else
         ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
         ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
         }
         memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16);*/
    }

    mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);

    if (!is_eth) {
        mlx->flags |= cpu_to_be32(
                (!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0)
                | (sqp->ud_header.lrh.destination_lid ==
                    IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0)
                | (sqp->ud_header.lrh.service_level << 8));
        if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
            mlx->flags |= cpu_to_be32(0x1); /* force loopback */
        mlx->rlid = sqp->ud_header.lrh.destination_lid;
    }
    switch (wr->opcode) {
    case IB_WR_SEND:
        sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
        sqp->ud_header.immediate_present = 0;
        break;
    case IB_WR_SEND_WITH_IMM:
        sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
        sqp->ud_header.immediate_present = 1;
        sqp->ud_header.immediate_data = wr->ex.imm_data;
        break;
    default:
        return -EINVAL;
    }

    if (is_eth) {
        assert(!"NYI");
        /*u8 smac[6];
         struct in6_addr in6;

         u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;

         mlx->sched_prio = cpu_to_be16(pcp);

         memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
         FIXME: cache smac value?
         memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
         memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
         memcpy(&in6, sgid.raw, sizeof(in6));
         rdma_get_ll_mac(&in6, smac);
         memcpy(sqp->ud_header.eth.smac_h, smac, 6);
         if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
         mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
         if (!is_vlan) {
         sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
         } else {
         sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
         sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
         }*/
    } else {
        sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
        if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
            sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
    }
    sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
    /*if (!sqp->qp.ibqp.qp_num)
     ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
     else
     ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);*/
    sqp->ud_header.bth.pkey = 0; /*cpu_to_be16(pkey);*/
    sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
    sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
    sqp->ud_header.deth.qkey = cpu_to_be32(
            wr->wr.ud.remote_qkey & 0x80000000 ?
                sqp->qkey : wr->wr.ud.remote_qkey);
    sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

    header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);

    if (0) {
        /*pr_err("built UD header of size %d:\n", header_size);
         for (i = 0; i < header_size / 4; ++i) {
         if (i % 8 == 0)
         pr_err(" [%02x] ", i * 4);
         pr_cont(" %08x", be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
         if ((i + 1) % 8 == 0)
         pr_cont("\n");
         }
         pr_err("\n");*/
    }

    /*
     * Inline data segments may not cross a 64 byte boundary. If
     * our UD header is bigger than the space available up to the
     * next 64 byte boundary in the WQE, use two inline data
     * segments to hold the UD header.
     */
    spc = MLX4_INLINE_ALIGN
            - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
    if (header_size <= spc) {
        inl->byte_count = cpu_to_be32(1U << 31 | header_size);
        memcpy(inl + 1, sqp->header_buf, header_size);
        i = 1;
    } else {
        inl->byte_count = cpu_to_be32(1U << 31 | spc);
        memcpy(inl + 1, sqp->header_buf, spc);

        inl = (void *) (inl + 1) + spc;
        memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
        /*
         * Need a barrier here to make sure all the data is
         * visible before the byte_count field is set.
         * Otherwise the HCA prefetcher could grab the 64-byte
         * chunk with this inline segment and get a valid (!=
         * 0xffffffff) byte count but stale data, and end up
         * generating a packet with bad headers.
         *
         * The first inline segment's byte_count field doesn't
         * need a barrier, because it comes after a
         * control/MLX segment and therefore is at an offset
         * of 16 mod 64.
         */
        wmb();
        inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
        i = 2;
    }

    *mlx_seg_len = ALIGN(i * sizeof(struct mlx4_wqe_inline_seg) + header_size,
            16);
    return 0;
}

static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq,
        struct ib_cq *ib_cq) {
    unsigned cur;
    struct mlx4_ib_cq *cq;

    cur = wq->head - wq->tail;
    if (/*likely(*/cur + nreq < wq->max_post/*)*/)
        return 0;

    cq = to_mcq(ib_cq);
    /*spin_lock(&cq->lock);*/
    cur = wq->head - wq->tail;
    /*spin_unlock(&cq->lock);*/

    return cur + nreq >= wq->max_post;
}
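/** Illustrative sketch (not driver code): head and tail above are
 * free-running unsigned counters, not masked ring indices, so the
 * occupancy stays correct across 32-bit wraparound thanks to modular
 * arithmetic:
 *
 *     unsigned head = 5, tail = 0xfffffffb;   tail posted earlier
 *     unsigned cur = head - tail;             == 10, not negative
 *
 * The queue is full once cur + nreq reaches max_post; the CQ lock in
 * the original code only makes the re-read of tail race-free against
 * the completion path.*/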
/*
 static __be32 convert_access(int acc) {
 return (
 acc & IB_ACCESS_REMOTE_ATOMIC ?
 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0)
 | (
 acc & IB_ACCESS_REMOTE_WRITE ?
 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0)
 | (
 acc & IB_ACCESS_REMOTE_READ ?
 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0)
 | (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0)
 | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }

 static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) {
 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
 int i;

 for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
 mfrpl->mapped_page_list[i] = cpu_to_be64(
 wr->wr.fast_reg.page_list->page_list[i] | MLX4_MTT_FLAG_PRESENT);

 fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
 fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
 fseg->buf_list = cpu_to_be64(mfrpl->map);
 fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
 fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
 fseg->offset = 0; XXX -- is this just for ZBVA?
 fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
 fseg->reserved[0] = 0;
 fseg->reserved[1] = 0;
 }

 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) {
 iseg->mem_key = cpu_to_be32(rkey);

 iseg->reserved1 = 0;
 iseg->reserved2 = 0;
 iseg->reserved3[0] = 0;
 iseg->reserved3[1] = 0;
 }
 */
static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
        u64 remote_addr, u32 rkey) {
    rseg->raddr = cpu_to_be64(remote_addr);
    rseg->rkey = cpu_to_be32(rkey);
    rseg->reserved = 0;
}
/*
 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
 struct ib_send_wr *wr) {
 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
 } else {
 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
 aseg->compare = 0;
 }
 }

 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
 struct ib_send_wr *wr) {
 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
 }

 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 struct ib_send_wr *wr) {
 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx4_av));
 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
 }

 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr, enum ib_qp_type qpt) {
 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
 struct mlx4_av sqp_av = { 0 };
 int port = *((u8 *) &av->ib.port_pd) & 0x3;

 force loopback
 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
 sqp_av.g_slid = av->ib.g_slid & 0x7f; no GRH
 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel
 & cpu_to_be32(0xf0000000);

 memcpy(dseg->av, &sqp_av, sizeof(struct mlx4_av));
 This function used only for sending on QP1 proxies
 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
 Use QKEY from the QP context, which is set by master
 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }

 static void build_tunnel_header(struct ib_send_wr *wr, void *wqe,
 unsigned *mlx_seg_len) {
 struct mlx4_wqe_inline_seg *inl = wqe;
 struct mlx4_ib_tunnel_header hdr;
 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
 int spc;
 int i;

 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);

 spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
 if (sizeof(hdr) <= spc) {
 memcpy(inl + 1, &hdr, sizeof(hdr));
 wmb();
 inl->byte_count = cpu_to_be32(1U << 31 | sizeof(hdr));
 i = 1;
 } else {
 memcpy(inl + 1, &hdr, spc);
 wmb();
 inl->byte_count = cpu_to_be32(1U << 31 | spc);

 inl = (void *) (inl + 1) + spc;
 memcpy(inl + 1, (void *) &hdr + spc, sizeof(hdr) - spc);
 wmb();
 inl->byte_count = cpu_to_be32(1U << 31 | (sizeof(hdr) - spc));
 i = 2;
 }

 *mlx_seg_len = ALIGN(i * sizeof(struct mlx4_wqe_inline_seg) + sizeof(hdr), 16);
 }
 */
static void set_mlx_icrc_seg(void *dseg) {
    u32 *t = dseg;
    struct mlx4_wqe_inline_seg *iseg = dseg;

    t[1] = 0;

    /** Need a barrier here before writing the byte_count field to
     * make sure that all the data is visible before the
     * byte_count field is set. Otherwise, if the segment begins
     * a new cacheline, the HCA prefetcher could grab the 64-byte
     * chunk and get a valid (!= 0xffffffff) byte count but
     * stale data, and end up sending the wrong data.*/

    wmb();

    iseg->byte_count = cpu_to_be32((1U << 31) | 4);
}
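/** Illustrative sketch (not driver code) of the publish pattern used
 * in set_mlx_icrc_seg() above and set_data_seg() below: the byte_count
 * word is what tells the HCA prefetcher a segment is valid, so it must
 * be written last, after a write barrier:
 *
 *     seg->lkey = ...;           payload fields first
 *     seg->addr = ...;
 *     wmb();                     order payload before the "valid" word
 *     seg->byte_count = ...;     publish the segment
 *
 * Writing byte_count first could let the HCA fetch the 64-byte chunk
 * between the two stores and DMA stale payload fields.*/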
static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) {
    dseg->lkey = cpu_to_be32(sg->lkey);
    dseg->addr = cpu_to_be64(sg->addr);

    /** Need a barrier here before writing the byte_count field to
     * make sure that all the data is visible before the
     * byte_count field is set. Otherwise, if the segment begins
     * a new cacheline, the HCA prefetcher could grab the 64-byte
     * chunk and get a valid (!= 0xffffffff) byte count but
     * stale data, and end up sending the wrong data.*/

    wmb();

    dseg->byte_count = cpu_to_be32(sg->length);
}

static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) {
    dseg->byte_count = cpu_to_be32(sg->length);
    dseg->lkey = cpu_to_be32(sg->lkey);
    dseg->addr = cpu_to_be64(sg->addr);
}
/*
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) {
 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);

 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
 *blh = cpu_to_be32(1 << 6);

 if (unlikely(
 !(qp->flags & MLX4_IB_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4)))
 return -EINVAL;

 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);

 *lso_hdr_sz = cpu_to_be32(
 (wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | wr->wr.ud.hlen);
 *lso_seg_len = halign;
 return 0;
 }
 */
static __be32 send_ieth(struct ib_send_wr *wr) {
    switch (wr->opcode) {
    case IB_WR_SEND_WITH_IMM:
    case IB_WR_RDMA_WRITE_WITH_IMM:
        return wr->ex.imm_data;

    case IB_WR_SEND_WITH_INV:
        return cpu_to_be32(wr->ex.invalidate_rkey);

    default:
        return 0;
    }
}
/*
 static void add_zero_len_inline(void *wqe) {
 struct mlx4_wqe_inline_seg *inl = wqe;
 memset(wqe, 0, 16);
 inl->byte_count = cpu_to_be32(1U << 31);
 }
 */
static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
        void *wqe, int *sz) {
    struct mlx4_wqe_inline_seg *seg;
    void *addr;
    int len, seg_len;
    int num_seg;
    int off, to_copy;
    int i;
    int inl = 0;

    seg = wqe;
    wqe += sizeof *seg;
    off = ((unsigned long) wqe) & (unsigned long) (MLX4_INLINE_ALIGN - 1);
    num_seg = 0;
    seg_len = 0;

    for (i = 0; i < wr->num_sge; ++i) {
        addr = (void *) (unsigned long) (wr->sg_list[i].addr);
        len = wr->sg_list[i].length;
        inl += len;

        if (inl > qp->max_inline_data) {
            inl = 0;
            return -1;
        }

        while (len >= MLX4_INLINE_ALIGN - off) {
            to_copy = MLX4_INLINE_ALIGN - off;
            memcpy(wqe, addr, to_copy);
            len -= to_copy;
            wqe += to_copy;
            addr += to_copy;
            seg_len += to_copy;
            wmb();
            /*see comment below*/
            seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
            seg_len = 0;
            seg = wqe;
            wqe += sizeof *seg;
            off = sizeof *seg;
            ++num_seg;
        }

        memcpy(wqe, addr, len);
        wqe += len;
        seg_len += len;
        off += len;
    }

    if (seg_len) {
        ++num_seg;

        /** Need a barrier here to make sure
         * all the data is visible before the
         * byte_count field is set. Otherwise
         * the HCA prefetcher could grab the
         * 64-byte chunk with this inline
         * segment and get a valid (!=
         * 0xffffffff) byte count but stale
         * data, and end up sending the wrong
         * data.*/

        wmb();
        seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
    }

    *sz = (inl + num_seg * sizeof *seg + 15) / 16;

    return 0;
}
/*
 * Avoid using memcpy() to copy to BlueFlame page, since memcpy()
 * implementations may use move-string-buffer assembler instructions,
 * which do not guarantee order of copying.
 */
static void mlx4_bf_copy(unsigned long *dst, unsigned long *src,
        unsigned bytecnt) {
    __iowrite64_copy(dst, src, bytecnt / 8);
}
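/** Illustrative sketch (not driver code): a BlueFlame post writes the
 * whole WQE through a write-combining doorbell page instead of ringing
 * the regular doorbell, saving the HCA a DMA read of the descriptor.
 * The ordering rules the code below follows are:
 *
 *     wmb();                            WQE in memory is complete
 *     mlx4_bf_copy(bf->reg + bf->offset, wqe, ALIGN(size * 16, 64));
 *     wc_wmb();                         flush the WC buffer to the HCA
 *     bf->offset ^= bf->buf_size;       alternate the two halves
 *
 * Copies must be 64-bit stores in ascending order (hence
 * __iowrite64_copy rather than memcpy), and toggling between the two
 * halves of the BlueFlame register lets a new post start while the
 * previous one may still be draining.*/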
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        struct ib_send_wr **bad_wr) {
    struct mlx4_ib_qp *qp = to_mqp(ibqp);
    void *wqe;
    struct mlx4_wqe_ctrl_seg *uninitialized_var(ctrl);
    struct mlx4_wqe_data_seg *dseg;
    /*unsigned long flags;*/
    int nreq;
    int err = 0;
    unsigned ind;
    int uninitialized_var(stamp);
    int uninitialized_var(size);
    unsigned uninitialized_var(seglen);
    __be32 dummy;
    __be32 *lso_wqe;
    __be32 uninitialized_var(lso_hdr_sz);
    __be32 blh;
    int i;
    int inl = 0;
    u32 *aux;
    /*spin_lock_irqsave(&qp->sq.lock, flags);*/

    ind = qp->sq_next_wqe;

    for (nreq = 0; wr; ++nreq, wr = wr->next) {
        lso_wqe = &dummy;
        blh = 0;

        if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
            err = -ENOMEM;
            *bad_wr = wr;
            goto out;
        }

        if (unlikely(wr->num_sge > qp->sq.max_gs)) {
            err = -EINVAL;
            *bad_wr = wr;
            goto out;
        }

        ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
        aux = (u32 *) &ctrl->vlan_tag;
        *aux = 0;
        qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

        ctrl->srcrb_flags = (
                wr->send_flags & IB_SEND_SIGNALED ?
                    cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0)
                | (wr->send_flags & IB_SEND_SOLICITED ?
                    cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0)
                | ((wr->send_flags & IB_SEND_IP_CSUM) ?
                    cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM
                            | MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0)
                | qp->sq_signal_bits;

        ctrl->imm = send_ieth(wr);

        wqe += sizeof *ctrl;
        size = sizeof *ctrl / 16;

        switch (qp->mlx4_ib_qp_type) {
        case MLX4_IB_QPT_RC:
        case MLX4_IB_QPT_UC:
            switch (wr->opcode) {
            case IB_WR_ATOMIC_CMP_AND_SWP:
            case IB_WR_ATOMIC_FETCH_AND_ADD:
            case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
                assert(!"NYI");
                /*set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                 wr->wr.atomic.rkey);
                 wqe += sizeof(struct mlx4_wqe_raddr_seg);

                 set_atomic_seg(wqe, wr);
                 wqe += sizeof(struct mlx4_wqe_atomic_seg);

                 size += (sizeof(struct mlx4_wqe_raddr_seg)
                 + sizeof(struct mlx4_wqe_atomic_seg)) / 16;*/
                break;

            case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
                assert(!"NYI");
                /*set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                 wr->wr.atomic.rkey);
                 wqe += sizeof(struct mlx4_wqe_raddr_seg);

                 set_masked_atomic_seg(wqe, wr);
                 wqe += sizeof(struct mlx4_wqe_masked_atomic_seg);

                 size += (sizeof(struct mlx4_wqe_raddr_seg)
                 + sizeof(struct mlx4_wqe_masked_atomic_seg)) / 16;*/
                break;

            case IB_WR_RDMA_READ:
            case IB_WR_RDMA_WRITE:
            case IB_WR_RDMA_WRITE_WITH_IMM:
                set_raddr_seg(wqe, wr->wr.rdma.remote_addr, wr->wr.rdma.rkey);
                wqe += sizeof(struct mlx4_wqe_raddr_seg);
                size += sizeof(struct mlx4_wqe_raddr_seg) / 16;
                break;

            case IB_WR_LOCAL_INV:
                assert(!"NYI");
                /*ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
                 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
                 wqe += sizeof(struct mlx4_wqe_local_inval_seg);
                 size += sizeof(struct mlx4_wqe_local_inval_seg) / 16;*/
                break;

            case IB_WR_FAST_REG_MR:
                assert(!"NYI");
                /*ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
                 set_fmr_seg(wqe, wr);
                 wqe += sizeof(struct mlx4_wqe_fmr_seg);
                 size += sizeof(struct mlx4_wqe_fmr_seg) / 16;*/
                break;

            default:
                /*No extra segments required for sends*/
                break;
            }
            break;
        case MLX4_IB_QPT_TUN_SMI_OWNER:
            assert(!"NYI");
            /*err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
             if (unlikely(err)) {
             *bad_wr = wr;
             goto out;
             }
             wqe += seglen;
             size += seglen / 16;*/
            break;
        case MLX4_IB_QPT_TUN_SMI:
        case MLX4_IB_QPT_TUN_GSI:
            assert(!"NYI");
            /*this is a UD qp used in MAD responses to slaves.
             set_datagram_seg(wqe, wr);
             set the forced-loopback bit in the data seg av
             *(__be32 *) wqe |= cpu_to_be32(0x80000000);
             wqe += sizeof(struct mlx4_wqe_datagram_seg);
             size += sizeof(struct mlx4_wqe_datagram_seg) / 16;*/
            break;
        case MLX4_IB_QPT_UD:
            assert(!"NYI");
            /*set_datagram_seg(wqe, wr);
             wqe += sizeof(struct mlx4_wqe_datagram_seg);
             size += sizeof(struct mlx4_wqe_datagram_seg) / 16;

             if (wr->opcode == IB_WR_LSO) {
             err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
             if (unlikely(err)) {
             *bad_wr = wr;
             goto out;
             }
             lso_wqe = (__be32 *) wqe;
             wqe += seglen;
             size += seglen / 16;
             }*/
            break;

        case MLX4_IB_QPT_PROXY_SMI_OWNER:
            assert(!"NYI");
            /*if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
             err = -ENOSYS;
             *bad_wr = wr;
             goto out;
             }
             err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
             if (unlikely(err)) {
             *bad_wr = wr;
             goto out;
             }
             wqe += seglen;
             size += seglen / 16;
             to start tunnel header on a cache-line boundary
             add_zero_len_inline(wqe);
             wqe += 16;
             size++;
             build_tunnel_header(wr, wqe, &seglen);
             wqe += seglen;
             size += seglen / 16;*/
            break;
        case MLX4_IB_QPT_PROXY_SMI:
            assert(!"NYI");
            /*don't allow QP0 sends on guests*/
            /*err = -ENOSYS;
             *bad_wr = wr;*/
            goto out;
        case MLX4_IB_QPT_PROXY_GSI:
            assert(!"NYI");
            /*If we are tunneling special qps, this is a UD qp.
             * In this case we first add a UD segment targeting
             * the tunnel qp, and then add a header with address
             * information*/
            /*set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
             ibqp->qp_type);
             wqe += sizeof(struct mlx4_wqe_datagram_seg);
             size += sizeof(struct mlx4_wqe_datagram_seg) / 16;
             build_tunnel_header(wr, wqe, &seglen);
             wqe += seglen;
             size += seglen / 16;*/
            break;

        case MLX4_IB_QPT_SMI:
        case MLX4_IB_QPT_GSI:
            err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
            if (unlikely(err)) {
                *bad_wr = wr;
                goto out;
            }
            wqe += seglen;
            size += seglen / 16;
            break;

        default:
            break;
        }

        /** Write data segments in reverse order, so as to
         * overwrite cacheline stamp last within each
         * cacheline. This avoids issues with WQE
         * prefetching.*/

        dseg = wqe;
        dseg += wr->num_sge - 1;

        /*Add one more inline data segment for ICRC for MLX sends*/
        if (unlikely(
                qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI
                || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI
                || qp->mlx4_ib_qp_type
                    & (MLX4_IB_QPT_PROXY_SMI_OWNER
                        | MLX4_IB_QPT_TUN_SMI_OWNER))) {
            set_mlx_icrc_seg(dseg + 1);
            size += sizeof(struct mlx4_wqe_data_seg) / 16;
        }

        if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
            int sz;
            err = lay_inline_data(qp, wr, wqe, &sz);
            if (!err) {
                inl = 1;
                size += sz;
            }
        } else {
            size += wr->num_sge * (sizeof(struct mlx4_wqe_data_seg) / 16);
            for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
                set_data_seg(dseg, wr->sg_list + i);
        }

        /** Possibly overwrite stamping in cacheline with LSO
         * segment only after making sure all data segments
         * are written.*/

        wmb();
        *lso_wqe = lso_hdr_sz;
        ctrl->fence_size = (
                wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0)
                | size;

        /** Make sure descriptor is fully written before
         * setting ownership bit (because HW can start
         * executing as soon as we do).*/

        wmb();

        if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
            *bad_wr = wr;
            err = -EINVAL;
            goto out;
        }

        ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode]
                | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh;

        stamp = ind + qp->sq_spare_wqes;
        ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);

        /** We can improve latency by not stamping the last
         * send queue WQE until after ringing the doorbell, so
         * only stamp here if there are still more WQEs to post.
         *
         * Same optimization applies to padding with NOP wqe
         * in case of WQE shrinking (used to prevent wrap-around
         * in the middle of WR).*/

        if (wr->next) {
            stamp_send_wqe(qp, stamp, size * 16);
            ind = pad_wraparound(qp, ind);
        }
    }
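    /** Illustrative sketch (not driver code): "size" counts the WQE in
     * 16-byte basic units, so the producer index advances by however
     * many minimum-stride slots the descriptor spans. With a control
     * segment (16 bytes), one raddr segment (16) and two data segments
     * (16 each), and an assumed wqe_shift of 6 (64-byte stride):
     *
     *     size = 4;                                4 * 16 = 64 bytes
     *     ind += DIV_ROUND_UP(4 * 16, 1U << 6);    advances one slot
     *
     * A 5-unit WQE (80 bytes) would advance ind by 2 slots instead.*/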
    out: if (nreq == 1 && inl && size > 1 && size < qp->bf.buf_size / 16) {
        ctrl->owner_opcode |= htonl((qp->sq_next_wqe & 0xffff) << 8);

        /*We set the doorbell_qpn bits to 0 above as part of vlan
         * tag initialization, so |= should be correct.*/
        aux = (u32 *) &ctrl->vlan_tag;
        *aux |= qp->doorbell_qpn;

        /** Make sure that descriptor is written to memory
         * before writing to BlueFlame page.*/

        wmb();

        ++qp->sq.head;

        mlx4_bf_copy(qp->bf.reg + qp->bf.offset, (unsigned long *) ctrl,
                ALIGN(size * 16, 64));
        wc_wmb();

        qp->bf.offset ^= qp->bf.buf_size;

    } else if (nreq) {
        qp->sq.head += nreq;

        /** Make sure that descriptors are written before
         * doorbell record.*/

        wmb();

        writel(qp->doorbell_qpn, qp->bf.uar->map + MLX4_SEND_DOORBELL);

        /** Make sure doorbells don't leak out of SQ spinlock
         * and reach the HCA out of order.*/

        mmiowb();
    }

    if (likely(nreq)) {
        stamp_send_wqe(qp, stamp, size * 16);
        ind = pad_wraparound(qp, ind);
        qp->sq_next_wqe = ind;
    }

    /*spin_unlock_irqrestore(&qp->sq.lock, flags);*/

    return err;
}
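/** Illustrative sketch (not driver code): the post path above picks
 * between two ways of notifying the HCA, roughly:
 *
 *     if (exactly one inline WQE that fits the BlueFlame buffer)
 *         copy the whole WQE through the write-combining page
 *         (no doorbell, no descriptor DMA read by the HCA);
 *     else
 *         ring the UAR send doorbell with the QPN:
 *         writel(qp->doorbell_qpn, uar + MLX4_SEND_DOORBELL);
 *
 * BlueFlame only pays off for small single WQEs; batched posts always
 * take the doorbell path so the HCA fetches the descriptors itself.*/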
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        struct ib_recv_wr **bad_wr) {
    struct mlx4_ib_qp *qp = to_mqp(ibqp);
    struct mlx4_wqe_data_seg *scat;
    /*unsigned long flags;*/
    int err = 0;
    int nreq;
    int ind;
    int max_gs;
    int i;

    max_gs = qp->rq.max_gs;
    /*spin_lock_irqsave(&qp->rq.lock, flags);*/

    ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

    for (nreq = 0; wr; ++nreq, wr = wr->next) {
        if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
            err = -ENOMEM;
            *bad_wr = wr;
            goto out;
        }

        if (/*unlikely(*/wr->num_sge > qp->rq.max_gs/*)*/) {
            err = -EINVAL;
            *bad_wr = wr;
            goto out;
        }

        scat = get_recv_wqe(qp, ind);

        if (qp->mlx4_ib_qp_type
                & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI
                    | MLX4_IB_QPT_PROXY_GSI)) {
            /*ib_dma_sync_single_for_device(ibqp->device,
             qp->sqp_proxy_rcv[ind].map,
             sizeof(struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE);*/
            scat->byte_count = cpu_to_be32(
                    sizeof(struct mlx4_ib_proxy_sqp_hdr));
            /*use dma lkey from upper layer entry*/
            scat->lkey = cpu_to_be32(wr->sg_list->lkey);
            scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
            scat++;
            max_gs--;
        }

        for (i = 0; i < wr->num_sge; ++i)
            __set_data_seg(scat + i, wr->sg_list + i);

        if (i < max_gs) {
            scat[i].byte_count = 0;
            scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
            scat[i].addr = 0;
        }

        qp->rq.wrid[ind] = wr->wr_id;

        ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
    }

    out: if (/*likely(*/nreq/*)*/) {
        qp->rq.head += nreq;

        /** Make sure that descriptors are written before
         * doorbell record.*/

        wmb();

        *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
    }

    /*spin_unlock_irqrestore(&qp->rq.lock, flags);*/

    return err;
}
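/** Illustrative sketch (not driver code): the receive queue has no
 * per-WQE ownership bit; instead the HCA polls a doorbell record in
 * host memory that holds the low 16 bits of the producer counter, so a
 * batch of N posts is published with a single store:
 *
 *     post WQEs 0..N-1 into the ring;
 *     wmb();                                   WQEs before the counter
 *     *qp->db.db = cpu_to_be32((head + N) & 0xffff);
 *
 * The HCA compares the record against its consumer index to learn how
 * many receive WQEs are available; no MMIO write is needed here.*/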
/*
 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) {
 switch (mlx4_state) {
 case MLX4_QP_STATE_RST:
 return IB_QPS_RESET;
 case MLX4_QP_STATE_INIT:
 return IB_QPS_INIT;
 case MLX4_QP_STATE_RTR:
 return IB_QPS_RTR;
 case MLX4_QP_STATE_RTS:
 return IB_QPS_RTS;
 case MLX4_QP_STATE_SQ_DRAINING:
 case MLX4_QP_STATE_SQD:
 return IB_QPS_SQD;
 case MLX4_QP_STATE_SQER:
 return IB_QPS_SQE;
 case MLX4_QP_STATE_ERR:
 return IB_QPS_ERR;
 default:
 return -1;
 }
 }

 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) {
 switch (mlx4_mig_state) {
 case MLX4_QP_PM_ARMED:
 return IB_MIG_ARMED;
 case MLX4_QP_PM_REARM:
 return IB_MIG_REARM;
 case MLX4_QP_PM_MIGRATED:
 return IB_MIG_MIGRATED;
 default:
 return -1;
 }
 }

 static int to_ib_qp_access_flags(int mlx4_flags) {
 int ib_flags = 0;

 if (mlx4_flags & MLX4_QP_BIT_RRE)
 ib_flags |= IB_ACCESS_REMOTE_READ;
 if (mlx4_flags & MLX4_QP_BIT_RWE)
 ib_flags |= IB_ACCESS_REMOTE_WRITE;
 if (mlx4_flags & MLX4_QP_BIT_RAE)
 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;

 return ib_flags;
 }

 static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev,
 struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) {
 struct mlx4_dev *dev = ibdev->dev;
 int is_eth;

 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;

 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
 return;

 is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num)
 == IB_LINK_LAYER_ETHERNET;
 if (is_eth)
 ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7)
 | ((path->sched_queue & 4) << 1);
 else
 ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;

 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
 ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
 ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 if (ib_ah_attr->ah_flags) {
 ib_ah_attr->grh.sgid_index = path->mgid_index;
 ib_ah_attr->grh.hop_limit = path->hop_limit;
 ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20)
 & 0xff;
 ib_ah_attr->grh.flow_label = be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
 memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
 }
 }

 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) {
 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 struct mlx4_ib_qp *qp = to_mqp(ibqp);
 struct mlx4_qp_context context;
 int mlx4_state;
 int err = 0;

 mutex_lock(&qp->mutex);

 if (qp->state == IB_QPS_RESET) {
 qp_attr->qp_state = IB_QPS_RESET;
 goto done;
 }

 err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
 if (err) {
 err = -EINVAL;
 goto out;
 }

 mlx4_state = be32_to_cpu(context.flags) >> 28;

 qp->state = to_ib_qp_state(mlx4_state);
 qp_attr->qp_state = qp->state;
 qp_attr->path_mtu = context.mtu_msgmax >> 5;
 qp_attr->path_mig_state = to_ib_mig_state(
 (be32_to_cpu(context.flags) >> 11) & 0x3);
 qp_attr->qkey = be32_to_cpu(context.qkey);
 qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
 qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
 qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
 qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2));

 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
 }

 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
 if (qp_attr->qp_state == IB_QPS_INIT)
 qp_attr->port_num = qp->port;
 else
 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;

 qp_attr->en_sqd_async_notify is only applicable in modify qp
 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;

 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);

 qp_attr->max_dest_rd_atomic = 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
 qp_attr->min_rnr_timer = (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
 qp_attr->timeout = context.pri_path.ackto >> 3;
 qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
 qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
 qp_attr->alt_timeout = context.alt_path.ackto >> 3;

 done: qp_attr->cur_qp_state = qp_attr->qp_state;
 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
 qp_attr->cap.max_recv_sge = qp->rq.max_gs;

 if (!ibqp->uobject) {
 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
 qp_attr->cap.max_send_sge = qp->sq.max_gs;
 } else {
 qp_attr->cap.max_send_wr = 0;
 qp_attr->cap.max_send_sge = 0;
 }

 We don't support inline sends for kernel QPs (yet), and we
 don't know what userspace's value should be.
 qp_attr->cap.max_inline_data = 0;

 qp_init_attr->cap = qp_attr->cap;

 qp_init_attr->create_flags = 0;
 if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;

 if (qp->flags & MLX4_IB_QP_LSO)
 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;

 if (qp->flags & MLX4_IB_QP_NETIF)
 qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;

 qp_init_attr->sq_sig_type =
 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

 qp_init_attr->qpg_type = ibqp->qpg_type;
 if (ibqp->qpg_type == IB_QPG_PARENT)
 qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz;
 else
 qp_init_attr->cap.qpg_tss_mask_sz = 0;

 out: mutex_unlock(&qp->mutex);
 return err;
 }
 */