/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/rculist.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"

static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
                   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
                 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
                 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
                 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
                   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
support"); 85 86unsigned int ib_ipath_max_mcast_grps = 16384; 87module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint, 88 S_IWUSR | S_IRUGO); 89MODULE_PARM_DESC(max_mcast_grps, 90 "Maximum number of multicast groups to support"); 91 92unsigned int ib_ipath_max_mcast_qp_attached = 16; 93module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached, 94 uint, S_IWUSR | S_IRUGO); 95MODULE_PARM_DESC(max_mcast_qp_attached, 96 "Maximum number of attached QPs to support"); 97 98unsigned int ib_ipath_max_srqs = 1024; 99module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO); 100MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); 101 102unsigned int ib_ipath_max_srq_sges = 128; 103module_param_named(max_srq_sges, ib_ipath_max_srq_sges, 104 uint, S_IWUSR | S_IRUGO); 105MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); 106 107unsigned int ib_ipath_max_srq_wrs = 0x1FFFF; 108module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, 109 uint, S_IWUSR | S_IRUGO); 110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); 111 112static unsigned int ib_ipath_disable_sma; 113module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); 114MODULE_PARM_DESC(disable_sma, "Disable the SMA"); 115 116/* 117 * Note that it is OK to post send work requests in the SQE and ERR 118 * states; ipath_do_send() will process them and generate error 119 * completions as per IB 1.2 C10-96. 120 */ 121const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 122 [IB_QPS_RESET] = 0, 123 [IB_QPS_INIT] = IPATH_POST_RECV_OK, 124 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 125 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 126 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | 127 IPATH_PROCESS_NEXT_SEND_OK, 128 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 129 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 130 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 131 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, 132 [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | 133 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, 134}; 135 136struct ipath_ucontext { 137 struct ib_ucontext ibucontext; 138}; 139 140static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext 141 *ibucontext) 142{ 143 return container_of(ibucontext, struct ipath_ucontext, ibucontext); 144} 145 146/* 147 * Translate ib_wr_opcode into ib_wc_opcode. 148 */ 149const enum ib_wc_opcode ib_ipath_wc_opcode[] = { 150 [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, 151 [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, 152 [IB_WR_SEND] = IB_WC_SEND, 153 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, 154 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, 155 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, 156 [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD 157}; 158 159/* 160 * System image GUID. 
/*
 * System image GUID.
 */
static __be64 sys_image_guid;

/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
        struct ipath_sge *sge = &ss->sge;

        while (length) {
                u32 len = sge->length;

                if (len > length)
                        len = length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
                BUG_ON(len == 0);
                memcpy(sge->vaddr, data, len);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (--ss->num_sge)
                                *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr != NULL) {
                        if (++sge->n >= IPATH_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                data += len;
                length -= len;
        }
}
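
/*
 * Editor's note: the loop above is the driver's common SGE-walk pattern.
 * Each pass consumes min(length, sge->length, sge->sge_length) bytes and
 * then either steps to the next SGE in the list (when the SGE is
 * exhausted) or to the next memory-region segment (IPATH_SEGSZ segments
 * per map entry).  The same pattern recurs in ipath_skip_sge(),
 * ipath_count_sge(), ipath_copy_from_sge() and update_sge() below.
 */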

void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
        struct ipath_sge *sge = &ss->sge;

        while (length) {
                u32 len = sge->length;

                if (len > length)
                        len = length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
                BUG_ON(len == 0);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (--ss->num_sge)
                                *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr != NULL) {
                        if (++sge->n >= IPATH_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                length -= len;
        }
}

/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the ipath_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
{
        struct ipath_sge *sg_list = ss->sg_list;
        struct ipath_sge sge = ss->sge;
        u8 num_sge = ss->num_sge;
        u32 ndesc = 1;  /* count the header */

        while (length) {
                u32 len = sge.length;

                if (len > length)
                        len = length;
                if (len > sge.sge_length)
                        len = sge.sge_length;
                BUG_ON(len == 0);
                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
                    (len != length && (len & (sizeof(u32) - 1)))) {
                        ndesc = 0;
                        break;
                }
                ndesc++;
                sge.vaddr += len;
                sge.length -= len;
                sge.sge_length -= len;
                if (sge.sge_length == 0) {
                        if (--num_sge)
                                sge = *sg_list++;
                } else if (sge.length == 0 && sge.mr != NULL) {
                        if (++sge.n >= IPATH_SEGSZ) {
                                if (++sge.m >= sge.mr->mapsz)
                                        break;
                                sge.n = 0;
                        }
                        sge.vaddr =
                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
                        sge.length =
                                sge.mr->map[sge.m]->segs[sge.n].length;
                }
                length -= len;
        }
        return ndesc;
}

/*
 * Copy from the SGEs to the data buffer.
 */
static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
                                u32 length)
{
        struct ipath_sge *sge = &ss->sge;

        while (length) {
                u32 len = sge->length;

                if (len > length)
                        len = length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
                BUG_ON(len == 0);
                memcpy(data, sge->vaddr, len);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (--ss->num_sge)
                                *sge = *ss->sg_list++;
                } else if (sge->length == 0 && sge->mr != NULL) {
                        if (++sge->n >= IPATH_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                data += len;
                length -= len;
        }
}

/**
 * ipath_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
        struct ipath_swqe *wqe;
        u32 next;
        int i;
        int j;
        int acc;
        int ret;
        unsigned long flags;
        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;

        spin_lock_irqsave(&qp->s_lock, flags);

        if (qp->ibqp.qp_type != IB_QPT_SMI &&
            !(dd->ipath_flags & IPATH_LINKACTIVE)) {
                ret = -ENETDOWN;
                goto bail;
        }

        /* Check that state is OK to post send. */
        if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
                goto bail_inval;

        /* IB spec says that num_sge == 0 is OK. */
        if (wr->num_sge > qp->s_max_sge)
                goto bail_inval;

        /*
         * Don't allow RDMA reads or atomic operations on UC or
         * undefined operations.
         * Make sure buffer is large enough to hold the result for atomics.
         */
        if (qp->ibqp.qp_type == IB_QPT_UC) {
                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
                        goto bail_inval;
        } else if (qp->ibqp.qp_type == IB_QPT_UD) {
                /* Check UD opcode */
                if (wr->opcode != IB_WR_SEND &&
                    wr->opcode != IB_WR_SEND_WITH_IMM)
                        goto bail_inval;
                /* Check UD destination address PD */
                if (qp->ibqp.pd != wr->wr.ud.ah->pd)
                        goto bail_inval;
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
                goto bail_inval;
        else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
                 (wr->num_sge == 0 ||
                  wr->sg_list[0].length < sizeof(u64) ||
                  wr->sg_list[0].addr & (sizeof(u64) - 1)))
                goto bail_inval;
        else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
                goto bail_inval;

        next = qp->s_head + 1;
        if (next >= qp->s_size)
                next = 0;
        if (next == qp->s_last) {
                ret = -ENOMEM;
                goto bail;
        }

        wqe = get_swqe_ptr(qp, qp->s_head);
        wqe->wr = *wr;
        wqe->length = 0;
        if (wr->num_sge) {
                acc = wr->opcode >= IB_WR_RDMA_READ ?
                        IB_ACCESS_LOCAL_WRITE : 0;
                for (i = 0, j = 0; i < wr->num_sge; i++) {
                        u32 length = wr->sg_list[i].length;
                        int ok;

                        if (length == 0)
                                continue;
                        ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
                                           &wr->sg_list[i], acc);
                        if (!ok)
                                goto bail_inval;
                        wqe->length += length;
                        j++;
                }
                wqe->wr.num_sge = j;
        }
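        /*
         * Editor's note: RC/UC payloads are capped at 2^31 bytes below,
         * matching the max_msg_sz of 0x80000000 advertised by
         * ipath_query_port(); UD payloads must fit in a single IB MTU.
         */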
        if (qp->ibqp.qp_type == IB_QPT_UC ||
            qp->ibqp.qp_type == IB_QPT_RC) {
                if (wqe->length > 0x80000000U)
                        goto bail_inval;
        } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
                goto bail_inval;
        wqe->ssn = qp->s_ssn++;
        qp->s_head = next;

        ret = 0;
        goto bail;

bail_inval:
        ret = -EINVAL;
bail:
        spin_unlock_irqrestore(&qp->s_lock, flags);
        return ret;
}

/**
 * ipath_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                           struct ib_send_wr **bad_wr)
{
        struct ipath_qp *qp = to_iqp(ibqp);
        int err = 0;

        for (; wr; wr = wr->next) {
                err = ipath_post_one_send(qp, wr);
                if (err) {
                        *bad_wr = wr;
                        goto bail;
                }
        }

        /* Try to do the send work in the caller's context. */
        ipath_do_send((unsigned long) qp);

bail:
        return err;
}

/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                              struct ib_recv_wr **bad_wr)
{
        struct ipath_qp *qp = to_iqp(ibqp);
        struct ipath_rwq *wq = qp->r_rq.wq;
        unsigned long flags;
        int ret;

        /* Check that state is OK to post receive. */
        if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
                *bad_wr = wr;
                ret = -EINVAL;
                goto bail;
        }

        for (; wr; wr = wr->next) {
                struct ipath_rwqe *wqe;
                u32 next;
                int i;

                if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto bail;
                }

                spin_lock_irqsave(&qp->r_rq.lock, flags);
                next = wq->head + 1;
                if (next >= qp->r_rq.size)
                        next = 0;
                if (next == wq->tail) {
                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto bail;
                }

                wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
                wqe->wr_id = wr->wr_id;
                wqe->num_sge = wr->num_sge;
                for (i = 0; i < wr->num_sge; i++)
                        wqe->sg_list[i] = wr->sg_list[i];
                /* Make sure queue entry is written before the head index. */
                smp_wmb();
                wq->head = next;
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
        }
        ret = 0;

bail:
        return ret;
}

/**
 * ipath_qp_rcv - process an incoming packet on a QP
 * @dev: the device the packet came on
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from ipath_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void ipath_qp_rcv(struct ipath_ibdev *dev,
                         struct ipath_ib_header *hdr, int has_grh,
                         void *data, u32 tlen, struct ipath_qp *qp)
{
        /* Check for valid receive state. */
        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
                dev->n_pkt_drops++;
                return;
        }

        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                if (ib_ipath_disable_sma)
                        break;
                /* FALLTHROUGH */
        case IB_QPT_UD:
                ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
                break;

        case IB_QPT_RC:
                ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
                break;

        case IB_QPT_UC:
                ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
                break;

        default:
                break;
        }
}

/**
 * ipath_ib_rcv - process an incoming packet
 * @dev: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level.  Tlen is the length of the header + data + CRC in bytes.
 */
void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
                  u32 tlen)
{
        struct ipath_ib_header *hdr = rhdr;
        struct ipath_other_headers *ohdr;
        struct ipath_qp *qp;
        u32 qp_num;
        int lnh;
        u8 opcode;
        u16 lid;

        if (unlikely(dev == NULL))
                goto bail;
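
        /*
         * Editor's note: 24 bytes is the smallest valid packet, i.e. an
         * 8-byte LRH plus a 12-byte BTH plus the 4-byte ICRC.
         */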
        if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
                dev->rcv_errors++;
                goto bail;
        }

        /* Check for a valid destination LID (see ch. 7.11.1). */
        lid = be16_to_cpu(hdr->lrh[1]);
        if (lid < IPATH_MULTICAST_LID_BASE) {
                lid &= ~((1 << dev->dd->ipath_lmc) - 1);
                if (unlikely(lid != dev->dd->ipath_lid)) {
                        dev->rcv_errors++;
                        goto bail;
                }
        }

        /* Check for GRH */
        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
        if (lnh == IPATH_LRH_BTH)
                ohdr = &hdr->u.oth;
        else if (lnh == IPATH_LRH_GRH)
                ohdr = &hdr->u.l.oth;
        else {
                dev->rcv_errors++;
                goto bail;
        }

        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
        dev->opstats[opcode].n_bytes += tlen;
        dev->opstats[opcode].n_packets++;

        /* Get the destination QP number. */
        qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
        if (qp_num == IPATH_MULTICAST_QPN) {
                struct ipath_mcast *mcast;
                struct ipath_mcast_qp *p;

                if (lnh != IPATH_LRH_GRH) {
                        dev->n_pkt_drops++;
                        goto bail;
                }
                mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
                if (mcast == NULL) {
                        dev->n_pkt_drops++;
                        goto bail;
                }
                dev->n_multicast_rcv++;
                list_for_each_entry_rcu(p, &mcast->qp_list, list)
                        ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
                /*
                 * Notify ipath_multicast_detach() if it is waiting for us
                 * to finish.
                 */
                if (atomic_dec_return(&mcast->refcount) <= 1)
                        wake_up(&mcast->wait);
        } else {
                qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
                if (qp) {
                        dev->n_unicast_rcv++;
                        ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
                                     tlen, qp);
                        /*
                         * Notify ipath_destroy_qp() if it is waiting
                         * for us to finish.
                         */
                        if (atomic_dec_and_test(&qp->refcount))
                                wake_up(&qp->wait);
                } else
                        dev->n_pkt_drops++;
        }

bail:;
}

/**
 * ipath_ib_timer - verbs timer
 * @dev: the device pointer
 *
 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 * QPs which need retransmits and to collect performance numbers.
 */
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
        struct ipath_qp *resend = NULL;
        struct ipath_qp *rnr = NULL;
        struct list_head *last;
        struct ipath_qp *qp;
        unsigned long flags;

        if (dev == NULL)
                return;

        spin_lock_irqsave(&dev->pending_lock, flags);
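        /*
         * Editor's note: dev->pending[] is used as a small timer wheel.
         * The slot about to be refilled has held its entries for a full
         * rotation, so anything still queued there is considered timed
         * out and is collected for retransmission below.
         */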
        /* Start filling the next pending queue. */
        if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
                dev->pending_index = 0;
        /* Save any requests still in the new queue, they have timed out. */
        last = &dev->pending[dev->pending_index];
        while (!list_empty(last)) {
                qp = list_entry(last->next, struct ipath_qp, timerwait);
                list_del_init(&qp->timerwait);
                qp->timer_next = resend;
                resend = qp;
                atomic_inc(&qp->refcount);
        }
        last = &dev->rnrwait;
        if (!list_empty(last)) {
                qp = list_entry(last->next, struct ipath_qp, timerwait);
                if (--qp->s_rnr_timeout == 0) {
                        do {
                                list_del_init(&qp->timerwait);
                                qp->timer_next = rnr;
                                rnr = qp;
                                atomic_inc(&qp->refcount);
                                if (list_empty(last))
                                        break;
                                qp = list_entry(last->next, struct ipath_qp,
                                                timerwait);
                        } while (qp->s_rnr_timeout == 0);
                }
        }
        /*
         * We should only be in the started state if pma_sample_start != 0
         */
        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
            --dev->pma_sample_start == 0) {
                dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
                ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
                                        &dev->ipath_rword,
                                        &dev->ipath_spkts,
                                        &dev->ipath_rpkts,
                                        &dev->ipath_xmit_wait);
        }
        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
                if (dev->pma_sample_interval == 0) {
                        u64 ta, tb, tc, td, te;

                        dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
                        ipath_snapshot_counters(dev->dd, &ta, &tb,
                                                &tc, &td, &te);

                        dev->ipath_sword = ta - dev->ipath_sword;
                        dev->ipath_rword = tb - dev->ipath_rword;
                        dev->ipath_spkts = tc - dev->ipath_spkts;
                        dev->ipath_rpkts = td - dev->ipath_rpkts;
                        dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
                } else
                        dev->pma_sample_interval--;
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);

        while (resend != NULL) {
                qp = resend;
                resend = qp->timer_next;

                spin_lock_irqsave(&qp->s_lock, flags);
                if (qp->s_last != qp->s_tail &&
                    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
                        dev->n_timeouts++;
                        ipath_restart_rc(qp, qp->s_last_psn + 1);
                }
                spin_unlock_irqrestore(&qp->s_lock, flags);

                /* Notify ipath_destroy_qp() if it is waiting. */
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        while (rnr != NULL) {
                qp = rnr;
                rnr = qp->timer_next;

                spin_lock_irqsave(&qp->s_lock, flags);
                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
                        ipath_schedule_send(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);

                /* Notify ipath_destroy_qp() if it is waiting. */
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
}

static void update_sge(struct ipath_sge_state *ss, u32 length)
{
        struct ipath_sge *sge = &ss->sge;

        sge->vaddr += length;
        sge->length -= length;
        sge->sge_length -= length;
        if (sge->sge_length == 0) {
                if (--ss->num_sge)
                        *sge = *ss->sg_list++;
        } else if (sge->length == 0 && sge->mr != NULL) {
                if (++sge->n >= IPATH_SEGSZ) {
                        if (++sge->m >= sge->mr->mapsz)
                                return;
                        sge->n = 0;
                }
                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
        }
}

#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
        return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
        return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
        return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
        return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
        return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
        return data;
}
#endif
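
/*
 * Editor's note (illustrative, little-endian case): clear_upper_bytes()
 * keeps the low n bytes of data and positions them at byte offset off
 * within the word, e.g. clear_upper_bytes(0x12345678, 2, 0) == 0x00005678
 * and clear_upper_bytes(0x12345678, 2, 1) == 0x00567800.  get_upper_bits()
 * and set_upper_bits() shift partial words down from, or up into, the
 * upper end of a dword so copy_io() below can reassemble unaligned source
 * data into whole dwords for the PIO buffer.
 */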

static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
                    u32 length, unsigned flush_wc)
{
        u32 extra = 0;
        u32 data = 0;
        u32 last;

        while (1) {
                u32 len = ss->sge.length;
                u32 off;

                if (len > length)
                        len = length;
                if (len > ss->sge.sge_length)
                        len = ss->sge.sge_length;
                BUG_ON(len == 0);
                /* If the source address is not aligned, try to align it. */
                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
                if (off) {
                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
                                            ~(sizeof(u32) - 1));
                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
                        u32 y;

                        y = sizeof(u32) - off;
                        if (len > y)
                                len = y;
                        if (len + extra >= sizeof(u32)) {
                                data |= set_upper_bits(v, extra *
                                                       BITS_PER_BYTE);
                                len = sizeof(u32) - extra;
                                if (len == length) {
                                        last = data;
                                        break;
                                }
                                __raw_writel(data, piobuf);
                                piobuf++;
                                extra = 0;
                                data = 0;
                        } else {
                                /* Clear unused upper bytes */
                                data |= clear_upper_bytes(v, len, extra);
                                if (len == length) {
                                        last = data;
                                        break;
                                }
                                extra += len;
                        }
                } else if (extra) {
                        /* Source address is aligned. */
                        u32 *addr = (u32 *) ss->sge.vaddr;
                        int shift = extra * BITS_PER_BYTE;
                        int ushift = 32 - shift;
                        u32 l = len;

                        while (l >= sizeof(u32)) {
                                u32 v = *addr;

                                data |= set_upper_bits(v, shift);
                                __raw_writel(data, piobuf);
                                data = get_upper_bits(v, ushift);
                                piobuf++;
                                addr++;
                                l -= sizeof(u32);
                        }
                        /*
                         * We still have 'extra' number of bytes leftover.
                         */
                        if (l) {
                                u32 v = *addr;

                                if (l + extra >= sizeof(u32)) {
                                        data |= set_upper_bits(v, shift);
                                        len -= l + extra - sizeof(u32);
                                        if (len == length) {
                                                last = data;
                                                break;
                                        }
                                        __raw_writel(data, piobuf);
                                        piobuf++;
                                        extra = 0;
                                        data = 0;
                                } else {
                                        /* Clear unused upper bytes */
                                        data |= clear_upper_bytes(v, l,
                                                                  extra);
                                        if (len == length) {
                                                last = data;
                                                break;
                                        }
                                        extra += l;
                                }
                        } else if (len == length) {
                                last = data;
                                break;
                        }
                } else if (len == length) {
                        u32 w;

                        /*
                         * Need to round up for the last dword in the
                         * packet.
                         */
                        w = (len + 3) >> 2;
                        __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
                        piobuf += w - 1;
                        last = ((u32 *) ss->sge.vaddr)[w - 1];
                        break;
                } else {
                        u32 w = len >> 2;

                        __iowrite32_copy(piobuf, ss->sge.vaddr, w);
                        piobuf += w;

                        extra = len & (sizeof(u32) - 1);
                        if (extra) {
                                u32 v = ((u32 *) ss->sge.vaddr)[w];

                                /* Clear unused upper bytes */
                                data = clear_upper_bytes(v, extra, 0);
                        }
                }
                update_sge(ss, len);
                length -= len;
        }
        /* Update address before sending packet. */
        update_sge(ss, length);
        if (flush_wc) {
                /* must flush early everything before trigger word */
                ipath_flush_wc();
                __raw_writel(last, piobuf);
                /* be sure trigger word is written */
                ipath_flush_wc();
        } else
                __raw_writel(last, piobuf);
}

/*
 * Convert IB rate to delay multiplier.
 */
unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
{
        switch (rate) {
        case IB_RATE_2_5_GBPS: return 8;
        case IB_RATE_5_GBPS:   return 4;
        case IB_RATE_10_GBPS:  return 2;
        case IB_RATE_20_GBPS:  return 1;
        default:               return 0;
        }
}

/*
 * Convert delay multiplier to IB rate.
 */
static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
{
        switch (mult) {
        case 8:  return IB_RATE_2_5_GBPS;
        case 4:  return IB_RATE_5_GBPS;
        case 2:  return IB_RATE_10_GBPS;
        case 1:  return IB_RATE_20_GBPS;
        default: return IB_RATE_PORT_CURRENT;
        }
}

static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
{
        struct ipath_verbs_txreq *tx = NULL;
        unsigned long flags;

        spin_lock_irqsave(&dev->pending_lock, flags);
        if (!list_empty(&dev->txreq_free)) {
                struct list_head *l = dev->txreq_free.next;

                list_del(l);
                tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);
        return tx;
}

static inline void put_txreq(struct ipath_ibdev *dev,
                             struct ipath_verbs_txreq *tx)
{
        unsigned long flags;

        spin_lock_irqsave(&dev->pending_lock, flags);
        list_add(&tx->txreq.list, &dev->txreq_free);
        spin_unlock_irqrestore(&dev->pending_lock, flags);
}

static void sdma_complete(void *cookie, int status)
{
        struct ipath_verbs_txreq *tx = cookie;
        struct ipath_qp *qp = tx->qp;
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;
        enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
                IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;

        if (atomic_dec_and_test(&qp->s_dma_busy)) {
                spin_lock_irqsave(&qp->s_lock, flags);
                if (tx->wqe)
                        ipath_send_complete(qp, tx->wqe, ibs);
                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
                     qp->s_last != qp->s_head) ||
                    (qp->s_flags & IPATH_S_WAIT_DMA))
                        ipath_schedule_send(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                wake_up(&qp->wait_dma);
        } else if (tx->wqe) {
                spin_lock_irqsave(&qp->s_lock, flags);
                ipath_send_complete(qp, tx->wqe, ibs);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        }

        if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
                kfree(tx->txreq.map_addr);
        put_txreq(dev, tx);

        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
}

static void decrement_dma_busy(struct ipath_qp *qp)
{
        unsigned long flags;

        if (atomic_dec_and_test(&qp->s_dma_busy)) {
                spin_lock_irqsave(&qp->s_lock, flags);
                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
                     qp->s_last != qp->s_head) ||
                    (qp->s_flags & IPATH_S_WAIT_DMA))
                        ipath_schedule_send(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                wake_up(&qp->wait_dma);
        }
}

/*
 * Compute the number of clock cycles of delay before sending the next packet.
 * The multipliers reflect the number of clocks for the fastest rate so
 * one tick at 4xDDR is 8 ticks at 1xSDR.
 * If the destination port will take longer to receive a packet than
 * the outgoing link can send it, we need to delay sending the next packet
 * by the difference in time it takes the receiver to receive and the sender
 * to send this packet.
 * Note that this delay is always correct for UC and RC but not always
 * optimal for UD.  For UD, the destination HCA can be different for each
 * packet, in which case, we could send packets to a different destination
 * while "waiting" for the delay.  The overhead for doing this without
 * HW support is more than just paying the cost of delaying some packets
 * unnecessarily.
 */
static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
{
        return (rcv_mult > snd_mult) ?
                (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
}
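
/*
 * Editor's note, a worked example with illustrative numbers: sending a
 * 522-dword packet from a 20 Gb/s link (snd_mult == 1) to a 2.5 Gb/s
 * receiver (rcv_mult == 8) gives a delay of
 * (522 * (8 - 1) + 1) >> 1 == 1827 ticks; when the receiver is at least
 * as fast as the sender the delay is zero.
 */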

static int ipath_verbs_send_dma(struct ipath_qp *qp,
                                struct ipath_ib_header *hdr, u32 hdrwords,
                                struct ipath_sge_state *ss, u32 len,
                                u32 plen, u32 dwords)
{
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ipath_devdata *dd = dev->dd;
        struct ipath_verbs_txreq *tx;
        u32 *piobuf;
        u32 control;
        u32 ndesc;
        int ret;

        tx = qp->s_tx;
        if (tx) {
                qp->s_tx = NULL;
                /* resend previously constructed packet */
                atomic_inc(&qp->s_dma_busy);
                ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
                if (ret) {
                        qp->s_tx = tx;
                        decrement_dma_busy(qp);
                }
                goto bail;
        }

        tx = get_txreq(dev);
        if (!tx) {
                ret = -EBUSY;
                goto bail;
        }

        /*
         * Get the saved delay count we computed for the previous packet
         * and save the delay count for this packet to be used next time
         * we get here.
         */
        control = qp->s_pkt_delay;
        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

        tx->qp = qp;
        atomic_inc(&qp->refcount);
        tx->wqe = qp->s_wqe;
        tx->txreq.callback = sdma_complete;
        tx->txreq.callback_cookie = tx;
        tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
                IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
        if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
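
        /*
         * Editor's note: the VL occupies the top four bits of the first
         * 16-bit LRH word, hence the ">> 12" below; VL15 is reserved for
         * subnet management.
         */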
        /* VL15 packets bypass credit check */
        if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
                control |= 1ULL << 31;
                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
        }

        if (len) {
                /*
                 * Don't try to DMA if it takes more descriptors than
                 * the queue holds.
                 */
                ndesc = ipath_count_sge(ss, len);
                if (ndesc >= dd->ipath_sdma_descq_cnt)
                        ndesc = 0;
        } else
                ndesc = 1;
        if (ndesc) {
                tx->hdr.pbc[0] = cpu_to_le32(plen);
                tx->hdr.pbc[1] = cpu_to_le32(control);
                memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
                tx->txreq.sg_count = ndesc;
                tx->map_len = (hdrwords + 2) << 2;
                tx->txreq.map_addr = &tx->hdr;
                atomic_inc(&qp->s_dma_busy);
                ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
                if (ret) {
                        /* save ss and length in dwords */
                        tx->ss = ss;
                        tx->len = dwords;
                        qp->s_tx = tx;
                        decrement_dma_busy(qp);
                }
                goto bail;
        }

        /* Allocate a buffer and copy the header and payload to it. */
        tx->map_len = (plen + 1) << 2;
        piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
        if (unlikely(piobuf == NULL)) {
                ret = -EBUSY;
                goto err_tx;
        }
        tx->txreq.map_addr = piobuf;
        tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
        tx->txreq.sg_count = 1;

        *piobuf++ = (__force u32) cpu_to_le32(plen);
        *piobuf++ = (__force u32) cpu_to_le32(control);
        memcpy(piobuf, hdr, hdrwords << 2);
        ipath_copy_from_sge(piobuf + hdrwords, ss, len);

        atomic_inc(&qp->s_dma_busy);
        ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
        /*
         * If we couldn't queue the DMA request, save the info
         * and try again later rather than destroying the
         * buffer and undoing the side effects of the copy.
         */
        if (ret) {
                tx->ss = NULL;
                tx->len = 0;
                qp->s_tx = tx;
                decrement_dma_busy(qp);
        }
        dev->n_unaligned++;
        goto bail;

err_tx:
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
        put_txreq(dev, tx);
bail:
        return ret;
}

static int ipath_verbs_send_pio(struct ipath_qp *qp,
                                struct ipath_ib_header *ibhdr, u32 hdrwords,
                                struct ipath_sge_state *ss, u32 len,
                                u32 plen, u32 dwords)
{
        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
        u32 *hdr = (u32 *) ibhdr;
        u32 __iomem *piobuf;
        unsigned flush_wc;
        u32 control;
        int ret;
        unsigned long flags;

        piobuf = ipath_getpiobuf(dd, plen, NULL);
        if (unlikely(piobuf == NULL)) {
                ret = -EBUSY;
                goto bail;
        }

        /*
         * Get the saved delay count we computed for the previous packet
         * and save the delay count for this packet to be used next time
         * we get here.
         */
        control = qp->s_pkt_delay;
        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

        /* VL15 packets bypass credit check */
        if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
                control |= 1ULL << 31;

        /*
         * Write the length to the control qword plus any needed flags.
         * We have to flush after the PBC for correctness on some cpus
         * or WC buffer can be written out of order.
         */
        writeq(((u64) control << 32) | plen, piobuf);
        piobuf += 2;

        flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
        if (len == 0) {
                /*
                 * If there is just the header portion, must flush before
                 * writing last word of header for correctness, and after
                 * the last header word (trigger word).
                 */
                if (flush_wc) {
                        ipath_flush_wc();
                        __iowrite32_copy(piobuf, hdr, hdrwords - 1);
                        ipath_flush_wc();
                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
                        ipath_flush_wc();
                } else
                        __iowrite32_copy(piobuf, hdr, hdrwords);
                goto done;
        }

        if (flush_wc)
                ipath_flush_wc();
        __iowrite32_copy(piobuf, hdr, hdrwords);
        piobuf += hdrwords;

        /* The common case is aligned and contained in one segment. */
        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
                u32 *addr = (u32 *) ss->sge.vaddr;

                /* Update address before sending packet. */
                update_sge(ss, len);
                if (flush_wc) {
                        __iowrite32_copy(piobuf, addr, dwords - 1);
                        /* must flush early everything before trigger word */
                        ipath_flush_wc();
                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
                        /* be sure trigger word is written */
                        ipath_flush_wc();
                } else
                        __iowrite32_copy(piobuf, addr, dwords);
                goto done;
        }
        copy_io(piobuf, ss, len, flush_wc);
done:
        if (qp->s_wqe) {
                spin_lock_irqsave(&qp->s_lock, flags);
                ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        }
        ret = 0;
bail:
        return ret;
}

/**
 * ipath_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 */
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
                     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
{
        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
        u32 plen;
        int ret;
        u32 dwords = (len + 3) >> 2;

        /*
         * Calculate the send buffer trigger address.
         * The +1 counts for the pbc control dword following the pbc length.
         */
        plen = hdrwords + dwords + 1;
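        /*
         * Editor's note, with illustrative numbers: a 14-dword (56-byte)
         * header and a 256-byte payload (dwords == 64) give
         * plen == 14 + 64 + 1 == 79 dwords.
         */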

        /*
         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
         * can defer SDMA restart until link goes ACTIVE without
         * worrying about just how we got there.
         */
        if (qp->ibqp.qp_type == IB_QPT_SMI ||
            !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
                ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
                                           plen, dwords);
        else
                ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
                                           plen, dwords);

        return ret;
}

int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
                            u64 *rwords, u64 *spkts, u64 *rpkts,
                            u64 *xmit_wait)
{
        int ret;

        if (!(dd->ipath_flags & IPATH_INITTED)) {
                /* no hardware, freeze, etc. */
                ret = -EINVAL;
                goto bail;
        }
        *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
        *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
        *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
        *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
        *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);

        ret = 0;

bail:
        return ret;
}

/**
 * ipath_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int ipath_get_counters(struct ipath_devdata *dd,
                       struct ipath_verbs_counters *cntrs)
{
        struct ipath_cregs const *crp = dd->ipath_cregs;
        int ret;

        if (!(dd->ipath_flags & IPATH_INITTED)) {
                /* no hardware, freeze, etc. */
                ret = -EINVAL;
                goto bail;
        }
        cntrs->symbol_error_counter =
                ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
        cntrs->link_error_recovery_counter =
                ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
        /*
         * The link downed counter counts when the other side downs the
         * connection.  We add in the number of times we downed the link
         * due to local link integrity errors to compensate.
         */
        cntrs->link_downed_counter =
                ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
        cntrs->port_rcv_errors =
                ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
                ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
                ipath_snap_cntr(dd, crp->cr_portovflcnt) +
                ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
                ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
                ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
                ipath_snap_cntr(dd, crp->cr_erricrccnt) +
                ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
                ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
                ipath_snap_cntr(dd, crp->cr_badformatcnt) +
                dd->ipath_rxfc_unsupvl_errs;
        if (crp->cr_rxotherlocalphyerrcnt)
                cntrs->port_rcv_errors +=
                        ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
        if (crp->cr_rxvlerrcnt)
                cntrs->port_rcv_errors +=
                        ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
        cntrs->port_rcv_remphys_errors =
                ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
        cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
        cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
        cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
        cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
        cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
        cntrs->local_link_integrity_errors =
                crp->cr_locallinkintegrityerrcnt ?
                ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
                ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
                 dd->ipath_lli_errs : dd->ipath_lli_errors);
        cntrs->excessive_buffer_overrun_errors =
                crp->cr_excessbufferovflcnt ?
                ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
                dd->ipath_overrun_thresh_errs;
        cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
                ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;

        ret = 0;

bail:
        return ret;
}

/**
 * ipath_ib_piobufavail - callback when a PIO buffer is available
 * @dev: the device pointer
 *
 * This is called from ipath_intr() at interrupt level when a PIO buffer is
 * available after ipath_verbs_send() returned an error that no buffers were
 * available.  Return 1 if we consumed all the PIO buffers and we still have
 * QPs waiting for buffers (for now, just restart the send tasklet and
 * return zero).
 */
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
        struct list_head *list;
        struct ipath_qp *qplist;
        struct ipath_qp *qp;
        unsigned long flags;

        if (dev == NULL)
                goto bail;

        list = &dev->piowait;
        qplist = NULL;

        spin_lock_irqsave(&dev->pending_lock, flags);
        while (!list_empty(list)) {
                qp = list_entry(list->next, struct ipath_qp, piowait);
                list_del_init(&qp->piowait);
                qp->pio_next = qplist;
                qplist = qp;
                atomic_inc(&qp->refcount);
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);

        while (qplist != NULL) {
                qp = qplist;
                qplist = qp->pio_next;

                spin_lock_irqsave(&qp->s_lock, flags);
                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
                        ipath_schedule_send(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);

                /* Notify ipath_destroy_qp() if it is waiting. */
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }

bail:
        return 0;
}

static int ipath_query_device(struct ib_device *ibdev,
                              struct ib_device_attr *props)
{
        struct ipath_ibdev *dev = to_idev(ibdev);

        memset(props, 0, sizeof(*props));

        props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
        props->page_size_cap = PAGE_SIZE;
        props->vendor_id =
                IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
        props->vendor_part_id = dev->dd->ipath_deviceid;
        props->hw_ver = dev->dd->ipath_pcirev;

        props->sys_image_guid = dev->sys_image_guid;

        props->max_mr_size = ~0ull;
        props->max_qp = ib_ipath_max_qps;
        props->max_qp_wr = ib_ipath_max_qp_wrs;
        props->max_sge = ib_ipath_max_sges;
        props->max_cq = ib_ipath_max_cqs;
        props->max_ah = ib_ipath_max_ahs;
        props->max_cqe = ib_ipath_max_cqes;
        props->max_mr = dev->lk_table.max;
        props->max_fmr = dev->lk_table.max;
        props->max_map_per_fmr = 32767;
        props->max_pd = ib_ipath_max_pds;
        props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
        props->max_qp_init_rd_atom = 255;
        /* props->max_res_rd_atom */
        props->max_srq = ib_ipath_max_srqs;
        props->max_srq_wr = ib_ipath_max_srq_wrs;
        props->max_srq_sge = ib_ipath_max_srq_sges;
        /* props->local_ca_ack_delay */
        props->atomic_cap = IB_ATOMIC_GLOB;
        props->max_pkeys = ipath_get_npkeys(dev->dd);
        props->max_mcast_grp = ib_ipath_max_mcast_grps;
        props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                props->max_mcast_grp;

        return 0;
}

const u8 ipath_cvt_physportstate[32] = {
        [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
        [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
        [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
        [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
        [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
        [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
        [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
                IB_PHYSPORTSTATE_CFG_TRAIN,
        [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
                IB_PHYSPORTSTATE_CFG_TRAIN,
        [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
                IB_PHYSPORTSTATE_CFG_TRAIN,
        [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
        [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
        [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
        [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
        [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};

u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
{
        return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
}

static int ipath_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
{
        struct ipath_ibdev *dev = to_idev(ibdev);
        struct ipath_devdata *dd = dev->dd;
        enum ib_mtu mtu;
        u16 lid = dd->ipath_lid;
        u64 ibcstat;

        memset(props, 0, sizeof(*props));
        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
        props->lmc = dd->ipath_lmc;
        props->sm_lid = dev->sm_lid;
        props->sm_sl = dev->sm_sl;
        ibcstat = dd->ipath_lastibcstat;
        /* map LinkState to IB portinfo values. */
        props->state = ipath_ib_linkstate(dd, ibcstat) + 1;

        /* See phys_state_show() */
        props->phys_state = /* MEA: assumes shift == 0 */
                ipath_cvt_physportstate[dd->ipath_lastibcstat &
                                        dd->ibcs_lts_mask];
        props->port_cap_flags = dev->port_cap_flags;
        props->gid_tbl_len = 1;
        props->max_msg_sz = 0x80000000;
        props->pkey_tbl_len = ipath_get_npkeys(dd);
        props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
                dev->z_pkey_violations;
        props->qkey_viol_cntr = dev->qkey_violations;
        props->active_width = dd->ipath_link_width_active;
        /* See rate_show() */
        props->active_speed = dd->ipath_link_speed_active;
        props->max_vl_num = 1;  /* VLCap = VL0 */
        props->init_type_reply = 0;

        props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
        switch (dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;
                break;
        case 2048:
                mtu = IB_MTU_2048;
                break;
        case 1024:
                mtu = IB_MTU_1024;
                break;
        case 512:
                mtu = IB_MTU_512;
                break;
        case 256:
                mtu = IB_MTU_256;
                break;
        default:
                mtu = IB_MTU_2048;
        }
        props->active_mtu = mtu;
        props->subnet_timeout = dev->subnet_timeout;

        return 0;
}

static int ipath_modify_device(struct ib_device *device,
                               int device_modify_mask,
                               struct ib_device_modify *device_modify)
{
        int ret;

        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                                   IB_DEVICE_MODIFY_NODE_DESC)) {
                ret = -EOPNOTSUPP;
                goto bail;
        }

        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
                memcpy(device->node_desc, device_modify->node_desc, 64);

        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                to_idev(device)->sys_image_guid =
                        cpu_to_be64(device_modify->sys_image_guid);

        ret = 0;

bail:
        return ret;
}

static int ipath_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
                             struct ib_port_modify *props)
{
        struct ipath_ibdev *dev = to_idev(ibdev);

        dev->port_cap_flags |= props->set_port_cap_mask;
        dev->port_cap_flags &= ~props->clr_port_cap_mask;
        if (port_modify_mask & IB_PORT_SHUTDOWN)
                ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
                dev->qkey_violations = 0;
        return 0;
}

static int ipath_query_gid(struct ib_device *ibdev, u8 port,
                           int index, union ib_gid *gid)
{
        struct ipath_ibdev *dev = to_idev(ibdev);
        int ret;

        if (index >= 1) {
                ret = -EINVAL;
                goto bail;
        }
        gid->global.subnet_prefix = dev->gid_prefix;
        gid->global.interface_id = dev->dd->ipath_guid;

        ret = 0;

bail:
        return ret;
}

static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
                                    struct ib_ucontext *context,
                                    struct ib_udata *udata)
{
        struct ipath_ibdev *dev = to_idev(ibdev);
        struct ipath_pd *pd;
        struct ib_pd *ret;

        /*
         * This is actually totally arbitrary.  Some correctness tests
         * assume there's a maximum number of PDs that can be allocated.
         * We don't actually have this limit, but we fail the test if
         * we allow allocations of more than we report for this value.
         */

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        spin_lock(&dev->n_pds_lock);
        if (dev->n_pds_allocated == ib_ipath_max_pds) {
                spin_unlock(&dev->n_pds_lock);
                kfree(pd);
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        dev->n_pds_allocated++;
        spin_unlock(&dev->n_pds_lock);

        /* ib_alloc_pd() will initialize pd->ibpd. */
        pd->user = udata != NULL;

        ret = &pd->ibpd;

bail:
        return ret;
}

static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
        struct ipath_pd *pd = to_ipd(ibpd);
        struct ipath_ibdev *dev = to_idev(ibpd->device);

        spin_lock(&dev->n_pds_lock);
        dev->n_pds_allocated--;
        spin_unlock(&dev->n_pds_lock);

        kfree(pd);

        return 0;
}

/**
 * ipath_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
                                     struct ib_ah_attr *ah_attr)
{
        struct ipath_ah *ah;
        struct ib_ah *ret;
        struct ipath_ibdev *dev = to_idev(pd->device);
        unsigned long flags;

        /* A multicast address requires a GRH (see ch. 8.4.1). */
        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
            ah_attr->dlid != IPATH_PERMISSIVE_LID &&
            !(ah_attr->ah_flags & IB_AH_GRH)) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }

        if (ah_attr->dlid == 0) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }

        if (ah_attr->port_num < 1 ||
            ah_attr->port_num > pd->device->phys_port_cnt) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }

        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        spin_lock_irqsave(&dev->n_ahs_lock, flags);
        if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
                spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
                kfree(ah);
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        dev->n_ahs_allocated++;
        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

        /* ib_create_ah() will initialize ah->ibah. */
        ah->attr = *ah_attr;
        ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);

        ret = &ah->ibah;

bail:
        return ret;
}

/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
        struct ipath_ibdev *dev = to_idev(ibah->device);
        struct ipath_ah *ah = to_iah(ibah);
        unsigned long flags;

        spin_lock_irqsave(&dev->n_ahs_lock, flags);
        dev->n_ahs_allocated--;
        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

        kfree(ah);

        return 0;
}

static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
        struct ipath_ah *ah = to_iah(ibah);

        *ah_attr = ah->attr;
        ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);

        return 0;
}

/**
 * ipath_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 */
unsigned ipath_get_npkeys(struct ipath_devdata *dd)
{
        return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}

/**
 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
 * @dd: the infinipath device
 * @index: the PKEY index
 */
unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
        unsigned ret;

        /* always a kernel port, no locking needed */
        if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
                ret = 0;
        else
                ret = dd->ipath_pd[0]->port_pkeys[index];

        return ret;
}

static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
                            u16 *pkey)
{
        struct ipath_ibdev *dev = to_idev(ibdev);
        int ret;

        if (index >= ipath_get_npkeys(dev->dd)) {
                ret = -EINVAL;
                goto bail;
        }

        *pkey = ipath_get_pkey(dev->dd, index);
        ret = 0;

bail:
        return ret;
}

/**
 * ipath_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the InfiniPath driver
 */
static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
{
        struct ipath_ucontext *context;
        struct ib_ucontext *ret;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        ret = &context->ibucontext;

bail:
        return ret;
}

static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
        kfree(to_iucontext(context));
        return 0;
}

static int ipath_verbs_register_sysfs(struct ib_device *dev);

static void __verbs_timer(unsigned long arg)
{
        struct ipath_devdata *dd = (struct ipath_devdata *) arg;

        /* Handle verbs layer timeouts. */
        ipath_ib_timer(dd->verbs_dev);

        mod_timer(&dd->verbs_timer, jiffies + 1);
}

static int enable_timer(struct ipath_devdata *dd)
{
        /*
         * Early chips had a design flaw where the chip and kernel idea
         * of the tail register don't always agree, and therefore we won't
         * get an interrupt on the next packet received.
         * If the board supports per packet receive interrupts, use it.
         * Otherwise, the timer function periodically checks for packets
         * to cover this case.
         * Either way, the timer is needed for verbs layer related
         * processing.
         */
        if (dd->ipath_flags & IPATH_GPIO_INTR) {
                ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
                                 0x2074076542310ULL);
                /* Enable GPIO bit 2 interrupt */
                dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
                                 dd->ipath_gpio_mask);
        }

        init_timer(&dd->verbs_timer);
        dd->verbs_timer.function = __verbs_timer;
        dd->verbs_timer.data = (unsigned long)dd;
        dd->verbs_timer.expires = jiffies + 1;
        add_timer(&dd->verbs_timer);

        return 0;
}

static int disable_timer(struct ipath_devdata *dd)
{
        /* Disable GPIO bit 2 interrupt */
        if (dd->ipath_flags & IPATH_GPIO_INTR) {
                dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
                                 dd->ipath_gpio_mask);
                /*
                 * We might want to undo changes to debugportselect,
                 * but how?
                 */
        }

        del_timer_sync(&dd->verbs_timer);

        return 0;
}

/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 *
 * Return 0 on success or a negative errno on failure.
 */
int ipath_register_ib_device(struct ipath_devdata *dd)
{
        struct ipath_verbs_counters cntrs;
        struct ipath_ibdev *idev;
        struct ib_device *dev;
        struct ipath_verbs_txreq *tx;
        unsigned i;
        int ret;

        idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
        if (idev == NULL) {
                ret = -ENOMEM;
                goto bail;
        }

        dev = &idev->ibdev;

        if (dd->ipath_sdma_descq_cnt) {
                tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
                             GFP_KERNEL);
                if (tx == NULL) {
                        ret = -ENOMEM;
                        goto err_tx;
                }
        } else
                tx = NULL;
        idev->txreq_bufs = tx;

        /* Only need to initialize non-zero fields. */
        spin_lock_init(&idev->n_pds_lock);
        spin_lock_init(&idev->n_ahs_lock);
        spin_lock_init(&idev->n_cqs_lock);
        spin_lock_init(&idev->n_qps_lock);
        spin_lock_init(&idev->n_srqs_lock);
        spin_lock_init(&idev->n_mcast_grps_lock);

        spin_lock_init(&idev->qp_table.lock);
        spin_lock_init(&idev->lk_table.lock);
        idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
        /* Set the prefix to the default value (see ch. 4.1.1) */
        idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);

        ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
        if (ret)
                goto err_qp;

        /*
         * The top ib_ipath_lkey_table_size bits are used to index the
         * table.  The lower 8 bits can be owned by the user (copied from
         * the LKEY).  The remaining bits act as a generation number or tag.
         */
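        /*
         * Editor's note: with the default lkey_table_size of 12 this
         * allocates a 4096-entry table indexed by the top 12 bits of
         * the LKEY.
         */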
/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 *
 * Returns 0 on success or a negative errno on failure.  On success,
 * dd->verbs_dev points to the newly allocated ipath_ibdev; on failure
 * it is set to NULL.
 */
int ipath_register_ib_device(struct ipath_devdata *dd)
{
	struct ipath_verbs_counters cntrs;
	struct ipath_ibdev *idev;
	struct ib_device *dev;
	struct ipath_verbs_txreq *tx;
	unsigned i;
	int ret;

	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
	if (idev == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	dev = &idev->ibdev;

	if (dd->ipath_sdma_descq_cnt) {
		tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
			     GFP_KERNEL);
		if (tx == NULL) {
			ret = -ENOMEM;
			goto err_tx;
		}
	} else
		tx = NULL;
	idev->txreq_bufs = tx;

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&idev->n_pds_lock);
	spin_lock_init(&idev->n_ahs_lock);
	spin_lock_init(&idev->n_cqs_lock);
	spin_lock_init(&idev->n_qps_lock);
	spin_lock_init(&idev->n_srqs_lock);
	spin_lock_init(&idev->n_mcast_grps_lock);

	spin_lock_init(&idev->qp_table.lock);
	spin_lock_init(&idev->lk_table.lock);
	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);

	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
	if (ret)
		goto err_qp;

	/*
	 * The top ib_ipath_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kzalloc(idev->lk_table.max *
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
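	/*
	 * Worked example (illustrative) of the 32-bit LKEY layout
	 * described above, for ib_ipath_lkey_table_size == 12:
	 *
	 *	bits 31..20  table index (12 bits -> 4096 entries)
	 *	bits 19..8   generation tag (catches stale, reused keys)
	 *	bits  7..0   caller-owned (copied from the requested LKEY)
	 *
	 * so a lookup indexes the table roughly as
	 *
	 *	table[lkey >> (32 - ib_ipath_lkey_table_size)]
	 *
	 * and then compares the stored key against the full LKEY.
	 */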
	INIT_LIST_HEAD(&idev->pending_mmaps);
	spin_lock_init(&idev->pending_lock);
	idev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&idev->mmap_offset_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	INIT_LIST_HEAD(&idev->txreq_free);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	/* Snapshot current HW counters to "clear" them. */
	ipath_get_counters(dd, &cntrs);
	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
	idev->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	idev->z_link_downed_counter = cntrs.link_downed_counter;
	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
	idev->z_port_rcv_remphys_errors =
		cntrs.port_rcv_remphys_errors;
	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
	idev->z_port_xmit_data = cntrs.port_xmit_data;
	idev->z_port_rcv_data = cntrs.port_rcv_data;
	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
	idev->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	idev->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	idev->z_vl15_dropped = cntrs.vl15_dropped;

	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
		list_add(&tx->txreq.list, &idev->txreq_free);

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = dd->ipath_guid;
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = dd->ipath_unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->owner = THIS_MODULE;
	dev->node_guid = dd->ipath_guid;
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
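	/*
	 * Illustrative only: before dispatching a userspace command, the
	 * uverbs core tests the corresponding bit, along the lines of
	 *
	 *	if (!(ibdev->uverbs_cmd_mask & (1ull << hdr.command)))
	 *		return -EINVAL;
	 *
	 * so commands omitted from the mask above never reach this driver.
	 */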
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev, NULL);
	if (ret)
		goto err_reg;

	/*
	 * Capture the sysfs registration error; otherwise the err_class
	 * path would return 0 (success) with dd->verbs_dev left NULL.
	 */
	ret = ipath_verbs_register_sysfs(dev);
	if (ret)
		goto err_class;

	enable_timer(dd);

	goto bail;

err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	kfree(idev->txreq_bufs);
err_tx:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;
	return ret;
}
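/*
 * Illustrative pairing (a sketch, not the actual call sites): the
 * device-init path registers the verbs device once the hardware is up,
 * and the removal path undoes it via dd->verbs_dev:
 *
 *	if (ipath_register_ib_device(dd))
 *		ipath_dev_err(dd, "IB registration failed\n");
 *	...
 *	if (dd->verbs_dev)
 *		ipath_unregister_ib_device(dd->verbs_dev);
 */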
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;
	u32 qps_inuse;

	ib_unregister_device(ibdev);

	disable_timer(dev->dd);

	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		ipath_dev_err(dev->dd, "pending list not empty!\n");
	if (!list_empty(&dev->piowait))
		ipath_dev_err(dev->dd, "piowait list not empty!\n");
	if (!list_empty(&dev->rnrwait))
		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
	if (!ipath_mcast_tree_empty())
		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	qps_inuse = ipath_free_all_qps(&dev->qp_table);
	if (qps_inuse)
		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
			      qps_inuse);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	kfree(dev->txreq_bufs);
	ib_dealloc_device(ibdev);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);

	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int ret;

	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
	if (ret < 0)
		goto bail;
	strcat(buf, "\n");
	ret = strlen(buf);

bail:
	return ret;
}

static ssize_t show_stats(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends  %d\n"
		      "RC no QACK  %d\n"
		      "RC ACKs     %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "piobuf wait %d\n"
		      "unaligned   %d\n"
		      "PKT drops   %d\n"
		      "WQE errs    %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
		      dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

static struct device_attribute *ipath_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_stats
};

static int ipath_verbs_register_sysfs(struct ib_device *dev)
{
	int i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
		/* Propagate the errno instead of collapsing it to 1. */
		ret = device_create_file(&dev->dev,
					 ipath_class_attributes[i]);
		if (ret)
			goto bail;
	}

	ret = 0;

bail:
	return ret;
}
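/*
 * Illustrative only: once the device registers as, e.g., ipath0, the
 * attributes created above are readable from userspace:
 *
 *	$ cat /sys/class/infiniband/ipath0/hw_rev
 *	$ cat /sys/class/infiniband/ipath0/stats
 */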