1/* 2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 
32 */ 33 34#include <rdma/ib_mad.h> 35#include <rdma/ib_user_verbs.h> 36#include <linux/io.h> 37#include <linux/utsname.h> 38 39#include "ipath_kernel.h" 40#include "ipath_verbs.h" 41#include "ipath_common.h" 42 43static unsigned int ib_ipath_qp_table_size = 251; 44module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); 45MODULE_PARM_DESC(qp_table_size, "QP table size"); 46 47unsigned int ib_ipath_lkey_table_size = 12; 48module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, 49 S_IRUGO); 50MODULE_PARM_DESC(lkey_table_size, 51 "LKEY table size in bits (2^n, 1 <= n <= 23)"); 52 53static unsigned int ib_ipath_max_pds = 0xFFFF; 54module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); 55MODULE_PARM_DESC(max_pds, 56 "Maximum number of protection domains to support"); 57 58static unsigned int ib_ipath_max_ahs = 0xFFFF; 59module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO); 60MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); 61 62unsigned int ib_ipath_max_cqes = 0x2FFFF; 63module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO); 64MODULE_PARM_DESC(max_cqes, 65 "Maximum number of completion queue entries to support"); 66 67unsigned int ib_ipath_max_cqs = 0x1FFFF; 68module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO); 69MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); 70 71unsigned int ib_ipath_max_qp_wrs = 0x3FFF; 72module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, 73 S_IWUSR | S_IRUGO); 74MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); 75 76unsigned int ib_ipath_max_qps = 16384; 77module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); 78MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); 79 80unsigned int ib_ipath_max_sges = 0x60; 81module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); 82MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to 
support"); 83 84unsigned int ib_ipath_max_mcast_grps = 16384; 85module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint, 86 S_IWUSR | S_IRUGO); 87MODULE_PARM_DESC(max_mcast_grps, 88 "Maximum number of multicast groups to support"); 89 90unsigned int ib_ipath_max_mcast_qp_attached = 16; 91module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached, 92 uint, S_IWUSR | S_IRUGO); 93MODULE_PARM_DESC(max_mcast_qp_attached, 94 "Maximum number of attached QPs to support"); 95 96unsigned int ib_ipath_max_srqs = 1024; 97module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO); 98MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); 99 100unsigned int ib_ipath_max_srq_sges = 128; 101module_param_named(max_srq_sges, ib_ipath_max_srq_sges, 102 uint, S_IWUSR | S_IRUGO); 103MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); 104 105unsigned int ib_ipath_max_srq_wrs = 0x1FFFF; 106module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, 107 uint, S_IWUSR | S_IRUGO); 108MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); 109 110static unsigned int ib_ipath_disable_sma; 111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); 112MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA"); 113 114const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 115 [IB_QPS_RESET] = 0, 116 [IB_QPS_INIT] = IPATH_POST_RECV_OK, 117 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 118 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 119 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 120 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 121 IPATH_POST_SEND_OK, 122 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 123 [IB_QPS_ERR] = 0, 124}; 125 126struct ipath_ucontext { 127 struct ib_ucontext ibucontext; 128}; 129 130static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext 131 *ibucontext) 132{ 133 return container_of(ibucontext, struct ipath_ucontext, 
ibucontext); 134} 135 136/* 137 * Translate ib_wr_opcode into ib_wc_opcode. 138 */ 139const enum ib_wc_opcode ib_ipath_wc_opcode[] = { 140 [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, 141 [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, 142 [IB_WR_SEND] = IB_WC_SEND, 143 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, 144 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, 145 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, 146 [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD 147}; 148 149/* 150 * System image GUID. 151 */ 152static __be64 sys_image_guid; 153 154/** 155 * ipath_copy_sge - copy data to SGE memory 156 * @ss: the SGE state 157 * @data: the data to copy 158 * @length: the length of the data 159 */ 160void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) 161{ 162 struct ipath_sge *sge = &ss->sge; 163 164 while (length) { 165 u32 len = sge->length; 166 167 BUG_ON(len == 0); 168 if (len > length) 169 len = length; 170 memcpy(sge->vaddr, data, len); 171 sge->vaddr += len; 172 sge->length -= len; 173 sge->sge_length -= len; 174 if (sge->sge_length == 0) { 175 if (--ss->num_sge) 176 *sge = *ss->sg_list++; 177 } else if (sge->length == 0 && sge->mr != NULL) { 178 if (++sge->n >= IPATH_SEGSZ) { 179 if (++sge->m >= sge->mr->mapsz) 180 break; 181 sge->n = 0; 182 } 183 sge->vaddr = 184 sge->mr->map[sge->m]->segs[sge->n].vaddr; 185 sge->length = 186 sge->mr->map[sge->m]->segs[sge->n].length; 187 } 188 data += len; 189 length -= len; 190 } 191} 192 193void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) 194{ 195 struct ipath_sge *sge = &ss->sge; 196 197 while (length) { 198 u32 len = sge->length; 199 200 BUG_ON(len == 0); 201 if (len > length) 202 len = length; 203 sge->vaddr += len; 204 sge->length -= len; 205 sge->sge_length -= len; 206 if (sge->sge_length == 0) { 207 if (--ss->num_sge) 208 *sge = *ss->sg_list++; 209 } else if (sge->length == 0 && sge->mr != NULL) { 210 if (++sge->n >= IPATH_SEGSZ) { 211 if (++sge->m >= sge->mr->mapsz) 212 break; 213 sge->n = 0; 214 } 215 
sge->vaddr = 216 sge->mr->map[sge->m]->segs[sge->n].vaddr; 217 sge->length = 218 sge->mr->map[sge->m]->segs[sge->n].length; 219 } 220 length -= len; 221 } 222} 223 224/** 225 * ipath_post_send - post a send on a QP 226 * @ibqp: the QP to post the send on 227 * @wr: the list of work requests to post 228 * @bad_wr: the first bad WR is put here 229 * 230 * This may be called from interrupt context. 231 */ 232static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 233 struct ib_send_wr **bad_wr) 234{ 235 struct ipath_qp *qp = to_iqp(ibqp); 236 int err = 0; 237 238 /* Check that state is OK to post send. */ 239 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) { 240 *bad_wr = wr; 241 err = -EINVAL; 242 goto bail; 243 } 244 245 for (; wr; wr = wr->next) { 246 switch (qp->ibqp.qp_type) { 247 case IB_QPT_UC: 248 case IB_QPT_RC: 249 err = ipath_post_ruc_send(qp, wr); 250 break; 251 252 case IB_QPT_SMI: 253 case IB_QPT_GSI: 254 case IB_QPT_UD: 255 err = ipath_post_ud_send(qp, wr); 256 break; 257 258 default: 259 err = -EINVAL; 260 } 261 if (err) { 262 *bad_wr = wr; 263 break; 264 } 265 } 266 267bail: 268 return err; 269} 270 271/** 272 * ipath_post_receive - post a receive on a QP 273 * @ibqp: the QP to post the receive on 274 * @wr: the WR to post 275 * @bad_wr: the first bad WR is put here 276 * 277 * This may be called from interrupt context. 278 */ 279static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 280 struct ib_recv_wr **bad_wr) 281{ 282 struct ipath_qp *qp = to_iqp(ibqp); 283 struct ipath_rwq *wq = qp->r_rq.wq; 284 unsigned long flags; 285 int ret; 286 287 /* Check that state is OK to post receive. 
*/ 288 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { 289 *bad_wr = wr; 290 ret = -EINVAL; 291 goto bail; 292 } 293 294 for (; wr; wr = wr->next) { 295 struct ipath_rwqe *wqe; 296 u32 next; 297 int i; 298 299 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { 300 *bad_wr = wr; 301 ret = -ENOMEM; 302 goto bail; 303 } 304 305 spin_lock_irqsave(&qp->r_rq.lock, flags); 306 next = wq->head + 1; 307 if (next >= qp->r_rq.size) 308 next = 0; 309 if (next == wq->tail) { 310 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 311 *bad_wr = wr; 312 ret = -ENOMEM; 313 goto bail; 314 } 315 316 wqe = get_rwqe_ptr(&qp->r_rq, wq->head); 317 wqe->wr_id = wr->wr_id; 318 wqe->num_sge = wr->num_sge; 319 for (i = 0; i < wr->num_sge; i++) 320 wqe->sg_list[i] = wr->sg_list[i]; 321 wq->head = next; 322 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 323 } 324 ret = 0; 325 326bail: 327 return ret; 328} 329 330/** 331 * ipath_qp_rcv - processing an incoming packet on a QP 332 * @dev: the device the packet came on 333 * @hdr: the packet header 334 * @has_grh: true if the packet has a GRH 335 * @data: the packet data 336 * @tlen: the packet length 337 * @qp: the QP the packet came on 338 * 339 * This is called from ipath_ib_rcv() to process an incoming packet 340 * for the given QP. 341 * Called at interrupt level. 342 */ 343static void ipath_qp_rcv(struct ipath_ibdev *dev, 344 struct ipath_ib_header *hdr, int has_grh, 345 void *data, u32 tlen, struct ipath_qp *qp) 346{ 347 /* Check for valid receive state. 
*/ 348 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 349 dev->n_pkt_drops++; 350 return; 351 } 352 353 switch (qp->ibqp.qp_type) { 354 case IB_QPT_SMI: 355 case IB_QPT_GSI: 356 if (ib_ipath_disable_sma) 357 break; 358 /* FALLTHROUGH */ 359 case IB_QPT_UD: 360 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); 361 break; 362 363 case IB_QPT_RC: 364 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); 365 break; 366 367 case IB_QPT_UC: 368 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); 369 break; 370 371 default: 372 break; 373 } 374} 375 376/** 377 * ipath_ib_rcv - process an incoming packet 378 * @arg: the device pointer 379 * @rhdr: the header of the packet 380 * @data: the packet data 381 * @tlen: the packet length 382 * 383 * This is called from ipath_kreceive() to process an incoming packet at 384 * interrupt level. Tlen is the length of the header + data + CRC in bytes. 385 */ 386void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, 387 u32 tlen) 388{ 389 struct ipath_ib_header *hdr = rhdr; 390 struct ipath_other_headers *ohdr; 391 struct ipath_qp *qp; 392 u32 qp_num; 393 int lnh; 394 u8 opcode; 395 u16 lid; 396 397 if (unlikely(dev == NULL)) 398 goto bail; 399 400 if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ 401 dev->rcv_errors++; 402 goto bail; 403 } 404 405 /* Check for a valid destination LID (see ch. 7.11.1). 
*/ 406 lid = be16_to_cpu(hdr->lrh[1]); 407 if (lid < IPATH_MULTICAST_LID_BASE) { 408 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 409 if (unlikely(lid != dev->dd->ipath_lid)) { 410 dev->rcv_errors++; 411 goto bail; 412 } 413 } 414 415 /* Check for GRH */ 416 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 417 if (lnh == IPATH_LRH_BTH) 418 ohdr = &hdr->u.oth; 419 else if (lnh == IPATH_LRH_GRH) 420 ohdr = &hdr->u.l.oth; 421 else { 422 dev->rcv_errors++; 423 goto bail; 424 } 425 426 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 427 dev->opstats[opcode].n_bytes += tlen; 428 dev->opstats[opcode].n_packets++; 429 430 /* Get the destination QP number. */ 431 qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK; 432 if (qp_num == IPATH_MULTICAST_QPN) { 433 struct ipath_mcast *mcast; 434 struct ipath_mcast_qp *p; 435 436 if (lnh != IPATH_LRH_GRH) { 437 dev->n_pkt_drops++; 438 goto bail; 439 } 440 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); 441 if (mcast == NULL) { 442 dev->n_pkt_drops++; 443 goto bail; 444 } 445 dev->n_multicast_rcv++; 446 list_for_each_entry_rcu(p, &mcast->qp_list, list) 447 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); 448 /* 449 * Notify ipath_multicast_detach() if it is waiting for us 450 * to finish. 451 */ 452 if (atomic_dec_return(&mcast->refcount) <= 1) 453 wake_up(&mcast->wait); 454 } else { 455 qp = ipath_lookup_qpn(&dev->qp_table, qp_num); 456 if (qp) { 457 dev->n_unicast_rcv++; 458 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, 459 tlen, qp); 460 /* 461 * Notify ipath_destroy_qp() if it is waiting 462 * for us to finish. 463 */ 464 if (atomic_dec_and_test(&qp->refcount)) 465 wake_up(&qp->wait); 466 } else 467 dev->n_pkt_drops++; 468 } 469 470bail:; 471} 472 473/** 474 * ipath_ib_timer - verbs timer 475 * @arg: the device pointer 476 * 477 * This is called from ipath_do_rcv_timer() at interrupt level to check for 478 * QPs which need retransmits and to collect performance numbers. 
479 */ 480void ipath_ib_timer(struct ipath_ibdev *dev) 481{ 482 struct ipath_qp *resend = NULL; 483 struct list_head *last; 484 struct ipath_qp *qp; 485 unsigned long flags; 486 487 if (dev == NULL) 488 return; 489 490 spin_lock_irqsave(&dev->pending_lock, flags); 491 /* Start filling the next pending queue. */ 492 if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) 493 dev->pending_index = 0; 494 /* Save any requests still in the new queue, they have timed out. */ 495 last = &dev->pending[dev->pending_index]; 496 while (!list_empty(last)) { 497 qp = list_entry(last->next, struct ipath_qp, timerwait); 498 list_del_init(&qp->timerwait); 499 qp->timer_next = resend; 500 resend = qp; 501 atomic_inc(&qp->refcount); 502 } 503 last = &dev->rnrwait; 504 if (!list_empty(last)) { 505 qp = list_entry(last->next, struct ipath_qp, timerwait); 506 if (--qp->s_rnr_timeout == 0) { 507 do { 508 list_del_init(&qp->timerwait); 509 tasklet_hi_schedule(&qp->s_task); 510 if (list_empty(last)) 511 break; 512 qp = list_entry(last->next, struct ipath_qp, 513 timerwait); 514 } while (qp->s_rnr_timeout == 0); 515 } 516 } 517 /* 518 * We should only be in the started state if pma_sample_start != 0 519 */ 520 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && 521 --dev->pma_sample_start == 0) { 522 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; 523 ipath_snapshot_counters(dev->dd, &dev->ipath_sword, 524 &dev->ipath_rword, 525 &dev->ipath_spkts, 526 &dev->ipath_rpkts, 527 &dev->ipath_xmit_wait); 528 } 529 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { 530 if (dev->pma_sample_interval == 0) { 531 u64 ta, tb, tc, td, te; 532 533 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; 534 ipath_snapshot_counters(dev->dd, &ta, &tb, 535 &tc, &td, &te); 536 537 dev->ipath_sword = ta - dev->ipath_sword; 538 dev->ipath_rword = tb - dev->ipath_rword; 539 dev->ipath_spkts = tc - dev->ipath_spkts; 540 dev->ipath_rpkts = td - dev->ipath_rpkts; 541 dev->ipath_xmit_wait = te 
- dev->ipath_xmit_wait; 542 } 543 else 544 dev->pma_sample_interval--; 545 } 546 spin_unlock_irqrestore(&dev->pending_lock, flags); 547 548 for (qp = resend; qp != NULL; qp = qp->timer_next) { 549 struct ib_wc wc; 550 551 spin_lock_irqsave(&qp->s_lock, flags); 552 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { 553 dev->n_timeouts++; 554 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); 555 } 556 spin_unlock_irqrestore(&qp->s_lock, flags); 557 558 /* Notify ipath_destroy_qp() if it is waiting. */ 559 if (atomic_dec_and_test(&qp->refcount)) 560 wake_up(&qp->wait); 561 } 562} 563 564static void update_sge(struct ipath_sge_state *ss, u32 length) 565{ 566 struct ipath_sge *sge = &ss->sge; 567 568 sge->vaddr += length; 569 sge->length -= length; 570 sge->sge_length -= length; 571 if (sge->sge_length == 0) { 572 if (--ss->num_sge) 573 *sge = *ss->sg_list++; 574 } else if (sge->length == 0 && sge->mr != NULL) { 575 if (++sge->n >= IPATH_SEGSZ) { 576 if (++sge->m >= sge->mr->mapsz) 577 return; 578 sge->n = 0; 579 } 580 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; 581 sge->length = sge->mr->map[sge->m]->segs[sge->n].length; 582 } 583} 584 585#ifdef __LITTLE_ENDIAN 586static inline u32 get_upper_bits(u32 data, u32 shift) 587{ 588 return data >> shift; 589} 590 591static inline u32 set_upper_bits(u32 data, u32 shift) 592{ 593 return data << shift; 594} 595 596static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 597{ 598 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); 599 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 600 return data; 601} 602#else 603static inline u32 get_upper_bits(u32 data, u32 shift) 604{ 605 return data << shift; 606} 607 608static inline u32 set_upper_bits(u32 data, u32 shift) 609{ 610 return data >> shift; 611} 612 613static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 614{ 615 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); 616 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 617 return data; 618} 619#endif 620 
/*
 * copy_io - copy an SGE list into a PIO buffer, one dword at a time
 * @piobuf: the write-combining PIO buffer
 * @ss: the SGE list to copy from
 * @length: number of bytes to copy
 *
 * Assembles full 32-bit words from possibly unaligned, arbitrarily
 * fragmented SGEs ('extra' tracks how many bytes of a partial word are
 * already in 'data').  The final word is held back in 'last' and written
 * after a flush because it is the hardware trigger word.
 *
 * NOTE(review): 'last' is only assigned on the break paths inside the
 * loop; callers must guarantee length > 0 (ipath_verbs_send() does) or
 * the trailing __raw_writel() would use an uninitialized value.
 */
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
		    u32 length)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		u32 len = ss->sge.length;
		u32 off;

		BUG_ON(len == 0);
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			/* Read the whole aligned word containing vaddr. */
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				/* Enough bytes to complete a word. */
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			/* Merge each aligned source word with the carry. */
			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * We still have 'extra' number of bytes leftover.
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l,
								  extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			/* Aligned, no carry: this is the final chunk. */
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			/* Aligned, no carry, more data to follow. */
			u32 w = len >> 2;

			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* Update address before sending packet. */
	update_sge(ss, length);
	/* must flush early everything before trigger word */
	ipath_flush_wc();
	__raw_writel(last, piobuf);
	/* be sure trigger word is written */
	ipath_flush_wc();
}

/**
 * ipath_verbs_send - send a packet
 * @dd: the infinipath device
 * @hdrwords: the number of words in the header
 * @hdr: the packet header
 * @len: the length of the packet in bytes
 * @ss: the SGE to send
 *
 * Returns 0 on success, -EINVAL if the packet exceeds the IB MTU, or
 * -EBUSY if no PIO buffer is currently available.
 */
int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
{
	u32 __iomem *piobuf;
	u32 plen;
	int ret;

	/* +1 is for the qword padding of pbc */
	plen = hdrwords + ((len + 3) >> 2) + 1;
	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
		ret = -EINVAL;
		goto bail;
	}

	/* Get a PIO buffer to use. */
	piobuf = ipath_getpiobuf(dd, NULL);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Write len to control qword, no flags.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(plen, piobuf);
	ipath_flush_wc();
	piobuf += 2;
	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
		ipath_flush_wc();
		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
		ipath_flush_wc();
		ret = 0;
		goto bail;
	}

	__iowrite32_copy(piobuf, hdr, hdrwords);
	piobuf += hdrwords;

	/* The common case is aligned and contained in one segment. */
	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
		u32 w;
		u32 *addr = (u32 *) ss->sge.vaddr;

		/* Update address before sending packet. */
		update_sge(ss, len);
		/* Need to round up for the last dword in the packet. */
		w = (len + 3) >> 2;
		__iowrite32_copy(piobuf, addr, w - 1);
		/* must flush early everything before trigger word */
		ipath_flush_wc();
		__raw_writel(addr[w - 1], piobuf + w - 1);
		/* be sure trigger word is written */
		ipath_flush_wc();
		ret = 0;
		goto bail;
	}
	/* Slow path: unaligned or multi-segment SGE list. */
	copy_io(piobuf, ss, len);
	ret = 0;

bail:
	return ret;
}

/*
 * ipath_snapshot_counters - snapshot the send/receive word/packet counters
 * @dd: the infinipath device
 * @swords/@rwords: words sent/received
 * @spkts/@rpkts: packets sent/received
 * @xmit_wait: send stall count
 *
 * Returns -EINVAL if the hardware is not initialized (frozen, absent).
 */
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
			    u64 *rwords, u64 *spkts, u64 *rpkts,
			    u64 *xmit_wait)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
		ret = -EINVAL;
		goto bail;
	}
	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);

	ret = 0;

bail:
	return ret;
}

/**
 * ipath_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int ipath_get_counters(struct ipath_devdata *dd,
		       struct ipath_verbs_counters *cntrs)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
	cntrs->link_error_recovery_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
	/*
	 * The link downed counter counts when the other side downs the
	 * connection.  We add in the number of times we downed the link
	 * due to local link integrity errors to compensate.
	 */
	cntrs->link_downed_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
	/* port_rcv_errors aggregates every receive-side error counter. */
	cntrs->port_rcv_errors =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
		dd->ipath_rxfc_unsupvl_errs;
	cntrs->port_rcv_remphys_errors =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
	cntrs->port_xmit_discards =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
	cntrs->port_xmit_data =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	cntrs->port_rcv_data =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	cntrs->port_xmit_packets =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	cntrs->port_rcv_packets =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
	/* LLI errors come from a different field when GPIO interrupts are used. */
	cntrs->local_link_integrity_errors =
		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
		dd->ipath_lli_errs : dd->ipath_lli_errors;
	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;

	ret = 0;

bail:
	return ret;
}

/**
 * ipath_ib_piobufavail - callback when a PIO buffer is available
 * @dev: the device pointer
 *
 * This is called from ipath_intr() at interrupt level when a PIO buffer is
 * available after ipath_verbs_send() returned an error that no buffers were
 * available.  Return 1 if we consumed all the PIO buffers and we still have
 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
 * return zero).
 */
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		goto bail;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* Reschedule every QP that was blocked waiting for a buffer. */
	while (!list_empty(&dev->piowait)) {
		qp = list_entry(dev->piowait.next, struct ipath_qp,
				piowait);
		list_del_init(&qp->piowait);
		tasklet_hi_schedule(&qp->s_task);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

bail:
	return 0;
}

/*
 * ipath_query_device - report device attributes and limits
 *
 * Limits come from the module parameters above and from hardware
 * (vendor/device id, PCI revision, LKEY table size).
 */
static int ipath_query_device(struct ib_device *ibdev,
			      struct ib_device_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID;
	props->page_size_cap = PAGE_SIZE;
	props->vendor_id = dev->dd->ipath_vendorid;
	props->vendor_part_id = dev->dd->ipath_deviceid;
	props->hw_ver = dev->dd->ipath_pcirev;

	props->sys_image_guid = dev->sys_image_guid;

	props->max_mr_size = ~0ull;
	props->max_qp = ib_ipath_max_qps;
	props->max_qp_wr = ib_ipath_max_qp_wrs;
	props->max_sge = ib_ipath_max_sges;
	props->max_cq = ib_ipath_max_cqs;
	props->max_ah = ib_ipath_max_ahs;
	props->max_cqe = ib_ipath_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_pd = ib_ipath_max_pds;
	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = ib_ipath_max_srqs;
	props->max_srq_wr = ib_ipath_max_srq_wrs;
	props->max_srq_sge = ib_ipath_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = ipath_get_npkeys(dev->dd);
	props->max_mcast_grp = ib_ipath_max_mcast_grps;
	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}

/*
 * Map INFINIPATH_IBCS link-training states to the IB PortInfo
 * PortPhysicalState values (2=Polling, 3=Disabled, 4=PortConfigTraining,
 * 5=LinkUp, 6=LinkErrorRecovery, 1=Sleep).
 */
const u8 ipath_cvt_physportstate[16] = {
	[INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
	[INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
};

/* Read (and clear-on-read semantics per hardware) the bad-PKEY counter. */
u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
{
	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
}

/*
 * ipath_query_port - report IB PortInfo-style attributes for the port
 *
 * State, physical state and MTU are derived from the last cached IBC
 * status and the configured IB MTU.
 */
static int ipath_query_port(struct ib_device *ibdev,
			    u8 port, struct ib_port_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	enum ib_mtu mtu;
	u16 lid = dev->dd->ipath_lid;
	u64 ibcstat;

	memset(props, 0, sizeof(*props));
	/* LID 0 means "not yet assigned"; report the permissive LID. */
	props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = dev->mkeyprot_resv_lmc & 7;
	props->sm_lid = dev->sm_lid;
	props->sm_sl = dev->sm_sl;
	ibcstat = dev->dd->ipath_lastibcstat;
	props->state = ((ibcstat >> 4) & 0x3) + 1;
	/* See phys_state_show() */
	props->phys_state = ipath_cvt_physportstate[
		dev->dd->ipath_lastibcstat & 0xf];
	props->port_cap_flags = dev->port_cap_flags;
	props->gid_tbl_len = 1;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
	/* Report delta since the last PortCounters reset. */
	props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
		dev->z_pkey_violations;
	props->qkey_viol_cntr = dev->qkey_violations;
	props->active_width = IB_WIDTH_4X;
	/* See rate_show() */
	/* 1 = SDR, 2.5 Gb/s per lane; with 4X width this is a 10 Gb/s link */
	props->active_speed = 1;
	props->max_vl_num = 1;	/* VLCap = VL0 */
	props->init_type_reply = 0;

	props->max_mtu = IB_MTU_4096;
	switch (dev->dd->ipath_ibmtu) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = dev->subnet_timeout;

	return 0;
}

/*
 * ipath_modify_device - change device-wide attributes
 *
 * Only the node description and system image GUID may be modified;
 * anything else returns -EOPNOTSUPP.
 */
static int ipath_modify_device(struct ib_device *device,
			       int device_modify_mask,
			       struct ib_device_modify *device_modify)
{
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
		memcpy(device->node_desc, device_modify->node_desc, 64);

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		to_idev(device)->sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);

	ret = 0;

bail:
	return ret;
}

/*
 * ipath_modify_port - change port capability flags / state
 *
 * Always applies the set/clear capability masks; optionally forces the
 * link down or resets the QKey violation counter.
 */
static int ipath_modify_port(struct ib_device *ibdev,
			     u8 port, int port_modify_mask,
			     struct ib_port_modify *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	dev->port_cap_flags |= props->set_port_cap_mask;
	dev->port_cap_flags &= ~props->clr_port_cap_mask;
	if (port_modify_mask & IB_PORT_SHUTDOWN)
		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
		dev->qkey_violations = 0;
	return 0;
}

/*
 * ipath_query_gid - return the port GID
 *
 * Only one GID entry (index 0) exists: subnet prefix + device GUID.
 */
static int ipath_query_gid(struct ib_device *ibdev, u8 port,
			   int index, union ib_gid *gid)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= 1) {
		ret = -EINVAL;
		goto bail;
	}
	gid->global.subnet_prefix = dev->gid_prefix;
	gid->global.interface_id = dev->dd->ipath_guid;

	ret = 0;

bail:
	return ret;
}

/*
 * ipath_alloc_pd - allocate a protection domain
 *
 * Enforces the (arbitrary) ib_ipath_max_pds limit under n_pds_lock.
 */
static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
				    struct ib_ucontext *context,
				    struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == ib_ipath_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}

/* ipath_dealloc_pd - free a protection domain and drop the count. */
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	struct ipath_pd *pd = to_ipd(ibpd);
	struct ipath_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}

/**
 * ipath_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
				     struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah;
	struct ib_ah *ret;
	struct ipath_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->dlid == 0) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > pd->device->phys_port_cnt) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* GFP_ATOMIC because this may run at interrupt level. */
	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
	if (!ah) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;

	ret = &ah->ibah;

bail:
	return ret;
}

/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	struct ipath_ibdev *dev = to_idev(ibah->device);
	struct ipath_ah *ah = to_iah(ibah);
	unsigned long flags;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}

/* ipath_query_ah - return the stored AH attributes. */
static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;

	return 0;
}

/**
 * ipath_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 */
unsigned ipath_get_npkeys(struct ipath_devdata *dd)
{
	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}

/**
 * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
 * @dd: the infinipath device
 * @index: the PKEY index
 *
 * Returns 0 (the invalid PKEY) for an out-of-range index.
 */
unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
	unsigned ret;

	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
		ret = 0;
	else
		ret = dd->ipath_pd[0]->port_pkeys[index];

	return ret;
}

/* ipath_query_pkey - verbs wrapper around ipath_get_pkey(). */
static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			    u16 *pkey)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= ipath_get_npkeys(dev->dd)) {
		ret = -EINVAL;
		goto bail;
	}

	*pkey = ipath_get_pkey(dev->dd, index);
	ret = 0;

bail:
	return ret;
}

/**
 * ipath_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the InfiniPath driver
 */
1328static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, 1329 struct ib_udata *udata) 1330{ 1331 struct ipath_ucontext *context; 1332 struct ib_ucontext *ret; 1333 1334 context = kmalloc(sizeof *context, GFP_KERNEL); 1335 if (!context) { 1336 ret = ERR_PTR(-ENOMEM); 1337 goto bail; 1338 } 1339 1340 ret = &context->ibucontext; 1341 1342bail: 1343 return ret; 1344} 1345 1346static int ipath_dealloc_ucontext(struct ib_ucontext *context) 1347{ 1348 kfree(to_iucontext(context)); 1349 return 0; 1350} 1351 1352static int ipath_verbs_register_sysfs(struct ib_device *dev); 1353 1354static void __verbs_timer(unsigned long arg) 1355{ 1356 struct ipath_devdata *dd = (struct ipath_devdata *) arg; 1357 1358 /* 1359 * If port 0 receive packet interrupts are not available, or 1360 * can be missed, poll the receive queue 1361 */ 1362 if (dd->ipath_flags & IPATH_POLL_RX_INTR) 1363 ipath_kreceive(dd); 1364 1365 /* Handle verbs layer timeouts. */ 1366 ipath_ib_timer(dd->verbs_dev); 1367 1368 mod_timer(&dd->verbs_timer, jiffies + 1); 1369} 1370 1371static int enable_timer(struct ipath_devdata *dd) 1372{ 1373 /* 1374 * Early chips had a design flaw where the chip and kernel idea 1375 * of the tail register don't always agree, and therefore we won't 1376 * get an interrupt on the next packet received. 1377 * If the board supports per packet receive interrupts, use it. 1378 * Otherwise, the timer function periodically checks for packets 1379 * to cover this case. 1380 * Either way, the timer is needed for verbs layer related 1381 * processing. 
1382 */ 1383 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1384 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 1385 0x2074076542310ULL); 1386 /* Enable GPIO bit 2 interrupt */ 1387 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); 1388 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 1389 dd->ipath_gpio_mask); 1390 } 1391 1392 init_timer(&dd->verbs_timer); 1393 dd->verbs_timer.function = __verbs_timer; 1394 dd->verbs_timer.data = (unsigned long)dd; 1395 dd->verbs_timer.expires = jiffies + 1; 1396 add_timer(&dd->verbs_timer); 1397 1398 return 0; 1399} 1400 1401static int disable_timer(struct ipath_devdata *dd) 1402{ 1403 /* Disable GPIO bit 2 interrupt */ 1404 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1405 u64 val; 1406 /* Disable GPIO bit 2 interrupt */ 1407 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); 1408 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); 1409 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 1410 dd->ipath_gpio_mask); 1411 /* 1412 * We might want to undo changes to debugportselect, 1413 * but how? 1414 */ 1415 } 1416 1417 del_timer_sync(&dd->verbs_timer); 1418 1419 return 0; 1420} 1421 1422/** 1423 * ipath_register_ib_device - register our device with the infiniband core 1424 * @dd: the device data structure 1425 * Return the allocated ipath_ibdev pointer or NULL on error. 1426 */ 1427int ipath_register_ib_device(struct ipath_devdata *dd) 1428{ 1429 struct ipath_verbs_counters cntrs; 1430 struct ipath_ibdev *idev; 1431 struct ib_device *dev; 1432 int ret; 1433 1434 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); 1435 if (idev == NULL) { 1436 ret = -ENOMEM; 1437 goto bail; 1438 } 1439 1440 dev = &idev->ibdev; 1441 1442 /* Only need to initialize non-zero fields. 
*/ 1443 spin_lock_init(&idev->n_pds_lock); 1444 spin_lock_init(&idev->n_ahs_lock); 1445 spin_lock_init(&idev->n_cqs_lock); 1446 spin_lock_init(&idev->n_qps_lock); 1447 spin_lock_init(&idev->n_srqs_lock); 1448 spin_lock_init(&idev->n_mcast_grps_lock); 1449 1450 spin_lock_init(&idev->qp_table.lock); 1451 spin_lock_init(&idev->lk_table.lock); 1452 idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); 1453 /* Set the prefix to the default value (see ch. 4.1.1) */ 1454 idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); 1455 1456 ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); 1457 if (ret) 1458 goto err_qp; 1459 1460 /* 1461 * The top ib_ipath_lkey_table_size bits are used to index the 1462 * table. The lower 8 bits can be owned by the user (copied from 1463 * the LKEY). The remaining bits act as a generation number or tag. 1464 */ 1465 idev->lk_table.max = 1 << ib_ipath_lkey_table_size; 1466 idev->lk_table.table = kzalloc(idev->lk_table.max * 1467 sizeof(*idev->lk_table.table), 1468 GFP_KERNEL); 1469 if (idev->lk_table.table == NULL) { 1470 ret = -ENOMEM; 1471 goto err_lk; 1472 } 1473 INIT_LIST_HEAD(&idev->pending_mmaps); 1474 spin_lock_init(&idev->pending_lock); 1475 idev->mmap_offset = PAGE_SIZE; 1476 spin_lock_init(&idev->mmap_offset_lock); 1477 INIT_LIST_HEAD(&idev->pending[0]); 1478 INIT_LIST_HEAD(&idev->pending[1]); 1479 INIT_LIST_HEAD(&idev->pending[2]); 1480 INIT_LIST_HEAD(&idev->piowait); 1481 INIT_LIST_HEAD(&idev->rnrwait); 1482 idev->pending_index = 0; 1483 idev->port_cap_flags = 1484 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; 1485 idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; 1486 idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; 1487 idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; 1488 idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; 1489 idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; 1490 idev->link_width_enabled = 3; /* 1x or 4x */ 1491 1492 /* Snapshot current HW counters to "clear" them. 
*/ 1493 ipath_get_counters(dd, &cntrs); 1494 idev->z_symbol_error_counter = cntrs.symbol_error_counter; 1495 idev->z_link_error_recovery_counter = 1496 cntrs.link_error_recovery_counter; 1497 idev->z_link_downed_counter = cntrs.link_downed_counter; 1498 idev->z_port_rcv_errors = cntrs.port_rcv_errors; 1499 idev->z_port_rcv_remphys_errors = 1500 cntrs.port_rcv_remphys_errors; 1501 idev->z_port_xmit_discards = cntrs.port_xmit_discards; 1502 idev->z_port_xmit_data = cntrs.port_xmit_data; 1503 idev->z_port_rcv_data = cntrs.port_rcv_data; 1504 idev->z_port_xmit_packets = cntrs.port_xmit_packets; 1505 idev->z_port_rcv_packets = cntrs.port_rcv_packets; 1506 idev->z_local_link_integrity_errors = 1507 cntrs.local_link_integrity_errors; 1508 idev->z_excessive_buffer_overrun_errors = 1509 cntrs.excessive_buffer_overrun_errors; 1510 1511 /* 1512 * The system image GUID is supposed to be the same for all 1513 * IB HCAs in a single system but since there can be other 1514 * device types in the system, we can't be sure this is unique. 
1515 */ 1516 if (!sys_image_guid) 1517 sys_image_guid = dd->ipath_guid; 1518 idev->sys_image_guid = sys_image_guid; 1519 idev->ib_unit = dd->ipath_unit; 1520 idev->dd = dd; 1521 1522 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); 1523 dev->owner = THIS_MODULE; 1524 dev->node_guid = dd->ipath_guid; 1525 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; 1526 dev->uverbs_cmd_mask = 1527 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 1528 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 1529 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 1530 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 1531 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 1532 (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 1533 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | 1534 (1ull << IB_USER_VERBS_CMD_QUERY_AH) | 1535 (1ull << IB_USER_VERBS_CMD_REG_MR) | 1536 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 1537 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 1538 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 1539 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 1540 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 1541 (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 1542 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 1543 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 1544 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 1545 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 1546 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 1547 (1ull << IB_USER_VERBS_CMD_POST_SEND) | 1548 (1ull << IB_USER_VERBS_CMD_POST_RECV) | 1549 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 1550 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 1551 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 1552 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 1553 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 1554 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 1555 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 1556 dev->node_type = RDMA_NODE_IB_CA; 1557 dev->phys_port_cnt = 1; 1558 dev->num_comp_vectors = 1; 1559 dev->dma_device = &dd->pcidev->dev; 1560 dev->query_device = ipath_query_device; 1561 dev->modify_device = ipath_modify_device; 1562 dev->query_port = ipath_query_port; 1563 
dev->modify_port = ipath_modify_port; 1564 dev->query_pkey = ipath_query_pkey; 1565 dev->query_gid = ipath_query_gid; 1566 dev->alloc_ucontext = ipath_alloc_ucontext; 1567 dev->dealloc_ucontext = ipath_dealloc_ucontext; 1568 dev->alloc_pd = ipath_alloc_pd; 1569 dev->dealloc_pd = ipath_dealloc_pd; 1570 dev->create_ah = ipath_create_ah; 1571 dev->destroy_ah = ipath_destroy_ah; 1572 dev->query_ah = ipath_query_ah; 1573 dev->create_srq = ipath_create_srq; 1574 dev->modify_srq = ipath_modify_srq; 1575 dev->query_srq = ipath_query_srq; 1576 dev->destroy_srq = ipath_destroy_srq; 1577 dev->create_qp = ipath_create_qp; 1578 dev->modify_qp = ipath_modify_qp; 1579 dev->query_qp = ipath_query_qp; 1580 dev->destroy_qp = ipath_destroy_qp; 1581 dev->post_send = ipath_post_send; 1582 dev->post_recv = ipath_post_receive; 1583 dev->post_srq_recv = ipath_post_srq_receive; 1584 dev->create_cq = ipath_create_cq; 1585 dev->destroy_cq = ipath_destroy_cq; 1586 dev->resize_cq = ipath_resize_cq; 1587 dev->poll_cq = ipath_poll_cq; 1588 dev->req_notify_cq = ipath_req_notify_cq; 1589 dev->get_dma_mr = ipath_get_dma_mr; 1590 dev->reg_phys_mr = ipath_reg_phys_mr; 1591 dev->reg_user_mr = ipath_reg_user_mr; 1592 dev->dereg_mr = ipath_dereg_mr; 1593 dev->alloc_fmr = ipath_alloc_fmr; 1594 dev->map_phys_fmr = ipath_map_phys_fmr; 1595 dev->unmap_fmr = ipath_unmap_fmr; 1596 dev->dealloc_fmr = ipath_dealloc_fmr; 1597 dev->attach_mcast = ipath_multicast_attach; 1598 dev->detach_mcast = ipath_multicast_detach; 1599 dev->process_mad = ipath_process_mad; 1600 dev->mmap = ipath_mmap; 1601 dev->dma_ops = &ipath_dma_mapping_ops; 1602 1603 snprintf(dev->node_desc, sizeof(dev->node_desc), 1604 IPATH_IDSTR " %s", init_utsname()->nodename); 1605 1606 ret = ib_register_device(dev); 1607 if (ret) 1608 goto err_reg; 1609 1610 if (ipath_verbs_register_sysfs(dev)) 1611 goto err_class; 1612 1613 enable_timer(dd); 1614 1615 goto bail; 1616 1617err_class: 1618 ib_unregister_device(dev); 1619err_reg: 1620 
kfree(idev->lk_table.table); 1621err_lk: 1622 kfree(idev->qp_table.table); 1623err_qp: 1624 ib_dealloc_device(dev); 1625 ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); 1626 idev = NULL; 1627 1628bail: 1629 dd->verbs_dev = idev; 1630 return ret; 1631} 1632 1633void ipath_unregister_ib_device(struct ipath_ibdev *dev) 1634{ 1635 struct ib_device *ibdev = &dev->ibdev; 1636 1637 disable_timer(dev->dd); 1638 1639 ib_unregister_device(ibdev); 1640 1641 if (!list_empty(&dev->pending[0]) || 1642 !list_empty(&dev->pending[1]) || 1643 !list_empty(&dev->pending[2])) 1644 ipath_dev_err(dev->dd, "pending list not empty!\n"); 1645 if (!list_empty(&dev->piowait)) 1646 ipath_dev_err(dev->dd, "piowait list not empty!\n"); 1647 if (!list_empty(&dev->rnrwait)) 1648 ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); 1649 if (!ipath_mcast_tree_empty()) 1650 ipath_dev_err(dev->dd, "multicast table memory leak!\n"); 1651 /* 1652 * Note that ipath_unregister_ib_device() can be called before all 1653 * the QPs are destroyed! 
1654 */ 1655 ipath_free_all_qps(&dev->qp_table); 1656 kfree(dev->qp_table.table); 1657 kfree(dev->lk_table.table); 1658 ib_dealloc_device(ibdev); 1659} 1660 1661static ssize_t show_rev(struct class_device *cdev, char *buf) 1662{ 1663 struct ipath_ibdev *dev = 1664 container_of(cdev, struct ipath_ibdev, ibdev.class_dev); 1665 1666 return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); 1667} 1668 1669static ssize_t show_hca(struct class_device *cdev, char *buf) 1670{ 1671 struct ipath_ibdev *dev = 1672 container_of(cdev, struct ipath_ibdev, ibdev.class_dev); 1673 int ret; 1674 1675 ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); 1676 if (ret < 0) 1677 goto bail; 1678 strcat(buf, "\n"); 1679 ret = strlen(buf); 1680 1681bail: 1682 return ret; 1683} 1684 1685static ssize_t show_stats(struct class_device *cdev, char *buf) 1686{ 1687 struct ipath_ibdev *dev = 1688 container_of(cdev, struct ipath_ibdev, ibdev.class_dev); 1689 int i; 1690 int len; 1691 1692 len = sprintf(buf, 1693 "RC resends %d\n" 1694 "RC no QACK %d\n" 1695 "RC ACKs %d\n" 1696 "RC SEQ NAKs %d\n" 1697 "RC RDMA seq %d\n" 1698 "RC RNR NAKs %d\n" 1699 "RC OTH NAKs %d\n" 1700 "RC timeouts %d\n" 1701 "RC RDMA dup %d\n" 1702 "RC stalls %d\n" 1703 "piobuf wait %d\n" 1704 "no piobuf %d\n" 1705 "PKT drops %d\n" 1706 "WQE errs %d\n", 1707 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, 1708 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, 1709 dev->n_other_naks, dev->n_timeouts, 1710 dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, 1711 dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); 1712 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { 1713 const struct ipath_opcode_stats *si = &dev->opstats[i]; 1714 1715 if (!si->n_packets && !si->n_bytes) 1716 continue; 1717 len += sprintf(buf + len, "%02x %llu/%llu\n", i, 1718 (unsigned long long) si->n_packets, 1719 (unsigned long long) si->n_bytes); 1720 } 1721 return len; 1722} 1723 1724static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 
1725static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1726static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); 1727static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL); 1728 1729static struct class_device_attribute *ipath_class_attributes[] = { 1730 &class_device_attr_hw_rev, 1731 &class_device_attr_hca_type, 1732 &class_device_attr_board_id, 1733 &class_device_attr_stats 1734}; 1735 1736static int ipath_verbs_register_sysfs(struct ib_device *dev) 1737{ 1738 int i; 1739 int ret; 1740 1741 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) 1742 if (class_device_create_file(&dev->class_dev, 1743 ipath_class_attributes[i])) { 1744 ret = 1; 1745 goto bail; 1746 } 1747 1748 ret = 0; 1749 1750bail: 1751 return ret; 1752} 1753