/*
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: ipoib_ib.c,v 1.1.1.1 2007/08/03 18:52:32 Exp $
 */

#include <linux/delay.h>
#include <linux/dma-mapping.h>

#include <rdma/ib_cache.h>

#include "ipoib.h"

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param(data_debug_level, int, 0644);
MODULE_PARM_DESC(data_debug_level,
		 "Enable data path debug tracing if > 0");
#endif

static DEFINE_MUTEX(pkey_mutex);

struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
				 struct ib_pd *pd, struct ib_ah_attr *attr)
{
	struct ipoib_ah *ah;

	ah = kmalloc(sizeof *ah, GFP_KERNEL);
	if (!ah)
		return NULL;

	ah->dev       = dev;
	ah->last_send = 0;
	kref_init(&ah->ref);

	ah->ah = ib_create_ah(pd, attr);
	if (IS_ERR(ah->ah)) {
		kfree(ah);
		ah = NULL;
	} else
		ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);

	return ah;
}

void ipoib_free_ah(struct kref *kref)
{
	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
	struct ipoib_dev_priv *priv = netdev_priv(ah->dev);

	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);
	list_add_tail(&ah->list, &priv->dead_ahs);
	spin_unlock_irqrestore(&priv->lock, flags);
}

static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_sge list;
	struct ib_recv_wr param;
	struct ib_recv_wr *bad_wr;
	int ret;

	list.addr   = priv->rx_ring[id].mapping;
	list.length = IPOIB_BUF_SIZE;
	list.lkey   = priv->mr->lkey;

	param.next    = NULL;
	param.wr_id   = id | IPOIB_OP_RECV;
	param.sg_list = &list;
	param.num_sge = 1;

	ret = ib_post_recv(priv->qp, &param, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
		ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
		dev_kfree_skb_any(priv->rx_ring[id].skb);
		priv->rx_ring[id].skb = NULL;
	}

	return ret;
}
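
/*
 * Note on wr_id encoding: receive WRs are posted with the ring index
 * OR'ed with IPOIB_OP_RECV, so that completions pulled from the
 * shared CQ can be told apart from send completions; the RX handler
 * masks the bit back off to recover the ring slot.
 */
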
static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct sk_buff *skb;
	u64 addr;

	skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
	if (!skb)
		return -ENOMEM;

	/*
	 * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
	 * header. So we need 4 more bytes to get to 48 and align the
	 * IP header to a multiple of 16.
	 */
	skb_reserve(skb, 4);

	addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
				 DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
		dev_kfree_skb_any(skb);
		return -EIO;
	}

	priv->rx_ring[id].skb     = skb;
	priv->rx_ring[id].mapping = addr;

	return 0;
}

static int ipoib_ib_post_receives(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (ipoib_alloc_rx_skb(dev, i)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			return -ENOMEM;
		}
		if (ipoib_ib_post_receive(dev, i)) {
			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
			return -EIO;
		}
	}

	return 0;
}

static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
	struct sk_buff *skb;
	u64 addr;

	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_recvq_size)) {
		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_recvq_size);
		return;
	}

	skb  = priv->rx_ring[wr_id].skb;
	addr = priv->rx_ring[wr_id].mapping;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			ipoib_warn(priv, "failed recv event "
				   "(status=%d, wrid=%d vend_err %x)\n",
				   wc->status, wr_id, wc->vendor_err);
		ib_dma_unmap_single(priv->ca, addr,
				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
		dev_kfree_skb_any(skb);
		priv->rx_ring[wr_id].skb = NULL;
		return;
	}

	/*
	 * If we can't allocate a new RX buffer, dump
	 * this packet and reuse the old buffer.
	 */
	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
		++priv->stats.rx_dropped;
		goto repost;
	}

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

	ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);

	skb_put(skb, wc->byte_len);
	skb_pull(skb, IB_GRH_BYTES);

	if (wc->slid != priv->local_lid ||
	    wc->src_qp != priv->qp->qp_num) {
		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
		skb_reset_mac_header(skb);
		skb_pull(skb, IPOIB_ENCAP_LEN);

		dev->last_rx = jiffies;
		++priv->stats.rx_packets;
		priv->stats.rx_bytes += skb->len;

		skb->dev = dev;
		skb->pkt_type = PACKET_HOST;
		netif_receive_skb(skb);
	} else {
		ipoib_dbg_data(priv, "dropping loopback packet\n");
		dev_kfree_skb_any(skb);
	}

repost:
	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
		ipoib_warn(priv, "ipoib_ib_post_receive failed "
			   "for buf %d\n", wr_id);
}
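
/*
 * Every UD receive is preceded by a 40-byte GRH (or space reserved
 * for one), which ipoib_ib_handle_rx_wc() strips with skb_pull()
 * above.  The SLID/source-QP check filters out packets looped back
 * from our own send queue -- typically our own multicast traffic.
 */
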
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	unsigned int wr_id = wc->wr_id;
	struct ipoib_tx_buf *tx_req;
	unsigned long flags;

	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_sendq_size)) {
		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_sendq_size);
		return;
	}

	tx_req = &priv->tx_ring[wr_id];

	ib_dma_unmap_single(priv->ca, tx_req->mapping,
			    tx_req->skb->len, DMA_TO_DEVICE);

	++priv->stats.tx_packets;
	priv->stats.tx_bytes += tx_req->skb->len;

	dev_kfree_skb_any(tx_req->skb);

	spin_lock_irqsave(&priv->tx_lock, flags);
	++priv->tx_tail;
	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
		netif_wake_queue(dev);
	}
	spin_unlock_irqrestore(&priv->tx_lock, flags);

	if (wc->status != IB_WC_SUCCESS &&
	    wc->status != IB_WC_WR_FLUSH_ERR)
		ipoib_warn(priv, "failed send event "
			   "(status=%d, wrid=%d vend_err %x)\n",
			   wc->status, wr_id, wc->vendor_err);
}

int ipoib_poll(struct net_device *dev, int *budget)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int max = min(*budget, dev->quota);
	int done;
	int t;
	int empty;
	int n, i;

	done  = 0;
	empty = 0;

	while (max) {
		t = min(IPOIB_NUM_WC, max);
		n = ib_poll_cq(priv->cq, t, priv->ibwc);

		for (i = 0; i < n; ++i) {
			struct ib_wc *wc = priv->ibwc + i;

			if (wc->wr_id & IPOIB_CM_OP_SRQ) {
				++done;
				--max;
				ipoib_cm_handle_rx_wc(dev, wc);
			} else if (wc->wr_id & IPOIB_OP_RECV) {
				++done;
				--max;
				ipoib_ib_handle_rx_wc(dev, wc);
			} else
				ipoib_ib_handle_tx_wc(dev, wc);
		}

		if (n != t) {
			empty = 1;
			break;
		}
	}

	dev->quota -= done;
	*budget    -= done;

	if (empty) {
		netif_rx_complete(dev);
		if (unlikely(ib_req_notify_cq(priv->cq,
					      IB_CQ_NEXT_COMP |
					      IB_CQ_REPORT_MISSED_EVENTS)) &&
		    netif_rx_reschedule(dev, 0))
			return 1;

		return 0;
	}

	return 1;
}

void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
	netif_rx_schedule(dev_ptr);
}
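
/*
 * Re-arming the CQ with IB_CQ_REPORT_MISSED_EVENTS closes the race
 * between the final empty poll and the re-arm: if completions slipped
 * in during that window, ib_req_notify_cq() returns nonzero and
 * ipoib_poll() reschedules itself instead of going idle, so no event
 * is lost.
 */
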
static inline int post_send(struct ipoib_dev_priv *priv,
			    unsigned int wr_id,
			    struct ib_ah *address, u32 qpn,
			    u64 addr, int len)
{
	struct ib_send_wr *bad_wr;

	priv->tx_sge.addr   = addr;
	priv->tx_sge.length = len;

	priv->tx_wr.wr_id            = wr_id;
	priv->tx_wr.wr.ud.remote_qpn = qpn;
	priv->tx_wr.wr.ud.ah         = address;

	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}

void ipoib_send(struct net_device *dev, struct sk_buff *skb,
		struct ipoib_ah *address, u32 qpn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_tx_buf *tx_req;
	u64 addr;

	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
			   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
		++priv->stats.tx_dropped;
		++priv->stats.tx_errors;
		ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
		return;
	}

	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
		       skb->len, address, qpn);

	/*
	 * We put the skb into the tx_ring _before_ we call post_send()
	 * because it's entirely possible that the completion handler will
	 * run before we execute anything after the post_send(). That
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */
	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
	tx_req->skb = skb;
	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
				 DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
		++priv->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return;
	}
	tx_req->mapping = addr;

	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
			       address->ah, qpn, addr, skb->len))) {
		ipoib_warn(priv, "post_send failed\n");
		++priv->stats.tx_errors;
		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
		dev_kfree_skb_any(skb);
	} else {
		dev->trans_start = jiffies;

		address->last_send = priv->tx_head;
		++priv->tx_head;

		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
			netif_stop_queue(dev);
			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
		}
	}
}

static void __ipoib_reap_ah(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_ah *ah, *tah;
	LIST_HEAD(remove_list);

	spin_lock_irq(&priv->tx_lock);
	spin_lock(&priv->lock);
	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
			list_del(&ah->list);
			ib_destroy_ah(ah->ah);
			kfree(ah);
		}
	spin_unlock(&priv->lock);
	spin_unlock_irq(&priv->tx_lock);
}

void ipoib_reap_ah(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
	struct net_device *dev = priv->dev;

	__ipoib_reap_ah(dev);

	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
}
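
/*
 * An address handle must not be destroyed while a send referencing it
 * may still be in flight.  ipoib_send() stamps each AH with the
 * tx_head value of its last use, so the reaper above only frees an AH
 * once tx_tail has advanced past that point; the signed subtraction
 * keeps the comparison valid across counter wraparound.
 */
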
int ipoib_ib_dev_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
		ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
		return -1;
	}
	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

	ret = ipoib_init_qp(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
		return -1;
	}

	ret = ipoib_ib_post_receives(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
		ipoib_ib_dev_stop(dev, 1);
		return -1;
	}

	ret = ipoib_cm_dev_open(dev);
	if (ret) {
		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
		ipoib_ib_dev_stop(dev, 1);
		return -1;
	}

	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);

	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);

	return 0;
}

static void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	u16 pkey_index = 0;

	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
	else
		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}

int ipoib_ib_dev_up(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_pkey_dev_check_presence(dev);

	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		ipoib_dbg(priv, "PKEY is not assigned.\n");
		return 0;
	}

	set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);

	return ipoib_mcast_start_thread(dev);
}

int ipoib_ib_dev_down(struct net_device *dev, int flush)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "downing ib_dev\n");

	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
	netif_carrier_off(dev);

	/* Shut down the P_Key thread if it is still active */
	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		mutex_lock(&pkey_mutex);
		set_bit(IPOIB_PKEY_STOP, &priv->flags);
		cancel_delayed_work(&priv->pkey_poll_task);
		mutex_unlock(&pkey_mutex);
		if (flush)
			flush_workqueue(ipoib_workqueue);
	}

	ipoib_mcast_stop_thread(dev, flush);
	ipoib_mcast_dev_flush(dev);

	ipoib_flush_paths(dev);

	return 0;
}

static int recvs_pending(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int pending = 0;
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i)
		if (priv->rx_ring[i].skb)
			++pending;

	return pending;
}

void ipoib_drain_cq(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i, n;

	do {
		n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
		for (i = 0; i < n; ++i) {
			if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
				ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
			else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
				ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
			else
				ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
		}
	} while (n == IPOIB_NUM_WC);
}
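
/*
 * ipoib_drain_cq() serves the shutdown path below: once polling is
 * disabled nothing else consumes completions, so ipoib_ib_dev_stop()
 * has to reap them by hand or the flushed work requests would never
 * drain from the rings.
 */
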
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	unsigned long begin;
	struct ipoib_tx_buf *tx_req;
	int i;

	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
	netif_poll_disable(dev);

	ipoib_cm_dev_stop(dev);

	/*
	 * Move our QP to the error state and then reinitialize it
	 * once all work requests have completed or been flushed.
	 */
	qp_attr.qp_state = IB_QPS_ERR;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");

	/* Wait for all sends and receives to complete */
	begin = jiffies;

	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
				   priv->tx_head - priv->tx_tail, recvs_pending(dev));

			/*
			 * assume the HW is wedged and just free up
			 * all our pending work requests.
			 */
			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
				tx_req = &priv->tx_ring[priv->tx_tail &
							(ipoib_sendq_size - 1)];
				ib_dma_unmap_single(priv->ca,
						    tx_req->mapping,
						    tx_req->skb->len,
						    DMA_TO_DEVICE);
				dev_kfree_skb_any(tx_req->skb);
				++priv->tx_tail;
			}

			for (i = 0; i < ipoib_recvq_size; ++i) {
				struct ipoib_rx_buf *rx_req;

				rx_req = &priv->rx_ring[i];
				if (!rx_req->skb)
					continue;
				ib_dma_unmap_single(priv->ca,
						    rx_req->mapping,
						    IPOIB_BUF_SIZE,
						    DMA_FROM_DEVICE);
				dev_kfree_skb_any(rx_req->skb);
				rx_req->skb = NULL;
			}

			goto timeout;
		}

		ipoib_drain_cq(dev);

		msleep(1);
	}

	ipoib_dbg(priv, "All sends and receives done.\n");

timeout:
	qp_attr.qp_state = IB_QPS_RESET;
	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
		ipoib_warn(priv, "Failed to modify QP to RESET state\n");

	/* Wait for all AHs to be reaped */
	set_bit(IPOIB_STOP_REAPER, &priv->flags);
	cancel_delayed_work(&priv->ah_reap_task);
	if (flush)
		flush_workqueue(ipoib_workqueue);

	begin = jiffies;

	while (!list_empty(&priv->dead_ahs)) {
		__ipoib_reap_ah(dev);

		if (time_after(jiffies, begin + HZ)) {
			ipoib_warn(priv, "timing out; will leak address handles\n");
			break;
		}

		msleep(1);
	}

	netif_poll_enable(dev);
	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);

	return 0;
}

int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	priv->ca   = ca;
	priv->port = port;
	priv->qp   = NULL;

	if (ipoib_transport_dev_init(dev, ca)) {
		printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
		return -ENODEV;
	}

	if (dev->flags & IFF_UP) {
		if (ipoib_ib_dev_open(dev)) {
			ipoib_transport_dev_cleanup(dev);
			return -ENODEV;
		}
	}

	return 0;
}
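
/*
 * A nonzero pkey_event below means the port's P_Key table changed.
 * Since the P_Key index is fixed when the QP is initialized, a pkey
 * that moved to a different table slot requires tearing the QP down
 * and reopening it at the new index.
 */
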
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
	struct ipoib_dev_priv *cpriv;
	struct net_device *dev = priv->dev;
	u16 new_index;

	mutex_lock(&priv->vlan_mutex);

	/*
	 * Flush any child interfaces too -- they might be up even if
	 * the parent is down.
	 */
	list_for_each_entry(cpriv, &priv->child_intfs, list)
		__ipoib_ib_dev_flush(cpriv, pkey_event);

	mutex_unlock(&priv->vlan_mutex);

	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
		return;
	}

	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
		return;
	}

	if (pkey_event) {
		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
			ipoib_ib_dev_down(dev, 0);
			ipoib_pkey_dev_delay_open(dev);
			return;
		}
		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);

		/* restart QP only if the P_Key index changed */
		if (new_index == priv->pkey_index) {
			ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
			return;
		}
		priv->pkey_index = new_index;
	}

	ipoib_dbg(priv, "flushing\n");

	ipoib_ib_dev_down(dev, 0);

	if (pkey_event) {
		ipoib_ib_dev_stop(dev, 0);
		ipoib_ib_dev_open(dev);
	}

	/*
	 * The device could have been brought down between the start and when
	 * we get here; don't bring it back up if it's not configured up.
	 */
	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
		ipoib_ib_dev_up(dev);
		ipoib_mcast_restart_task(&priv->restart_task);
	}
}

void ipoib_ib_dev_flush(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, flush_task);

	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
	__ipoib_ib_dev_flush(priv, 0);
}

void ipoib_pkey_event(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, pkey_event_task);

	ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
	__ipoib_ib_dev_flush(priv, 1);
}

void ipoib_ib_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "cleaning up ib_dev\n");

	ipoib_mcast_stop_thread(dev, 1);
	ipoib_mcast_dev_flush(dev);

	ipoib_transport_dev_cleanup(dev);
}

/*
 * Delayed P_Key Assignment Interim Support
 *
 * The following is an initial implementation of the delayed P_Key
 * assignment mechanism.  It uses the same approach implemented for
 * the multicast group join.  The single goal of this implementation
 * is to quickly address Bug #2507.  This implementation will probably
 * be removed when the P_Key change async notification is available.
 */
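
/*
 * The flow: ipoib_pkey_dev_delay_open() checks the port's P_Key table
 * once and, if our pkey is absent, queues ipoib_pkey_poll() on the
 * IPoIB workqueue.  The poll rechecks every second (HZ) and calls
 * ipoib_open() when the pkey appears; IPOIB_PKEY_STOP and pkey_mutex
 * keep a poll from being requeued during shutdown.
 */
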
void ipoib_pkey_poll(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
	struct net_device *dev = priv->dev;

	ipoib_pkey_dev_check_presence(dev);

	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
		ipoib_open(dev);
	else {
		mutex_lock(&pkey_mutex);
		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->pkey_poll_task,
					   HZ);
		mutex_unlock(&pkey_mutex);
	}
}

int ipoib_pkey_dev_delay_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	/*
	 * Look for the interface pkey value in the IB Port P_Key table
	 * and set the interface pkey assignment flag.
	 */
	ipoib_pkey_dev_check_presence(dev);

	/* P_Key value not assigned yet - start polling */
	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
		mutex_lock(&pkey_mutex);
		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
		queue_delayed_work(ipoib_workqueue,
				   &priv->pkey_poll_task,
				   HZ);
		mutex_unlock(&pkey_mutex);
		return 1;
	}

	return 0;
}