/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <linux/slab.h>

#include <net/dst.h>

#include "ipoib.h"

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

static DEFINE_MUTEX(mcast_mutex);

struct ipoib_mcast_iter {
	struct net_device *dev;
	union ib_gid       mgid;
	unsigned long      created;
	unsigned int       queuelen;
	unsigned int       complete;
	unsigned int       send_only;
};

static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh, *tmp;
	int tx_dropped = 0;

	ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
			mcast->mcmember.mgid.raw);

	spin_lock_irq(&priv->lock);

	list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
		/*
		 * It's safe to call ipoib_put_ah() inside priv->lock
		 * here, because we know that mcast->ah will always
		 * hold one more reference, so ipoib_put_ah() will
		 * never do more than decrement the ref count.
		 */
		if (neigh->ah)
			ipoib_put_ah(neigh->ah);
		ipoib_neigh_free(dev, neigh);
	}

	spin_unlock_irq(&priv->lock);

	if (mcast->ah)
		ipoib_put_ah(mcast->ah);

	while (!skb_queue_empty(&mcast->pkt_queue)) {
		++tx_dropped;
		dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
	}

	netif_tx_lock_bh(dev);
	dev->stats.tx_dropped += tx_dropped;
	netif_tx_unlock_bh(dev);

	kfree(mcast);
}

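/*
 * Allocate and initialize a group entry.  can_sleep selects
 * GFP_KERNEL vs. GFP_ATOMIC so this can also be called from the
 * xmit path, where sleeping is not allowed.
 */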
static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
					     int can_sleep)
{
	struct ipoib_mcast *mcast;

	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
	if (!mcast)
		return NULL;

	mcast->dev = dev;
	mcast->created = jiffies;
	mcast->backoff = 1;

	INIT_LIST_HEAD(&mcast->list);
	INIT_LIST_HEAD(&mcast->neigh_list);
	skb_queue_head_init(&mcast->pkt_queue);

	return mcast;
}

static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct rb_node *n = priv->multicast_tree.rb_node;

	while (n) {
		struct ipoib_mcast *mcast;
		int ret;

		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return mcast;
	}

	return NULL;
}

static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;

	while (*n) {
		struct ipoib_mcast *tmcast;
		int ret;

		pn = *n;
		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);

		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&mcast->rb_node, pn, n);
	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);

	return 0;
}

static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_ah *ah;
	int ret;
	int set_qkey = 0;

	mcast->mcmember = *mcmember;

	/* Set the cached Q_Key before we attach if it's the broadcast group */
	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		    sizeof (union ib_gid))) {
		spin_lock_irq(&priv->lock);
		if (!priv->broadcast) {
			spin_unlock_irq(&priv->lock);
			return -EAGAIN;
		}
		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
		spin_unlock_irq(&priv->lock);
		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
		set_qkey = 1;
	}

	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			ipoib_warn(priv, "multicast group %pI6 already attached\n",
				   mcast->mcmember.mgid.raw);

			return 0;
		}

		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
					 &mcast->mcmember.mgid, set_qkey);
		if (ret < 0) {
			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
				   mcast->mcmember.mgid.raw);

			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}
	}

	{
		struct ib_ah_attr av = {
			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
			.port_num      = priv->port,
			.sl	       = mcast->mcmember.sl,
			.ah_flags      = IB_AH_GRH,
			.static_rate   = mcast->mcmember.rate,
			.grh	       = {
				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
				.hop_limit     = mcast->mcmember.hop_limit,
				.sgid_index    = 0,
				.traffic_class = mcast->mcmember.traffic_class
			}
		};
		av.grh.dgid = mcast->mcmember.mgid;

		ah = ipoib_create_ah(dev, priv->pd, &av);
		if (!ah) {
			ipoib_warn(priv, "ipoib_create_ah failed\n");
		} else {
			spin_lock_irq(&priv->lock);
			mcast->ah = ah;
			spin_unlock_irq(&priv->lock);

			ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
					mcast->mcmember.mgid.raw,
					mcast->ah->ah,
					be16_to_cpu(mcast->mcmember.mlid),
					mcast->mcmember.sl);
		}
	}

	/* actually send any queued packets */
	netif_tx_lock_bh(dev);
	while (!skb_queue_empty(&mcast->pkt_queue)) {
		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
		netif_tx_unlock_bh(dev);

		skb->dev = dev;

		if (!skb_dst(skb) || !skb_dst(skb)->neighbour) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof (struct ipoib_pseudoheader));
		}

		if (dev_queue_xmit(skb))
			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
		netif_tx_lock_bh(dev);
	}
	netif_tx_unlock_bh(dev);

	return 0;
}

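/*
 * Completion callback for send-only joins, invoked by the SA client.
 * Returning non-zero here tells the SA layer to free the multicast
 * join, which is why the failure path returns the old value of the
 * BUSY bit.
 */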
static int
ipoib_mcast_sendonly_join_complete(int status,
				   struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct net_device *dev = mcast->dev;

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (status) {
		if (mcast->logcount++ < 20)
			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
					mcast->mcmember.mgid.raw, status);

		/* Flush out any queued packets */
		netif_tx_lock_bh(dev);
		while (!skb_queue_empty(&mcast->pkt_queue)) {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
		}
		netif_tx_unlock_bh(dev);

		/* Clear the busy flag so we try again */
		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
					    &mcast->flags);
	}
	return status;
}

static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	int ret = 0;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
		return -ENODEV;
	}

	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
		return -EBUSY;
	}

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
					 priv->port, &rec,
					 IB_SA_MCMEMBER_REC_MGID	|
					 IB_SA_MCMEMBER_REC_PORT_GID	|
					 IB_SA_MCMEMBER_REC_PKEY	|
					 IB_SA_MCMEMBER_REC_JOIN_STATE,
					 GFP_ATOMIC,
					 ipoib_mcast_sendonly_join_complete,
					 mcast);
	if (IS_ERR(mcast->mc)) {
		ret = PTR_ERR(mcast->mc);
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
			   ret);
	} else {
		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
				mcast->mcmember.mgid.raw);
	}

	return ret;
}

void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	/*
	 * Take rtnl_lock to avoid racing with ipoib_stop() and
	 * turning the carrier back on while a device is being
	 * removed.
	 */
	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}

	rtnl_lock();
	netif_carrier_on(priv->dev);
	rtnl_unlock();
}

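/*
 * Completion callback for full-member joins.  On success, kick the
 * join task immediately so the next group can be joined; on failure,
 * retry with exponential backoff.
 */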
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
			mcast->mcmember.mgid.raw, status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		mcast->backoff = 1;
		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task, 0);
		mutex_unlock(&mcast_mutex);

		/*
		 * Defer carrier on work to ipoib_workqueue to avoid a
		 * deadlock on rtnl_lock here.
		 */
		if (mcast == priv->broadcast)
			queue_work(ipoib_workqueue, &priv->carrier_on_task);

		return 0;
	}

	if (mcast->logcount++ < 20) {
		if (status == -ETIMEDOUT || status == -EAGAIN) {
			ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
					mcast->mcmember.mgid.raw, status);
		} else {
			ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
				   mcast->mcmember.mgid.raw, status);
		}
	}

	mcast->backoff *= 2;
	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

	/* Clear the busy flag so we try again */
	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

	mutex_lock(&mcast_mutex);
	spin_lock_irq(&priv->lock);
	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
				   mcast->backoff * HZ);
	spin_unlock_irq(&priv->lock);
	mutex_unlock(&mcast_mutex);

	return status;
}

static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
			     int create)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	ib_sa_comp_mask comp_mask;
	int ret = 0;

	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	comp_mask =
		IB_SA_MCMEMBER_REC_MGID		|
		IB_SA_MCMEMBER_REC_PORT_GID	|
		IB_SA_MCMEMBER_REC_PKEY		|
		IB_SA_MCMEMBER_REC_JOIN_STATE;

	if (create) {
		comp_mask |=
			IB_SA_MCMEMBER_REC_QKEY			|
			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
			IB_SA_MCMEMBER_REC_MTU			|
			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
			IB_SA_MCMEMBER_REC_RATE			|
			IB_SA_MCMEMBER_REC_SL			|
			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
			IB_SA_MCMEMBER_REC_HOP_LIMIT;

		rec.qkey	  = priv->broadcast->mcmember.qkey;
		rec.mtu_selector  = IB_SA_EQ;
		rec.mtu		  = priv->broadcast->mcmember.mtu;
		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
		rec.rate_selector = IB_SA_EQ;
		rec.rate	  = priv->broadcast->mcmember.rate;
		rec.sl		  = priv->broadcast->mcmember.sl;
		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
	}

	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
					 &rec, comp_mask, GFP_KERNEL,
					 ipoib_mcast_join_complete, mcast);
	if (IS_ERR(mcast->mc)) {
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ret = PTR_ERR(mcast->mc);
		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);

		mcast->backoff *= 2;
		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task,
					   mcast->backoff * HZ);
		mutex_unlock(&mcast_mutex);
	}
}

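/*
 * Join worker: the broadcast group is joined first, since its
 * mcmember record supplies the Q_Key, MTU, rate and other parameters
 * used when creating the remaining groups.  Groups are then joined
 * one at a time; the completion callback requeues this work until
 * the whole multicast list has been joined.
 */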
void ipoib_mcast_join_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_task.work);
	struct net_device *dev = priv->dev;

	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
		return;

	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
		ipoib_warn(priv, "ib_query_gid() failed\n");
	else
		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));

	{
		struct ib_port_attr attr;

		if (!ib_query_port(priv->ca, priv->port, &attr))
			priv->local_lid = attr.lid;
		else
			ipoib_warn(priv, "ib_query_port failed\n");
	}

	if (!priv->broadcast) {
		struct ipoib_mcast *broadcast;

		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			return;

		broadcast = ipoib_mcast_alloc(dev, 1);
		if (!broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			mutex_lock(&mcast_mutex);
			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
				queue_delayed_work(ipoib_workqueue,
						   &priv->mcast_task, HZ);
			mutex_unlock(&mcast_mutex);
			return;
		}

		spin_lock_irq(&priv->lock);
		memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		       sizeof (union ib_gid));
		priv->broadcast = broadcast;

		__ipoib_mcast_add(dev, priv->broadcast);
		spin_unlock_irq(&priv->lock);
	}

	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
			ipoib_mcast_join(dev, priv->broadcast, 0);
		return;
	}

	while (1) {
		struct ipoib_mcast *mcast = NULL;

		spin_lock_irq(&priv->lock);
		list_for_each_entry(mcast, &priv->multicast_list, list) {
			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
				/* Found the next unjoined group */
				break;
			}
		}
		spin_unlock_irq(&priv->lock);

		if (&mcast->list == &priv->multicast_list) {
			/* All done */
			break;
		}

		ipoib_mcast_join(dev, mcast, 1);
		return;
	}

	priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));

	if (!ipoib_cm_admin_enabled(dev)) {
		rtnl_lock();
		dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
		rtnl_unlock();
	}

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
}

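/*
 * Despite the names, no kernel thread is involved here: "start" and
 * "stop" just queue and cancel the join work on ipoib_workqueue.
 */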
int ipoib_mcast_start_thread(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg_mcast(priv, "starting multicast thread\n");

	mutex_lock(&mcast_mutex);
	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
	mutex_unlock(&mcast_mutex);

	return 0;
}

int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg_mcast(priv, "stopping multicast thread\n");

	mutex_lock(&mcast_mutex);
	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
	cancel_delayed_work(&priv->mcast_task);
	mutex_unlock(&mcast_mutex);

	if (flush)
		flush_workqueue(ipoib_workqueue);

	return 0;
}

static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret = 0;

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
		ib_sa_free_multicast(mcast->mc);

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
				mcast->mcmember.mgid.raw);

		/* Remove ourselves from the multicast group */
		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
				      be16_to_cpu(mcast->mcmember.mlid));
		if (ret)
			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
	}

	return 0;
}

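/*
 * Transmit-path entry point: look up (or create) the group for mgid,
 * then either send immediately if an address handle already exists,
 * or queue the skb and kick off a send-only join.
 */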
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_mcast *mcast;
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
	    !priv->broadcast					||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		goto unlock;
	}

	mcast = __ipoib_mcast_find(dev, mgid);
	if (!mcast) {
		/* Let's create a new send only group now */
		ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
				mgid);

		mcast = ipoib_mcast_alloc(dev, 0);
		if (!mcast) {
			ipoib_warn(priv, "unable to allocate memory for "
				   "multicast structure\n");
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
			goto out;
		}

		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
		__ipoib_mcast_add(dev, mcast);
		list_add_tail(&mcast->list, &priv->multicast_list);
	}

	if (!mcast->ah) {
		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
			skb_queue_tail(&mcast->pkt_queue, skb);
		else {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}

		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
			ipoib_dbg_mcast(priv, "no address vector, "
					"but multicast join already started\n");
		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			ipoib_mcast_sendonly_join(mcast);

		/*
		 * If lookup completes between here and out:, we don't
		 * want to send the packet twice.
		 */
		mcast = NULL;
	}

out:
	if (mcast && mcast->ah) {
		if (skb_dst(skb)		&&
		    skb_dst(skb)->neighbour	&&
		    !*to_ipoib_neigh(skb_dst(skb)->neighbour)) {
			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour,
								      skb->dev);

			if (neigh) {
				kref_get(&mcast->ah->ref);
				neigh->ah	= mcast->ah;
				list_add_tail(&neigh->list, &mcast->neigh_list);
			}
		}

		spin_unlock_irqrestore(&priv->lock, flags);
		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
		return;
	}

unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
}

void ipoib_mcast_dev_flush(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast;
	unsigned long flags;

	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		list_del(&mcast->list);
		rb_erase(&mcast->rb_node, &priv->multicast_tree);
		list_add_tail(&mcast->list, &remove_list);
	}

	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(dev, mcast);
		ipoib_mcast_free(mcast);
	}
}

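/*
 * Check that a hardware multicast address maps onto this interface's
 * broadcast domain.  Byte 6 is deliberately skipped: it differs
 * between IPv4 (0x401B) and IPv6 (0x601B) signature MGIDs.
 */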
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
{
	/* reserved QPN, prefix, scope */
	if (memcmp(addr, broadcast, 6))
		return 0;
	/* signature lower, pkey */
	if (memcmp(addr + 7, broadcast + 7, 3))
		return 0;
	return 1;
}

void ipoib_mcast_restart_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, restart_task);
	struct net_device *dev = priv->dev;
	struct netdev_hw_addr *ha;
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	unsigned long flags;
	struct ib_sa_mcmember_rec rec;

	ipoib_dbg_mcast(priv, "restarting multicast task\n");

	ipoib_mcast_stop_thread(dev, 0);

	local_irq_save(flags);
	netif_addr_lock(dev);
	spin_lock(&priv->lock);

	/*
	 * Unfortunately, the networking core only gives us a list of
	 * all of the multicast hardware addresses.  We need to figure
	 * out which ones are new and which ones have been removed.
	 */

	/* Clear out the found flag */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Mark all of the entries that are found or don't exist */
	netdev_for_each_mc_addr(ha, dev) {
		union ib_gid mgid;

		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
			continue;

		memcpy(mgid.raw, ha->addr + 4, sizeof mgid);

		mcast = __ipoib_mcast_find(dev, &mgid);
		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			struct ipoib_mcast *nmcast;

			/* ignore groups that are directly joined by userspace */
			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
						mgid.raw);
				continue;
			}

			/* Not found or send-only group, let's add a new entry */
			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
					mgid.raw);

			nmcast = ipoib_mcast_alloc(dev, 0);
			if (!nmcast) {
				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
				continue;
			}

			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

			nmcast->mcmember.mgid = mgid;

			if (mcast) {
				/* Destroy the send only entry */
				list_move_tail(&mcast->list, &remove_list);

				rb_replace_node(&mcast->rb_node,
						&nmcast->rb_node,
						&priv->multicast_tree);
			} else
				__ipoib_mcast_add(dev, nmcast);

			list_add_tail(&nmcast->list, &priv->multicast_list);
		}

		if (mcast)
			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
	}

	/* Remove all of the entries that no longer exist */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
					mcast->mcmember.mgid.raw);

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_move_tail(&mcast->list, &remove_list);
		}
	}

	spin_unlock(&priv->lock);
	netif_addr_unlock(dev);
	local_irq_restore(flags);

	/* We have to cancel outside of the spinlock */
	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(mcast->dev, mcast);
		ipoib_mcast_free(mcast);
	}

	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		ipoib_mcast_start_thread(dev);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

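/*
 * Iterator used by the debugfs "mcg" file to walk the multicast
 * tree in MGID order.
 */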
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
	struct ipoib_mcast_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
	memset(iter->mgid.raw, 0, 16);

	if (ipoib_mcast_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
	struct rb_node *n;
	struct ipoib_mcast *mcast;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->multicast_tree);

	while (n) {
		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->mgid      = mcast->mcmember.mgid;
			iter->created   = mcast->created;
			iter->queuelen  = skb_queue_len(&mcast->pkt_queue);
			iter->complete  = !!mcast->ah;
			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));

			ret = 0;

			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *mgid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only)
{
	*mgid      = iter->mgid;
	*created   = iter->created;
	*queuelen  = iter->queuelen;
	*complete  = iter->complete;
	*send_only = iter->send_only;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */