1/* 2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35#include "ipoib.h" 36 37static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, 38 struct sockaddr *); 39 40 41#include <linux/module.h> 42 43#include <linux/slab.h> 44#include <linux/kernel.h> 45#include <linux/vmalloc.h> 46 47#include <linux/if_arp.h> /* For ARPHRD_xxx */ 48#include <linux/if_vlan.h> 49#include <net/ip.h> 50#include <net/ipv6.h> 51 52MODULE_AUTHOR("Roland Dreier"); 53MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 54MODULE_LICENSE("Dual BSD/GPL"); 55 56int ipoib_sendq_size = IPOIB_TX_RING_SIZE; 57int ipoib_recvq_size = IPOIB_RX_RING_SIZE; 58 59module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); 60MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); 61module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 62MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 63 64#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 65int ipoib_debug_level = 1; 66 67module_param_named(debug_level, ipoib_debug_level, int, 0644); 68MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 69#endif 70 71struct ipoib_path_iter { 72 struct ipoib_dev_priv *priv; 73 struct ipoib_path path; 74}; 75 76static const u8 ipv4_bcast_addr[] = { 77 0x00, 0xff, 0xff, 0xff, 78 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 79 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff 80}; 81 82struct workqueue_struct *ipoib_workqueue; 83 84struct ib_sa_client ipoib_sa_client; 85 86static void ipoib_add_one(struct ib_device *device); 87static void ipoib_remove_one(struct ib_device *device); 88static void ipoib_start(struct ifnet *dev); 89static int ipoib_output(struct ifnet *ifp, struct mbuf *m, 90 const struct sockaddr *dst, struct route *ro); 91static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); 92static void ipoib_input(struct ifnet *ifp, struct mbuf *m); 93 94#define IPOIB_MTAP(_ifp, _m) \ 95do { \ 96 if (bpf_peers_present((_ifp)->if_bpf)) { \ 97 M_ASSERTVALID(_m); \ 98 ipoib_mtap_mb((_ifp), (_m)); \ 99 } \ 100} while (0) 101 102/* 103 * This is for clients that have an ipoib_header in the mbuf. 104 */ 105static void 106ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) 107{ 108 struct ipoib_header *ih; 109 struct ether_header eh; 110 111 ih = mtod(mb, struct ipoib_header *); 112 eh.ether_type = ih->proto; 113 bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); 114 bzero(&eh.ether_shost, ETHER_ADDR_LEN); 115 mb->m_data += sizeof(struct ipoib_header); 116 mb->m_len -= sizeof(struct ipoib_header); 117 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 118 mb->m_data -= sizeof(struct ipoib_header); 119 mb->m_len += sizeof(struct ipoib_header); 120} 121 122void 123ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) 124{ 125 struct ether_header eh; 126 127 eh.ether_type = proto; 128 bzero(&eh.ether_shost, ETHER_ADDR_LEN); 129 bzero(&eh.ether_dhost, ETHER_ADDR_LEN); 130 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 131} 132 133static struct ib_client ipoib_client = { 134 .name = "ipoib", 135 .add = ipoib_add_one, 136 .remove = ipoib_remove_one 137}; 138 139int 140ipoib_open(struct ipoib_dev_priv *priv) 141{ 142 struct ifnet *dev = priv->dev; 143 144 ipoib_dbg(priv, "bringing up interface\n"); 145 146 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 147 148 if (ipoib_pkey_dev_delay_open(priv)) 149 return 0; 150 151 if (ipoib_ib_dev_open(priv)) 152 goto err_disable; 153 154 if (ipoib_ib_dev_up(priv)) 155 goto err_stop; 156 157 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 158 struct ipoib_dev_priv *cpriv; 159 160 /* Bring up any child interfaces too */ 161 mutex_lock(&priv->vlan_mutex); 162 list_for_each_entry(cpriv, &priv->child_intfs, list) 163 if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) 164 ipoib_open(cpriv); 165 mutex_unlock(&priv->vlan_mutex); 166 } 167 dev->if_drv_flags |= IFF_DRV_RUNNING; 168 dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 169 170 return 0; 171 172err_stop: 173 ipoib_ib_dev_stop(priv, 1); 174 175err_disable: 176 clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 177 178 return -EINVAL; 179} 180 181static void 182ipoib_init(void *arg) 183{ 184 struct ifnet *dev; 185 struct ipoib_dev_priv *priv; 186 187 priv = arg; 188 dev = priv->dev; 189 if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) 190 ipoib_open(priv); 191 queue_work(ipoib_workqueue, &priv->flush_light); 192} 193 194 195static int 196ipoib_stop(struct ipoib_dev_priv *priv) 197{ 198 struct ifnet *dev = priv->dev; 199 200 ipoib_dbg(priv, "stopping interface\n"); 201 202 clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 203 204 dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 205 206 ipoib_ib_dev_down(priv, 0); 207 ipoib_ib_dev_stop(priv, 0); 208 209 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 210 struct ipoib_dev_priv *cpriv; 211 212 /* Bring down any child interfaces too */ 213 mutex_lock(&priv->vlan_mutex); 214 list_for_each_entry(cpriv, &priv->child_intfs, list) 215 if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) 216 ipoib_stop(cpriv); 217 mutex_unlock(&priv->vlan_mutex); 218 } 219 220 return 0; 221} 222 223int 224ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu) 225{ 226 struct ifnet *dev = priv->dev; 227 228 /* dev->if_mtu > 2K ==> connected mode */ 229 if (ipoib_cm_admin_enabled(priv)) { 230 if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) 231 return -EINVAL; 232 233 if (new_mtu > priv->mcast_mtu) 234 ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", 235 priv->mcast_mtu); 236 237 dev->if_mtu = new_mtu; 238 return 0; 239 } 240 241 if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 242 return -EINVAL; 243 244 priv->admin_mtu = new_mtu; 245 246 dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu); 247 248 queue_work(ipoib_workqueue, &priv->flush_light); 249 250 return 0; 251} 252 253static int 254ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 255{ 256 struct ipoib_dev_priv *priv = ifp->if_softc; 257 struct ifaddr *ifa = (struct ifaddr *) data; 258 struct ifreq *ifr = (struct ifreq *) data; 259 int error = 0; 260 261 /* check if detaching */ 262 if (priv == NULL || priv->gone != 0) 263 return (ENXIO); 264 265 switch (command) { 266 case SIOCSIFFLAGS: 267 if (ifp->if_flags & IFF_UP) { 268 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 269 error = -ipoib_open(priv); 270 } else 271 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 272 ipoib_stop(priv); 273 break; 274 case SIOCADDMULTI: 275 case SIOCDELMULTI: 276 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 277 queue_work(ipoib_workqueue, &priv->restart_task); 278 break; 279 case SIOCSIFADDR: 280 ifp->if_flags |= IFF_UP; 281 282 switch (ifa->ifa_addr->sa_family) { 283#ifdef INET 284 case AF_INET: 285 ifp->if_init(ifp->if_softc); /* before arpwhohas */ 286 arp_ifinit(ifp, ifa); 287 break; 288#endif 289 default: 290 ifp->if_init(ifp->if_softc); 291 break; 292 } 293 break; 294 295 case SIOCGIFADDR: 296 bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], 297 INFINIBAND_ALEN); 298 break; 299 300 case SIOCSIFMTU: 301 /* 302 * Set the interface MTU. 303 */ 304 error = -ipoib_change_mtu(priv, ifr->ifr_mtu); 305 break; 306 default: 307 error = EINVAL; 308 break; 309 } 310 return (error); 311} 312 313 314static struct ipoib_path * 315__path_find(struct ipoib_dev_priv *priv, void *gid) 316{ 317 struct rb_node *n = priv->path_tree.rb_node; 318 struct ipoib_path *path; 319 int ret; 320 321 while (n) { 322 path = rb_entry(n, struct ipoib_path, rb_node); 323 324 ret = memcmp(gid, path->pathrec.dgid.raw, 325 sizeof (union ib_gid)); 326 327 if (ret < 0) 328 n = n->rb_left; 329 else if (ret > 0) 330 n = n->rb_right; 331 else 332 return path; 333 } 334 335 return NULL; 336} 337 338static int 339__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) 340{ 341 struct rb_node **n = &priv->path_tree.rb_node; 342 struct rb_node *pn = NULL; 343 struct ipoib_path *tpath; 344 int ret; 345 346 while (*n) { 347 pn = *n; 348 tpath = rb_entry(pn, struct ipoib_path, rb_node); 349 350 ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, 351 sizeof (union ib_gid)); 352 if (ret < 0) 353 n = &pn->rb_left; 354 else if (ret > 0) 355 n = &pn->rb_right; 356 else 357 return -EEXIST; 358 } 359 360 rb_link_node(&path->rb_node, pn, n); 361 rb_insert_color(&path->rb_node, &priv->path_tree); 362 363 list_add_tail(&path->list, &priv->path_list); 364 365 return 0; 366} 367 368void 369ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) 370{ 371 372 _IF_DRAIN(&path->queue); 373 374 if (path->ah) 375 ipoib_put_ah(path->ah); 376 if (ipoib_cm_get(path)) 377 ipoib_cm_destroy_tx(ipoib_cm_get(path)); 378 379 kfree(path); 380} 381 382#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 383 384struct ipoib_path_iter * 385ipoib_path_iter_init(struct ipoib_dev_priv *priv) 386{ 387 struct ipoib_path_iter *iter; 388 389 iter = kmalloc(sizeof *iter, GFP_KERNEL); 390 if (!iter) 391 return NULL; 392 393 iter->priv = priv; 394 memset(iter->path.pathrec.dgid.raw, 0, 16); 395 396 if (ipoib_path_iter_next(iter)) { 397 kfree(iter); 398 return NULL; 399 } 400 401 return iter; 402} 403 404int 405ipoib_path_iter_next(struct ipoib_path_iter *iter) 406{ 407 struct ipoib_dev_priv *priv = iter->priv; 408 struct rb_node *n; 409 struct ipoib_path *path; 410 int ret = 1; 411 412 spin_lock_irq(&priv->lock); 413 414 n = rb_first(&priv->path_tree); 415 416 while (n) { 417 path = rb_entry(n, struct ipoib_path, rb_node); 418 419 if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, 420 sizeof (union ib_gid)) < 0) { 421 iter->path = *path; 422 ret = 0; 423 break; 424 } 425 426 n = rb_next(n); 427 } 428 429 spin_unlock_irq(&priv->lock); 430 431 return ret; 432} 433 434void 435ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) 436{ 437 *path = iter->path; 438} 439 440#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 441 442void 443ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) 444{ 445 struct ipoib_path *path, *tp; 446 447 spin_lock_irq(&priv->lock); 448 449 list_for_each_entry_safe(path, tp, &priv->path_list, list) { 450 ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", 451 be16_to_cpu(path->pathrec.dlid), 452 path->pathrec.dgid.raw, ":"); 453 path->valid = 0; 454 } 455 456 spin_unlock_irq(&priv->lock); 457} 458 459void 460ipoib_flush_paths(struct ipoib_dev_priv *priv) 461{ 462 struct ipoib_path *path, *tp; 463 LIST_HEAD(remove_list); 464 unsigned long flags; 465 466 spin_lock_irqsave(&priv->lock, flags); 467 468 list_splice_init(&priv->path_list, &remove_list); 469 470 list_for_each_entry(path, &remove_list, list) 471 rb_erase(&path->rb_node, &priv->path_tree); 472 473 list_for_each_entry_safe(path, tp, &remove_list, list) { 474 if (path->query) 475 ib_sa_cancel_query(path->query_id, path->query); 476 spin_unlock_irqrestore(&priv->lock, flags); 477 wait_for_completion(&path->done); 478 ipoib_path_free(priv, path); 479 spin_lock_irqsave(&priv->lock, flags); 480 } 481 482 spin_unlock_irqrestore(&priv->lock, flags); 483} 484 485static void 486path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) 487{ 488 struct ipoib_path *path = path_ptr; 489 struct ipoib_dev_priv *priv = path->priv; 490 struct ifnet *dev = priv->dev; 491 struct ipoib_ah *ah = NULL; 492 struct ipoib_ah *old_ah = NULL; 493 struct ifqueue mbqueue; 494 struct mbuf *mb; 495 unsigned long flags; 496 497 if (!status) 498 ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", 499 be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); 500 else 501 ipoib_dbg(priv, "PathRec status %d for GID %16D\n", 502 status, path->pathrec.dgid.raw, ":"); 503 504 bzero(&mbqueue, sizeof(mbqueue)); 505 506 if (!status) { 507 struct ib_ah_attr av; 508 509 if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) 510 ah = ipoib_create_ah(priv, priv->pd, &av); 511 } 512 513 spin_lock_irqsave(&priv->lock, flags); 514 515 if (ah) { 516 path->pathrec = *pathrec; 517 518 old_ah = path->ah; 519 path->ah = ah; 520 521 ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", 522 ah, be16_to_cpu(pathrec->dlid), pathrec->sl); 523 524 for (;;) { 525 _IF_DEQUEUE(&path->queue, mb); 526 if (mb == NULL) 527 break; 528 _IF_ENQUEUE(&mbqueue, mb); 529 } 530 531#ifdef CONFIG_INFINIBAND_IPOIB_CM 532 if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) 533 ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); 534#endif 535 536 path->valid = 1; 537 } 538 539 path->query = NULL; 540 complete(&path->done); 541 542 spin_unlock_irqrestore(&priv->lock, flags); 543 544 if (old_ah) 545 ipoib_put_ah(old_ah); 546 547 for (;;) { 548 _IF_DEQUEUE(&mbqueue, mb); 549 if (mb == NULL) 550 break; 551 mb->m_pkthdr.rcvif = dev; 552 if (dev->if_transmit(dev, mb)) 553 ipoib_warn(priv, "dev_queue_xmit failed " 554 "to requeue packet\n"); 555 } 556} 557 558static struct ipoib_path * 559path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) 560{ 561 struct ipoib_path *path; 562 563 if (!priv->broadcast) 564 return NULL; 565 566 path = kzalloc(sizeof *path, GFP_ATOMIC); 567 if (!path) 568 return NULL; 569 570 path->priv = priv; 571 572 bzero(&path->queue, sizeof(path->queue)); 573 574#ifdef CONFIG_INFINIBAND_IPOIB_CM 575 memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); 576#endif 577 memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); 578 path->pathrec.sgid = priv->local_gid; 579 path->pathrec.pkey = cpu_to_be16(priv->pkey); 580 path->pathrec.numb_path = 1; 581 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; 582 583 return path; 584} 585 586static int 587path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) 588{ 589 struct ifnet *dev = priv->dev; 590 591 ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; 592 struct ib_sa_path_rec p_rec; 593 594 p_rec = path->pathrec; 595 p_rec.mtu_selector = IB_SA_GT; 596 597 switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { 598 case 512: 599 p_rec.mtu = IB_MTU_256; 600 break; 601 case 1024: 602 p_rec.mtu = IB_MTU_512; 603 break; 604 case 2048: 605 p_rec.mtu = IB_MTU_1024; 606 break; 607 case 4096: 608 p_rec.mtu = IB_MTU_2048; 609 break; 610 default: 611 /* Wildcard everything */ 612 comp_mask = 0; 613 p_rec.mtu = 0; 614 p_rec.mtu_selector = 0; 615 } 616 617 ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", 618 p_rec.dgid.raw, ":", 619 comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); 620 621 init_completion(&path->done); 622 623 path->query_id = 624 ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, 625 &p_rec, comp_mask | 626 IB_SA_PATH_REC_DGID | 627 IB_SA_PATH_REC_SGID | 628 IB_SA_PATH_REC_NUMB_PATH | 629 IB_SA_PATH_REC_TRAFFIC_CLASS | 630 IB_SA_PATH_REC_PKEY, 631 1000, GFP_ATOMIC, 632 path_rec_completion, 633 path, &path->query); 634 if (path->query_id < 0) { 635 ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); 636 path->query = NULL; 637 complete(&path->done); 638 return path->query_id; 639 } 640 641 return 0; 642} 643 644static void 645ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) 646{ 647 struct ipoib_path *path; 648 649 path = __path_find(priv, eh->hwaddr + 4); 650 if (!path || !path->valid) { 651 int new_path = 0; 652 653 if (!path) { 654 path = path_rec_create(priv, eh->hwaddr); 655 new_path = 1; 656 } 657 if (path) { 658 _IF_ENQUEUE(&path->queue, mb); 659 if (!path->query && path_rec_start(priv, path)) { 660 if (new_path) 661 ipoib_path_free(priv, path); 662 return; 663 } else 664 __path_add(priv, path); 665 } else { 666 ++priv->dev->if_oerrors; 667 m_freem(mb); 668 } 669 670 return; 671 } 672 673 if (ipoib_cm_get(path) && ipoib_cm_up(path)) { 674 ipoib_cm_send(priv, mb, ipoib_cm_get(path)); 675 } else if (path->ah) { 676 ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); 677 } else if ((path->query || !path_rec_start(priv, path)) && 678 path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { 679 _IF_ENQUEUE(&path->queue, mb); 680 } else { 681 ++priv->dev->if_oerrors; 682 m_freem(mb); 683 } 684} 685 686static int 687ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) 688{ 689 struct ipoib_header *eh; 690 691 eh = mtod(mb, struct ipoib_header *); 692 if (IPOIB_IS_MULTICAST(eh->hwaddr)) { 693 /* Add in the P_Key for multicast*/ 694 eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; 695 eh->hwaddr[9] = priv->pkey & 0xff; 696 697 ipoib_mcast_send(priv, eh->hwaddr + 4, mb); 698 } else 699 ipoib_unicast_send(mb, priv, eh); 700 701 return 0; 702} 703 704 705static void 706_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) 707{ 708 struct mbuf *mb; 709 710 if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 711 IFF_DRV_RUNNING) 712 return; 713 714 spin_lock(&priv->lock); 715 while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && 716 (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 717 IFQ_DRV_DEQUEUE(&dev->if_snd, mb); 718 if (mb == NULL) 719 break; 720 IPOIB_MTAP(dev, mb); 721 ipoib_send_one(priv, mb); 722 } 723 spin_unlock(&priv->lock); 724} 725 726static void 727ipoib_start(struct ifnet *dev) 728{ 729 _ipoib_start(dev, dev->if_softc); 730} 731 732static void 733ipoib_vlan_start(struct ifnet *dev) 734{ 735 struct ipoib_dev_priv *priv; 736 struct mbuf *mb; 737 738 priv = VLAN_COOKIE(dev); 739 if (priv != NULL) 740 return _ipoib_start(dev, priv); 741 while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { 742 IFQ_DRV_DEQUEUE(&dev->if_snd, mb); 743 if (mb == NULL) 744 break; 745 m_freem(mb); 746 dev->if_oerrors++; 747 } 748} 749 750int 751ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) 752{ 753 754 /* Allocate RX/TX "rings" to hold queued mbs */ 755 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 756 GFP_KERNEL); 757 if (!priv->rx_ring) { 758 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 759 ca->name, ipoib_recvq_size); 760 goto out; 761 } 762 763 priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); 764 if (!priv->tx_ring) { 765 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 766 ca->name, ipoib_sendq_size); 767 goto out_rx_ring_cleanup; 768 } 769 memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); 770 771 /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ 772 773 if (ipoib_ib_dev_init(priv, ca, port)) 774 goto out_tx_ring_cleanup; 775 776 return 0; 777 778out_tx_ring_cleanup: 779 kfree(priv->tx_ring); 780 781out_rx_ring_cleanup: 782 kfree(priv->rx_ring); 783 784out: 785 return -ENOMEM; 786} 787 788static void 789ipoib_detach(struct ipoib_dev_priv *priv) 790{ 791 struct ifnet *dev; 792 793 dev = priv->dev; 794 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 795 priv->gone = 1; 796 bpfdetach(dev); 797 if_detach(dev); 798 if_free(dev); 799 } else 800 VLAN_SETCOOKIE(priv->dev, NULL); 801 802 free(priv, M_TEMP); 803} 804 805void 806ipoib_dev_cleanup(struct ipoib_dev_priv *priv) 807{ 808 struct ipoib_dev_priv *cpriv, *tcpriv; 809 810 /* Delete any child interfaces first */ 811 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 812 ipoib_dev_cleanup(cpriv); 813 ipoib_detach(cpriv); 814 } 815 816 ipoib_ib_dev_cleanup(priv); 817 818 kfree(priv->rx_ring); 819 kfree(priv->tx_ring); 820 821 priv->rx_ring = NULL; 822 priv->tx_ring = NULL; 823} 824 825static volatile int ipoib_unit; 826 827static struct ipoib_dev_priv * 828ipoib_priv_alloc(void) 829{ 830 struct ipoib_dev_priv *priv; 831 832 priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); 833 spin_lock_init(&priv->lock); 834 mutex_init(&priv->vlan_mutex); 835 INIT_LIST_HEAD(&priv->path_list); 836 INIT_LIST_HEAD(&priv->child_intfs); 837 INIT_LIST_HEAD(&priv->dead_ahs); 838 INIT_LIST_HEAD(&priv->multicast_list); 839 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); 840 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 841 INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); 842 INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); 843 INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); 844 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); 845 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 846 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 847 memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); 848 849 return (priv); 850} 851 852struct ipoib_dev_priv * 853ipoib_intf_alloc(const char *name) 854{ 855 struct ipoib_dev_priv *priv; 856 struct sockaddr_dl *sdl; 857 struct ifnet *dev; 858 859 priv = ipoib_priv_alloc(); 860 dev = priv->dev = if_alloc(IFT_INFINIBAND); 861 if (!dev) { 862 free(priv, M_TEMP); 863 return NULL; 864 } 865 dev->if_softc = priv; 866 if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1)); 867 dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; 868 dev->if_addrlen = INFINIBAND_ALEN; 869 dev->if_hdrlen = IPOIB_HEADER_LEN; 870 if_attach(dev); 871 dev->if_init = ipoib_init; 872 dev->if_ioctl = ipoib_ioctl; 873 dev->if_start = ipoib_start; 874 dev->if_output = ipoib_output; 875 dev->if_input = ipoib_input; 876 dev->if_resolvemulti = ipoib_resolvemulti; 877 if_initbaudrate(dev, IF_Gbps(10)); 878 dev->if_broadcastaddr = priv->broadcastaddr; 879 dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; 880 sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; 881 sdl->sdl_type = IFT_INFINIBAND; 882 sdl->sdl_alen = dev->if_addrlen; 883 priv->dev = dev; 884 if_link_state_change(dev, LINK_STATE_DOWN); 885 bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); 886 887 return dev->if_softc; 888} 889 890int 891ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) 892{ 893 struct ib_device_attr *device_attr; 894 int result = -ENOMEM; 895 896 device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); 897 if (!device_attr) { 898 printk(KERN_WARNING "%s: allocation of %zu bytes failed\n", 899 hca->name, sizeof *device_attr); 900 return result; 901 } 902 903 result = ib_query_device(hca, device_attr); 904 if (result) { 905 printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n", 906 hca->name, result); 907 kfree(device_attr); 908 return result; 909 } 910 priv->hca_caps = device_attr->device_cap_flags; 911 912 kfree(device_attr); 913 914 priv->dev->if_hwassist = 0; 915 priv->dev->if_capabilities = 0; 916 917#ifndef CONFIG_INFINIBAND_IPOIB_CM 918 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 919 set_bit(IPOIB_FLAG_CSUM, &priv->flags); 920 priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; 921 priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; 922 } 923 924#if 0 925 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { 926 priv->dev->if_capabilities |= IFCAP_TSO4; 927 priv->dev->if_hwassist |= CSUM_TSO; 928 } 929#endif 930#endif 931 priv->dev->if_capabilities |= 932 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 933 priv->dev->if_capenable = priv->dev->if_capabilities; 934 935 return 0; 936} 937 938 939static struct ifnet * 940ipoib_add_port(const char *format, struct ib_device *hca, u8 port) 941{ 942 struct ipoib_dev_priv *priv; 943 struct ib_port_attr attr; 944 int result = -ENOMEM; 945 946 priv = ipoib_intf_alloc(format); 947 if (!priv) 948 goto alloc_mem_failed; 949 950 if (!ib_query_port(hca, port, &attr)) 951 priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); 952 else { 953 printk(KERN_WARNING "%s: ib_query_port %d failed\n", 954 hca->name, port); 955 goto device_init_failed; 956 } 957 958 /* MTU will be reset when mcast join happens */ 959 priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 960 priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; 961 962 result = ib_query_pkey(hca, port, 0, &priv->pkey); 963 if (result) { 964 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 965 hca->name, port, result); 966 goto device_init_failed; 967 } 968 969 if (ipoib_set_dev_features(priv, hca)) 970 goto device_init_failed; 971 972 /* 973 * Set the full membership bit, so that we join the right 974 * broadcast group, etc. 975 */ 976 priv->pkey |= 0x8000; 977 978 priv->broadcastaddr[8] = priv->pkey >> 8; 979 priv->broadcastaddr[9] = priv->pkey & 0xff; 980 981 result = ib_query_gid(hca, port, 0, &priv->local_gid); 982 if (result) { 983 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 984 hca->name, port, result); 985 goto device_init_failed; 986 } 987 memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); 988 989 result = ipoib_dev_init(priv, hca, port); 990 if (result < 0) { 991 printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", 992 hca->name, port, result); 993 goto device_init_failed; 994 } 995 if (ipoib_cm_admin_enabled(priv)) 996 priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); 997 998 INIT_IB_EVENT_HANDLER(&priv->event_handler, 999 priv->ca, ipoib_event); 1000 result = ib_register_event_handler(&priv->event_handler); 1001 if (result < 0) { 1002 printk(KERN_WARNING "%s: ib_register_event_handler failed for " 1003 "port %d (ret = %d)\n", 1004 hca->name, port, result); 1005 goto event_failed; 1006 } 1007 if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); 1008 1009 return priv->dev; 1010 1011event_failed: 1012 ipoib_dev_cleanup(priv); 1013 1014device_init_failed: 1015 ipoib_detach(priv); 1016 1017alloc_mem_failed: 1018 return ERR_PTR(result); 1019} 1020 1021static void 1022ipoib_add_one(struct ib_device *device) 1023{ 1024 struct list_head *dev_list; 1025 struct ifnet *dev; 1026 struct ipoib_dev_priv *priv; 1027 int s, e, p; 1028 1029 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 1030 return; 1031 1032 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 1033 if (!dev_list) 1034 return; 1035 1036 INIT_LIST_HEAD(dev_list); 1037 1038 if (device->node_type == RDMA_NODE_IB_SWITCH) { 1039 s = 0; 1040 e = 0; 1041 } else { 1042 s = 1; 1043 e = device->phys_port_cnt; 1044 } 1045 1046 for (p = s; p <= e; ++p) { 1047 if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) 1048 continue; 1049 dev = ipoib_add_port("ib", device, p); 1050 if (!IS_ERR(dev)) { 1051 priv = dev->if_softc; 1052 list_add_tail(&priv->list, dev_list); 1053 } 1054 } 1055 1056 ib_set_client_data(device, &ipoib_client, dev_list); 1057} 1058 1059static void 1060ipoib_remove_one(struct ib_device *device) 1061{ 1062 struct ipoib_dev_priv *priv, *tmp; 1063 struct list_head *dev_list; 1064 1065 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 1066 return; 1067 1068 dev_list = ib_get_client_data(device, &ipoib_client); 1069 1070 list_for_each_entry_safe(priv, tmp, dev_list, list) { 1071 if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) 1072 continue; 1073 1074 ipoib_stop(priv); 1075 1076 ib_unregister_event_handler(&priv->event_handler); 1077 1078 /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ 1079 1080 flush_workqueue(ipoib_workqueue); 1081 1082 ipoib_dev_cleanup(priv); 1083 ipoib_detach(priv); 1084 } 1085 1086 kfree(dev_list); 1087} 1088 1089static void 1090ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 1091{ 1092 struct ipoib_dev_priv *parent; 1093 struct ipoib_dev_priv *priv; 1094 struct ifnet *dev; 1095 uint16_t pkey; 1096 int error; 1097 1098 if (ifp->if_type != IFT_INFINIBAND) 1099 return; 1100 dev = VLAN_DEVAT(ifp, vtag); 1101 if (dev == NULL) 1102 return; 1103 priv = NULL; 1104 error = 0; 1105 parent = ifp->if_softc; 1106 /* We only support 15 bits of pkey. */ 1107 if (vtag & 0x8000) 1108 return; 1109 pkey = vtag | 0x8000; /* Set full membership bit. */ 1110 if (pkey == parent->pkey) 1111 return; 1112 /* Check for dups */ 1113 mutex_lock(&parent->vlan_mutex); 1114 list_for_each_entry(priv, &parent->child_intfs, list) { 1115 if (priv->pkey == pkey) { 1116 priv = NULL; 1117 error = EBUSY; 1118 goto out; 1119 } 1120 } 1121 priv = ipoib_priv_alloc(); 1122 priv->dev = dev; 1123 priv->max_ib_mtu = parent->max_ib_mtu; 1124 priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; 1125 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); 1126 error = ipoib_set_dev_features(priv, parent->ca); 1127 if (error) 1128 goto out; 1129 priv->pkey = pkey; 1130 priv->broadcastaddr[8] = pkey >> 8; 1131 priv->broadcastaddr[9] = pkey & 0xff; 1132 dev->if_broadcastaddr = priv->broadcastaddr; 1133 error = ipoib_dev_init(priv, parent->ca, parent->port); 1134 if (error) 1135 goto out; 1136 priv->parent = parent->dev; 1137 list_add_tail(&priv->list, &parent->child_intfs); 1138 VLAN_SETCOOKIE(dev, priv); 1139 dev->if_start = ipoib_vlan_start; 1140 dev->if_drv_flags &= ~IFF_DRV_RUNNING; 1141 dev->if_hdrlen = IPOIB_HEADER_LEN; 1142 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1143 ipoib_open(priv); 1144 mutex_unlock(&parent->vlan_mutex); 1145 return; 1146out: 1147 mutex_unlock(&parent->vlan_mutex); 1148 if (priv) 1149 free(priv, M_TEMP); 1150 if (error) 1151 ipoib_warn(parent, 1152 "failed to initialize subinterface: device %s, port %d vtag 0x%X", 1153 parent->ca->name, parent->port, vtag); 1154 return; 1155} 1156 1157static void 1158ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 1159{ 1160 struct ipoib_dev_priv *parent; 1161 struct ipoib_dev_priv *priv; 1162 struct ifnet *dev; 1163 uint16_t pkey; 1164 1165 if (ifp->if_type != IFT_INFINIBAND) 1166 return; 1167 1168 dev = VLAN_DEVAT(ifp, vtag); 1169 if (dev) 1170 VLAN_SETCOOKIE(dev, NULL); 1171 pkey = vtag | 0x8000; 1172 parent = ifp->if_softc; 1173 mutex_lock(&parent->vlan_mutex); 1174 list_for_each_entry(priv, &parent->child_intfs, list) { 1175 if (priv->pkey == pkey) { 1176 ipoib_dev_cleanup(priv); 1177 list_del(&priv->list); 1178 break; 1179 } 1180 } 1181 mutex_unlock(&parent->vlan_mutex); 1182} 1183 1184eventhandler_tag ipoib_vlan_attach; 1185eventhandler_tag ipoib_vlan_detach; 1186 1187static int __init 1188ipoib_init_module(void) 1189{ 1190 int ret; 1191 1192 ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); 1193 ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); 1194 ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); 1195 1196 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 1197 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1198 ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, 1199 IPOIB_MIN_QUEUE_SIZE)); 1200#ifdef CONFIG_INFINIBAND_IPOIB_CM 1201 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1202#endif 1203 1204 ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 1205 ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); 1206 ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 1207 ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); 1208 1209 /* 1210 * We create our own workqueue mainly because we want to be 1211 * able to flush it when devices are being removed. We can't 1212 * use schedule_work()/flush_scheduled_work() because both 1213 * unregister_netdev() and linkwatch_event take the rtnl lock, 1214 * so flush_scheduled_work() can deadlock during device 1215 * removal. 1216 */ 1217 ipoib_workqueue = create_singlethread_workqueue("ipoib"); 1218 if (!ipoib_workqueue) { 1219 ret = -ENOMEM; 1220 goto err_fs; 1221 } 1222 1223 ib_sa_register_client(&ipoib_sa_client); 1224 1225 ret = ib_register_client(&ipoib_client); 1226 if (ret) 1227 goto err_sa; 1228 1229 return 0; 1230 1231err_sa: 1232 ib_sa_unregister_client(&ipoib_sa_client); 1233 destroy_workqueue(ipoib_workqueue); 1234 1235err_fs: 1236 return ret; 1237} 1238 1239static void __exit 1240ipoib_cleanup_module(void) 1241{ 1242 1243 EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); 1244 EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); 1245 ib_unregister_client(&ipoib_client); 1246 ib_sa_unregister_client(&ipoib_sa_client); 1247 destroy_workqueue(ipoib_workqueue); 1248} 1249 1250/* 1251 * Infiniband output routine. 1252 */ 1253static int 1254ipoib_output(struct ifnet *ifp, struct mbuf *m, 1255 const struct sockaddr *dst, struct route *ro) 1256{ 1257 u_char edst[INFINIBAND_ALEN]; 1258 struct llentry *lle = NULL; 1259 struct rtentry *rt0 = NULL; 1260 struct ipoib_header *eh; 1261 int error = 0; 1262 short type; 1263 1264 if (ro != NULL) { 1265 if (!(m->m_flags & (M_BCAST | M_MCAST))) 1266 lle = ro->ro_lle; 1267 rt0 = ro->ro_rt; 1268 } 1269#ifdef MAC 1270 error = mac_ifnet_check_transmit(ifp, m); 1271 if (error) 1272 goto bad; 1273#endif 1274 1275 M_PROFILE(m); 1276 if (ifp->if_flags & IFF_MONITOR) { 1277 error = ENETDOWN; 1278 goto bad; 1279 } 1280 if (!((ifp->if_flags & IFF_UP) && 1281 (ifp->if_drv_flags & IFF_DRV_RUNNING))) { 1282 error = ENETDOWN; 1283 goto bad; 1284 } 1285 1286 switch (dst->sa_family) { 1287#ifdef INET 1288 case AF_INET: 1289 if (lle != NULL && (lle->la_flags & LLE_VALID)) 1290 memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); 1291 else if (m->m_flags & M_MCAST) 1292 ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); 1293 else 1294 error = arpresolve(ifp, rt0, m, dst, edst, &lle); 1295 if (error) 1296 return (error == EWOULDBLOCK ? 0 : error); 1297 type = htons(ETHERTYPE_IP); 1298 break; 1299 case AF_ARP: 1300 { 1301 struct arphdr *ah; 1302 ah = mtod(m, struct arphdr *); 1303 ah->ar_hrd = htons(ARPHRD_INFINIBAND); 1304 1305 switch(ntohs(ah->ar_op)) { 1306 case ARPOP_REVREQUEST: 1307 case ARPOP_REVREPLY: 1308 type = htons(ETHERTYPE_REVARP); 1309 break; 1310 case ARPOP_REQUEST: 1311 case ARPOP_REPLY: 1312 default: 1313 type = htons(ETHERTYPE_ARP); 1314 break; 1315 } 1316 1317 if (m->m_flags & M_BCAST) 1318 bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); 1319 else 1320 bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); 1321 1322 } 1323 break; 1324#endif 1325#ifdef INET6 1326 case AF_INET6: 1327 if (lle != NULL && (lle->la_flags & LLE_VALID)) 1328 memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); 1329 else if (m->m_flags & M_MCAST) 1330 ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); 1331 else 1332 error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); 1333 if (error) 1334 return error; 1335 type = htons(ETHERTYPE_IPV6); 1336 break; 1337#endif 1338 1339 default: 1340 if_printf(ifp, "can't handle af%d\n", dst->sa_family); 1341 error = EAFNOSUPPORT; 1342 goto bad; 1343 } 1344 1345 /* 1346 * Add local net header. If no space in first mbuf, 1347 * allocate another. 1348 */ 1349 M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); 1350 if (m == NULL) { 1351 error = ENOBUFS; 1352 goto bad; 1353 } 1354 eh = mtod(m, struct ipoib_header *); 1355 (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); 1356 (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); 1357 1358 /* 1359 * Queue message on interface, update output statistics if 1360 * successful, and start output if interface not yet active. 1361 */ 1362 return ((ifp->if_transmit)(ifp, m)); 1363bad: 1364 if (m != NULL) 1365 m_freem(m); 1366 return (error); 1367} 1368 1369/* 1370 * Upper layer processing for a received Infiniband packet. 1371 */ 1372void 1373ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) 1374{ 1375 int isr; 1376 1377#ifdef MAC 1378 /* 1379 * Tag the mbuf with an appropriate MAC label before any other 1380 * consumers can get to it. 1381 */ 1382 mac_ifnet_create_mbuf(ifp, m); 1383#endif 1384 /* Allow monitor mode to claim this frame, after stats are updated. */ 1385 if (ifp->if_flags & IFF_MONITOR) { 1386 if_printf(ifp, "discard frame at IFF_MONITOR\n"); 1387 m_freem(m); 1388 return; 1389 } 1390 /* 1391 * Dispatch frame to upper layer. 1392 */ 1393 switch (proto) { 1394#ifdef INET 1395 case ETHERTYPE_IP: 1396 isr = NETISR_IP; 1397 break; 1398 1399 case ETHERTYPE_ARP: 1400 if (ifp->if_flags & IFF_NOARP) { 1401 /* Discard packet if ARP is disabled on interface */ 1402 m_freem(m); 1403 return; 1404 } 1405 isr = NETISR_ARP; 1406 break; 1407#endif 1408#ifdef INET6 1409 case ETHERTYPE_IPV6: 1410 isr = NETISR_IPV6; 1411 break; 1412#endif 1413 default: 1414 goto discard; 1415 } 1416 netisr_dispatch(isr, m); 1417 return; 1418 1419discard: 1420 m_freem(m); 1421} 1422 1423/* 1424 * Process a received Infiniband packet. 1425 */ 1426static void 1427ipoib_input(struct ifnet *ifp, struct mbuf *m) 1428{ 1429 struct ipoib_header *eh; 1430 1431 if ((ifp->if_flags & IFF_UP) == 0) { 1432 m_freem(m); 1433 return; 1434 } 1435 CURVNET_SET_QUIET(ifp->if_vnet); 1436 1437 /* Let BPF have it before we strip the header. */ 1438 IPOIB_MTAP(ifp, m); 1439 eh = mtod(m, struct ipoib_header *); 1440 /* 1441 * Reset layer specific mbuf flags to avoid confusing upper layers. 1442 * Strip off Infiniband header. 1443 */ 1444 m->m_flags &= ~M_VLANTAG; 1445 m_clrprotoflags(m); 1446 m_adj(m, IPOIB_HEADER_LEN); 1447 1448 if (IPOIB_IS_MULTICAST(eh->hwaddr)) { 1449 if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, 1450 ifp->if_addrlen) == 0) 1451 m->m_flags |= M_BCAST; 1452 else 1453 m->m_flags |= M_MCAST; 1454 ifp->if_imcasts++; 1455 } 1456 1457 ipoib_demux(ifp, m, ntohs(eh->proto)); 1458 CURVNET_RESTORE(); 1459} 1460 1461static int 1462ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, 1463 struct sockaddr *sa) 1464{ 1465 struct sockaddr_dl *sdl; 1466#ifdef INET 1467 struct sockaddr_in *sin; 1468#endif 1469#ifdef INET6 1470 struct sockaddr_in6 *sin6; 1471#endif 1472 u_char *e_addr; 1473 1474 switch(sa->sa_family) { 1475 case AF_LINK: 1476 /* 1477 * No mapping needed. Just check that it's a valid MC address. 1478 */ 1479 sdl = (struct sockaddr_dl *)sa; 1480 e_addr = LLADDR(sdl); 1481 if (!IPOIB_IS_MULTICAST(e_addr)) 1482 return EADDRNOTAVAIL; 1483 *llsa = 0; 1484 return 0; 1485 1486#ifdef INET 1487 case AF_INET: 1488 sin = (struct sockaddr_in *)sa; 1489 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 1490 return EADDRNOTAVAIL; 1491 sdl = malloc(sizeof *sdl, M_IFMADDR, 1492 M_NOWAIT|M_ZERO); 1493 if (sdl == NULL) 1494 return ENOMEM; 1495 sdl->sdl_len = sizeof *sdl; 1496 sdl->sdl_family = AF_LINK; 1497 sdl->sdl_index = ifp->if_index; 1498 sdl->sdl_type = IFT_INFINIBAND; 1499 sdl->sdl_alen = INFINIBAND_ALEN; 1500 e_addr = LLADDR(sdl); 1501 ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, 1502 e_addr); 1503 *llsa = (struct sockaddr *)sdl; 1504 return 0; 1505#endif 1506#ifdef INET6 1507 case AF_INET6: 1508 sin6 = (struct sockaddr_in6 *)sa; 1509 /* 1510 * An IP6 address of 0 means listen to all 1511 * of the multicast address used for IP6. 1512 * This has no meaning in ipoib. 1513 */ 1514 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 1515 return EADDRNOTAVAIL; 1516 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 1517 return EADDRNOTAVAIL; 1518 sdl = malloc(sizeof *sdl, M_IFMADDR, 1519 M_NOWAIT|M_ZERO); 1520 if (sdl == NULL) 1521 return (ENOMEM); 1522 sdl->sdl_len = sizeof *sdl; 1523 sdl->sdl_family = AF_LINK; 1524 sdl->sdl_index = ifp->if_index; 1525 sdl->sdl_type = IFT_INFINIBAND; 1526 sdl->sdl_alen = INFINIBAND_ALEN; 1527 e_addr = LLADDR(sdl); 1528 ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); 1529 *llsa = (struct sockaddr *)sdl; 1530 return 0; 1531#endif 1532 1533 default: 1534 return EAFNOSUPPORT; 1535 } 1536} 1537 1538module_init(ipoib_init_module); 1539module_exit(ipoib_cleanup_module); 1540 1541#undef MODULE_VERSION 1542#include <sys/module.h> 1543static int 1544ipoib_evhand(module_t mod, int event, void *arg) 1545{ 1546 return (0); 1547} 1548 1549static moduledata_t ipoib_mod = { 1550 .name = "ipoib", 1551 .evhand = ipoib_evhand, 1552}; 1553 1554DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY); 1555MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); 1556 1557