1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004 Topspin Communications. All rights reserved. 3219820Sjeff * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4219820Sjeff * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 33219820Sjeff */ 34219820Sjeff 35337096Shselasky#include <sys/cdefs.h> 36337096Shselasky__FBSDID("$FreeBSD$"); 37337096Shselasky 38219820Sjeff#include "ipoib.h" 39219820Sjeff 40219820Sjeffstatic int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, 41219820Sjeff struct sockaddr *); 42219820Sjeff 43219820Sjeff 44219820Sjeff#include <linux/module.h> 45219820Sjeff 46219820Sjeff#include <linux/slab.h> 47219820Sjeff#include <linux/kernel.h> 48219820Sjeff#include <linux/vmalloc.h> 49219820Sjeff 50219820Sjeff#include <linux/if_arp.h> /* For ARPHRD_xxx */ 51219820Sjeff#include <linux/if_vlan.h> 52219820Sjeff#include <net/ip.h> 53219820Sjeff#include <net/ipv6.h> 54219820Sjeff 55338556Shselasky#include <rdma/ib_cache.h> 56338556Shselasky 57219820SjeffMODULE_AUTHOR("Roland Dreier"); 58219820SjeffMODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 59219820SjeffMODULE_LICENSE("Dual BSD/GPL"); 60219820Sjeff 61219820Sjeffint ipoib_sendq_size = IPOIB_TX_RING_SIZE; 62219820Sjeffint ipoib_recvq_size = IPOIB_RX_RING_SIZE; 63219820Sjeff 64219820Sjeffmodule_param_named(send_queue_size, ipoib_sendq_size, int, 0444); 65219820SjeffMODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); 66219820Sjeffmodule_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 67219820SjeffMODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 68219820Sjeff 69219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 70219820Sjeffint ipoib_debug_level = 1; 71219820Sjeff 72219820Sjeffmodule_param_named(debug_level, ipoib_debug_level, int, 0644); 73219820SjeffMODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 74219820Sjeff#endif 75219820Sjeff 76219820Sjeffstruct ipoib_path_iter { 77219820Sjeff struct ipoib_dev_priv *priv; 78219820Sjeff struct ipoib_path path; 79219820Sjeff}; 80219820Sjeff 81219820Sjeffstatic const u8 ipv4_bcast_addr[] = { 82219820Sjeff 0x00, 0xff, 0xff, 0xff, 83219820Sjeff 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 84219820Sjeff 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff 85219820Sjeff}; 86219820Sjeff 87219820Sjeffstruct workqueue_struct *ipoib_workqueue; 88219820Sjeff 89219820Sjeffstruct ib_sa_client ipoib_sa_client; 90219820Sjeff 91219820Sjeffstatic void ipoib_add_one(struct ib_device *device); 92331769Shselaskystatic void ipoib_remove_one(struct ib_device *device, void *client_data); 93338556Shselaskystatic struct net_device *ipoib_get_net_dev_by_params( 94338556Shselasky struct ib_device *dev, u8 port, u16 pkey, 95338556Shselasky const union ib_gid *gid, const struct sockaddr *addr, 96338556Shselasky void *client_data); 97219820Sjeffstatic void ipoib_start(struct ifnet *dev); 98219820Sjeffstatic int ipoib_output(struct ifnet *ifp, struct mbuf *m, 99249976Sglebius const struct sockaddr *dst, struct route *ro); 100219820Sjeffstatic int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); 101219820Sjeffstatic void ipoib_input(struct ifnet *ifp, struct mbuf *m); 102219820Sjeff 103219820Sjeff#define IPOIB_MTAP(_ifp, _m) \ 104219820Sjeffdo { \ 105219820Sjeff if (bpf_peers_present((_ifp)->if_bpf)) { \ 106219820Sjeff M_ASSERTVALID(_m); \ 107219820Sjeff ipoib_mtap_mb((_ifp), (_m)); \ 108219820Sjeff } \ 109219820Sjeff} while (0) 110219820Sjeff 111331769Shselaskystatic struct unrhdr *ipoib_unrhdr; 112331769Shselasky 113331769Shselaskystatic void 114331769Shselaskyipoib_unrhdr_init(void *arg) 115331769Shselasky{ 116331769Shselasky 117331769Shselasky ipoib_unrhdr = new_unrhdr(0, 65535, NULL); 118331769Shselasky} 119331769ShselaskySYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL); 120331769Shselasky 121331769Shselaskystatic void 122331769Shselaskyipoib_unrhdr_uninit(void *arg) 123331769Shselasky{ 124331769Shselasky 125331769Shselasky if (ipoib_unrhdr != NULL) { 126331769Shselasky struct unrhdr *hdr; 127331769Shselasky 128331769Shselasky hdr = ipoib_unrhdr; 129331769Shselasky ipoib_unrhdr = NULL; 130331769Shselasky 131331769Shselasky delete_unrhdr(hdr); 132331769Shselasky } 133331769Shselasky} 134331769ShselaskySYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL); 135331769Shselasky 136219820Sjeff/* 137219820Sjeff * This is for clients that have an ipoib_header in the mbuf. 138219820Sjeff */ 139219820Sjeffstatic void 140219820Sjeffipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) 141219820Sjeff{ 142219820Sjeff struct ipoib_header *ih; 143219820Sjeff struct ether_header eh; 144219820Sjeff 145219820Sjeff ih = mtod(mb, struct ipoib_header *); 146219820Sjeff eh.ether_type = ih->proto; 147219820Sjeff bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); 148219820Sjeff bzero(&eh.ether_shost, ETHER_ADDR_LEN); 149219820Sjeff mb->m_data += sizeof(struct ipoib_header); 150219820Sjeff mb->m_len -= sizeof(struct ipoib_header); 151219820Sjeff bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 152219820Sjeff mb->m_data -= sizeof(struct ipoib_header); 153219820Sjeff mb->m_len += sizeof(struct ipoib_header); 154219820Sjeff} 155219820Sjeff 156219820Sjeffvoid 157219820Sjeffipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) 158219820Sjeff{ 159219820Sjeff struct ether_header eh; 160219820Sjeff 161219820Sjeff eh.ether_type = proto; 162219820Sjeff bzero(&eh.ether_shost, ETHER_ADDR_LEN); 163219820Sjeff bzero(&eh.ether_dhost, ETHER_ADDR_LEN); 164219820Sjeff bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 165219820Sjeff} 166219820Sjeff 167219820Sjeffstatic struct ib_client ipoib_client = { 168219820Sjeff .name = "ipoib", 169219820Sjeff .add = ipoib_add_one, 170338556Shselasky .remove = ipoib_remove_one, 171338556Shselasky .get_net_dev_by_params = ipoib_get_net_dev_by_params, 172219820Sjeff}; 173219820Sjeff 174219820Sjeffint 175219820Sjeffipoib_open(struct ipoib_dev_priv *priv) 176219820Sjeff{ 177219820Sjeff struct ifnet *dev = priv->dev; 178219820Sjeff 179219820Sjeff ipoib_dbg(priv, "bringing up interface\n"); 180219820Sjeff 181219820Sjeff set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 182219820Sjeff 183219820Sjeff if (ipoib_pkey_dev_delay_open(priv)) 184219820Sjeff return 0; 185219820Sjeff 186219820Sjeff if (ipoib_ib_dev_open(priv)) 187219820Sjeff goto err_disable; 188219820Sjeff 189219820Sjeff if (ipoib_ib_dev_up(priv)) 190219820Sjeff goto err_stop; 191219820Sjeff 192219820Sjeff if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 193219820Sjeff struct ipoib_dev_priv *cpriv; 194219820Sjeff 195219820Sjeff /* Bring up any child interfaces too */ 196219820Sjeff mutex_lock(&priv->vlan_mutex); 197219820Sjeff list_for_each_entry(cpriv, &priv->child_intfs, list) 198219820Sjeff if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) 199219820Sjeff ipoib_open(cpriv); 200219820Sjeff mutex_unlock(&priv->vlan_mutex); 201219820Sjeff } 202219820Sjeff dev->if_drv_flags |= IFF_DRV_RUNNING; 203219820Sjeff dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 204219820Sjeff 205219820Sjeff return 0; 206219820Sjeff 207219820Sjefferr_stop: 208219820Sjeff ipoib_ib_dev_stop(priv, 1); 209219820Sjeff 210219820Sjefferr_disable: 211219820Sjeff clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 212219820Sjeff 213219820Sjeff return -EINVAL; 214219820Sjeff} 215219820Sjeff 216219820Sjeffstatic void 217219820Sjeffipoib_init(void *arg) 218219820Sjeff{ 219219820Sjeff struct ifnet *dev; 220219820Sjeff struct ipoib_dev_priv *priv; 221219820Sjeff 222219820Sjeff priv = arg; 223219820Sjeff dev = priv->dev; 224219820Sjeff if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) 225219820Sjeff ipoib_open(priv); 226219820Sjeff queue_work(ipoib_workqueue, &priv->flush_light); 227219820Sjeff} 228219820Sjeff 229219820Sjeff 230219820Sjeffstatic int 231219820Sjeffipoib_stop(struct ipoib_dev_priv *priv) 232219820Sjeff{ 233219820Sjeff struct ifnet *dev = priv->dev; 234219820Sjeff 235219820Sjeff ipoib_dbg(priv, "stopping interface\n"); 236219820Sjeff 237219820Sjeff clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 238219820Sjeff 239219820Sjeff dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 240219820Sjeff 241219820Sjeff ipoib_ib_dev_down(priv, 0); 242219820Sjeff ipoib_ib_dev_stop(priv, 0); 243219820Sjeff 244219820Sjeff if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 245219820Sjeff struct ipoib_dev_priv *cpriv; 246219820Sjeff 247219820Sjeff /* Bring down any child interfaces too */ 248219820Sjeff mutex_lock(&priv->vlan_mutex); 249219820Sjeff list_for_each_entry(cpriv, &priv->child_intfs, list) 250219820Sjeff if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) 251219820Sjeff ipoib_stop(cpriv); 252219820Sjeff mutex_unlock(&priv->vlan_mutex); 253219820Sjeff } 254219820Sjeff 255219820Sjeff return 0; 256219820Sjeff} 257219820Sjeff 258341885Shselaskystatic int 259341885Shselaskyipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu, 260341885Shselasky bool propagate) 261341885Shselasky{ 262341885Shselasky struct ifnet *ifp; 263341885Shselasky struct ifreq ifr; 264341885Shselasky int error; 265341885Shselasky 266341885Shselasky ifp = priv->dev; 267341885Shselasky if (ifp->if_mtu == new_mtu) 268341885Shselasky return (0); 269341885Shselasky if (propagate) { 270341885Shselasky strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); 271341885Shselasky ifr.ifr_mtu = new_mtu; 272341885Shselasky CURVNET_SET(ifp->if_vnet); 273341885Shselasky error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread); 274341885Shselasky CURVNET_RESTORE(); 275341885Shselasky } else { 276341885Shselasky ifp->if_mtu = new_mtu; 277341885Shselasky error = 0; 278341885Shselasky } 279341885Shselasky return (error); 280341885Shselasky} 281341885Shselasky 282219820Sjeffint 283341885Shselaskyipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate) 284219820Sjeff{ 285341885Shselasky int error, prev_admin_mtu; 286219820Sjeff 287219820Sjeff /* dev->if_mtu > 2K ==> connected mode */ 288219820Sjeff if (ipoib_cm_admin_enabled(priv)) { 289219820Sjeff if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) 290219820Sjeff return -EINVAL; 291219820Sjeff 292219820Sjeff if (new_mtu > priv->mcast_mtu) 293219820Sjeff ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", 294219820Sjeff priv->mcast_mtu); 295219820Sjeff 296341885Shselasky return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate)); 297219820Sjeff } 298219820Sjeff 299219820Sjeff if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 300219820Sjeff return -EINVAL; 301219820Sjeff 302341885Shselasky prev_admin_mtu = priv->admin_mtu; 303219820Sjeff priv->admin_mtu = new_mtu; 304341885Shselasky error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu, 305341885Shselasky priv->admin_mtu), propagate); 306341887Shselasky if (error == 0) { 307341887Shselasky /* check for MTU change to avoid infinite loop */ 308341887Shselasky if (prev_admin_mtu != new_mtu) 309341887Shselasky queue_work(ipoib_workqueue, &priv->flush_light); 310341887Shselasky } else 311341885Shselasky priv->admin_mtu = prev_admin_mtu; 312341885Shselasky return (error); 313219820Sjeff} 314219820Sjeff 315219820Sjeffstatic int 316219820Sjeffipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 317219820Sjeff{ 318219820Sjeff struct ipoib_dev_priv *priv = ifp->if_softc; 319219820Sjeff struct ifaddr *ifa = (struct ifaddr *) data; 320219820Sjeff struct ifreq *ifr = (struct ifreq *) data; 321219820Sjeff int error = 0; 322219820Sjeff 323296909Shselasky /* check if detaching */ 324296909Shselasky if (priv == NULL || priv->gone != 0) 325296909Shselasky return (ENXIO); 326296909Shselasky 327219820Sjeff switch (command) { 328219820Sjeff case SIOCSIFFLAGS: 329219820Sjeff if (ifp->if_flags & IFF_UP) { 330219820Sjeff if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 331219820Sjeff error = -ipoib_open(priv); 332219820Sjeff } else 333219820Sjeff if (ifp->if_drv_flags & IFF_DRV_RUNNING) 334219820Sjeff ipoib_stop(priv); 335219820Sjeff break; 336219820Sjeff case SIOCADDMULTI: 337219820Sjeff case SIOCDELMULTI: 338219820Sjeff if (ifp->if_drv_flags & IFF_DRV_RUNNING) 339219820Sjeff queue_work(ipoib_workqueue, &priv->restart_task); 340219820Sjeff break; 341219820Sjeff case SIOCSIFADDR: 342219820Sjeff ifp->if_flags |= IFF_UP; 343219820Sjeff 344219820Sjeff switch (ifa->ifa_addr->sa_family) { 345219820Sjeff#ifdef INET 346219820Sjeff case AF_INET: 347219820Sjeff ifp->if_init(ifp->if_softc); /* before arpwhohas */ 348219820Sjeff arp_ifinit(ifp, ifa); 349219820Sjeff break; 350219820Sjeff#endif 351219820Sjeff default: 352219820Sjeff ifp->if_init(ifp->if_softc); 353219820Sjeff break; 354219820Sjeff } 355219820Sjeff break; 356219820Sjeff 357219820Sjeff case SIOCGIFADDR: 358337096Shselasky bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], 359337096Shselasky INFINIBAND_ALEN); 360219820Sjeff break; 361219820Sjeff 362219820Sjeff case SIOCSIFMTU: 363219820Sjeff /* 364219820Sjeff * Set the interface MTU. 365219820Sjeff */ 366341885Shselasky error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false); 367219820Sjeff break; 368219820Sjeff default: 369219820Sjeff error = EINVAL; 370219820Sjeff break; 371219820Sjeff } 372219820Sjeff return (error); 373219820Sjeff} 374219820Sjeff 375219820Sjeff 376219820Sjeffstatic struct ipoib_path * 377219820Sjeff__path_find(struct ipoib_dev_priv *priv, void *gid) 378219820Sjeff{ 379219820Sjeff struct rb_node *n = priv->path_tree.rb_node; 380219820Sjeff struct ipoib_path *path; 381219820Sjeff int ret; 382219820Sjeff 383219820Sjeff while (n) { 384219820Sjeff path = rb_entry(n, struct ipoib_path, rb_node); 385219820Sjeff 386219820Sjeff ret = memcmp(gid, path->pathrec.dgid.raw, 387219820Sjeff sizeof (union ib_gid)); 388219820Sjeff 389219820Sjeff if (ret < 0) 390219820Sjeff n = n->rb_left; 391219820Sjeff else if (ret > 0) 392219820Sjeff n = n->rb_right; 393219820Sjeff else 394219820Sjeff return path; 395219820Sjeff } 396219820Sjeff 397219820Sjeff return NULL; 398219820Sjeff} 399219820Sjeff 400219820Sjeffstatic int 401219820Sjeff__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) 402219820Sjeff{ 403219820Sjeff struct rb_node **n = &priv->path_tree.rb_node; 404219820Sjeff struct rb_node *pn = NULL; 405219820Sjeff struct ipoib_path *tpath; 406219820Sjeff int ret; 407219820Sjeff 408219820Sjeff while (*n) { 409219820Sjeff pn = *n; 410219820Sjeff tpath = rb_entry(pn, struct ipoib_path, rb_node); 411219820Sjeff 412219820Sjeff ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, 413219820Sjeff sizeof (union ib_gid)); 414219820Sjeff if (ret < 0) 415219820Sjeff n = &pn->rb_left; 416219820Sjeff else if (ret > 0) 417219820Sjeff n = &pn->rb_right; 418219820Sjeff else 419219820Sjeff return -EEXIST; 420219820Sjeff } 421219820Sjeff 422219820Sjeff rb_link_node(&path->rb_node, pn, n); 423219820Sjeff rb_insert_color(&path->rb_node, &priv->path_tree); 424219820Sjeff 425219820Sjeff list_add_tail(&path->list, &priv->path_list); 426219820Sjeff 427219820Sjeff return 0; 428219820Sjeff} 429219820Sjeff 430219820Sjeffvoid 431219820Sjeffipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) 432219820Sjeff{ 433219820Sjeff 434219820Sjeff _IF_DRAIN(&path->queue); 435219820Sjeff 436219820Sjeff if (path->ah) 437219820Sjeff ipoib_put_ah(path->ah); 438219820Sjeff if (ipoib_cm_get(path)) 439219820Sjeff ipoib_cm_destroy_tx(ipoib_cm_get(path)); 440219820Sjeff 441219820Sjeff kfree(path); 442219820Sjeff} 443219820Sjeff 444219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 445219820Sjeff 446219820Sjeffstruct ipoib_path_iter * 447219820Sjeffipoib_path_iter_init(struct ipoib_dev_priv *priv) 448219820Sjeff{ 449219820Sjeff struct ipoib_path_iter *iter; 450219820Sjeff 451219820Sjeff iter = kmalloc(sizeof *iter, GFP_KERNEL); 452219820Sjeff if (!iter) 453219820Sjeff return NULL; 454219820Sjeff 455219820Sjeff iter->priv = priv; 456219820Sjeff memset(iter->path.pathrec.dgid.raw, 0, 16); 457219820Sjeff 458219820Sjeff if (ipoib_path_iter_next(iter)) { 459219820Sjeff kfree(iter); 460219820Sjeff return NULL; 461219820Sjeff } 462219820Sjeff 463219820Sjeff return iter; 464219820Sjeff} 465219820Sjeff 466219820Sjeffint 467219820Sjeffipoib_path_iter_next(struct ipoib_path_iter *iter) 468219820Sjeff{ 469219820Sjeff struct ipoib_dev_priv *priv = iter->priv; 470219820Sjeff struct rb_node *n; 471219820Sjeff struct ipoib_path *path; 472219820Sjeff int ret = 1; 473219820Sjeff 474219820Sjeff spin_lock_irq(&priv->lock); 475219820Sjeff 476219820Sjeff n = rb_first(&priv->path_tree); 477219820Sjeff 478219820Sjeff while (n) { 479219820Sjeff path = rb_entry(n, struct ipoib_path, rb_node); 480219820Sjeff 481219820Sjeff if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, 482219820Sjeff sizeof (union ib_gid)) < 0) { 483219820Sjeff iter->path = *path; 484219820Sjeff ret = 0; 485219820Sjeff break; 486219820Sjeff } 487219820Sjeff 488219820Sjeff n = rb_next(n); 489219820Sjeff } 490219820Sjeff 491219820Sjeff spin_unlock_irq(&priv->lock); 492219820Sjeff 493219820Sjeff return ret; 494219820Sjeff} 495219820Sjeff 496219820Sjeffvoid 497219820Sjeffipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) 498219820Sjeff{ 499219820Sjeff *path = iter->path; 500219820Sjeff} 501219820Sjeff 502219820Sjeff#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 503219820Sjeff 504219820Sjeffvoid 505219820Sjeffipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) 506219820Sjeff{ 507219820Sjeff struct ipoib_path *path, *tp; 508219820Sjeff 509219820Sjeff spin_lock_irq(&priv->lock); 510219820Sjeff 511219820Sjeff list_for_each_entry_safe(path, tp, &priv->path_list, list) { 512219820Sjeff ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", 513219820Sjeff be16_to_cpu(path->pathrec.dlid), 514219820Sjeff path->pathrec.dgid.raw, ":"); 515219820Sjeff path->valid = 0; 516219820Sjeff } 517219820Sjeff 518219820Sjeff spin_unlock_irq(&priv->lock); 519219820Sjeff} 520219820Sjeff 521219820Sjeffvoid 522219820Sjeffipoib_flush_paths(struct ipoib_dev_priv *priv) 523219820Sjeff{ 524219820Sjeff struct ipoib_path *path, *tp; 525219820Sjeff LIST_HEAD(remove_list); 526219820Sjeff unsigned long flags; 527219820Sjeff 528219820Sjeff spin_lock_irqsave(&priv->lock, flags); 529219820Sjeff 530219820Sjeff list_splice_init(&priv->path_list, &remove_list); 531219820Sjeff 532219820Sjeff list_for_each_entry(path, &remove_list, list) 533219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 534219820Sjeff 535219820Sjeff list_for_each_entry_safe(path, tp, &remove_list, list) { 536219820Sjeff if (path->query) 537219820Sjeff ib_sa_cancel_query(path->query_id, path->query); 538219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 539219820Sjeff wait_for_completion(&path->done); 540219820Sjeff ipoib_path_free(priv, path); 541219820Sjeff spin_lock_irqsave(&priv->lock, flags); 542219820Sjeff } 543219820Sjeff 544219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 545219820Sjeff} 546219820Sjeff 547219820Sjeffstatic void 548219820Sjeffpath_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) 549219820Sjeff{ 550219820Sjeff struct ipoib_path *path = path_ptr; 551219820Sjeff struct ipoib_dev_priv *priv = path->priv; 552219820Sjeff struct ifnet *dev = priv->dev; 553219820Sjeff struct ipoib_ah *ah = NULL; 554219820Sjeff struct ipoib_ah *old_ah = NULL; 555219820Sjeff struct ifqueue mbqueue; 556219820Sjeff struct mbuf *mb; 557219820Sjeff unsigned long flags; 558219820Sjeff 559219820Sjeff if (!status) 560219820Sjeff ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", 561219820Sjeff be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); 562219820Sjeff else 563219820Sjeff ipoib_dbg(priv, "PathRec status %d for GID %16D\n", 564219820Sjeff status, path->pathrec.dgid.raw, ":"); 565219820Sjeff 566219820Sjeff bzero(&mbqueue, sizeof(mbqueue)); 567219820Sjeff 568219820Sjeff if (!status) { 569219820Sjeff struct ib_ah_attr av; 570219820Sjeff 571219820Sjeff if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) 572219820Sjeff ah = ipoib_create_ah(priv, priv->pd, &av); 573219820Sjeff } 574219820Sjeff 575219820Sjeff spin_lock_irqsave(&priv->lock, flags); 576219820Sjeff 577219820Sjeff if (ah) { 578219820Sjeff path->pathrec = *pathrec; 579219820Sjeff 580219820Sjeff old_ah = path->ah; 581219820Sjeff path->ah = ah; 582219820Sjeff 583219820Sjeff ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", 584219820Sjeff ah, be16_to_cpu(pathrec->dlid), pathrec->sl); 585219820Sjeff 586219820Sjeff for (;;) { 587219820Sjeff _IF_DEQUEUE(&path->queue, mb); 588219820Sjeff if (mb == NULL) 589219820Sjeff break; 590219820Sjeff _IF_ENQUEUE(&mbqueue, mb); 591219820Sjeff } 592219820Sjeff 593219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM 594219820Sjeff if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) 595219820Sjeff ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); 596219820Sjeff#endif 597219820Sjeff 598219820Sjeff path->valid = 1; 599219820Sjeff } 600219820Sjeff 601219820Sjeff path->query = NULL; 602219820Sjeff complete(&path->done); 603219820Sjeff 604219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 605219820Sjeff 606219820Sjeff if (old_ah) 607219820Sjeff ipoib_put_ah(old_ah); 608219820Sjeff 609219820Sjeff for (;;) { 610219820Sjeff _IF_DEQUEUE(&mbqueue, mb); 611219820Sjeff if (mb == NULL) 612219820Sjeff break; 613219820Sjeff mb->m_pkthdr.rcvif = dev; 614219820Sjeff if (dev->if_transmit(dev, mb)) 615219820Sjeff ipoib_warn(priv, "dev_queue_xmit failed " 616219820Sjeff "to requeue packet\n"); 617219820Sjeff } 618219820Sjeff} 619219820Sjeff 620219820Sjeffstatic struct ipoib_path * 621219820Sjeffpath_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) 622219820Sjeff{ 623219820Sjeff struct ipoib_path *path; 624219820Sjeff 625219820Sjeff if (!priv->broadcast) 626219820Sjeff return NULL; 627219820Sjeff 628219820Sjeff path = kzalloc(sizeof *path, GFP_ATOMIC); 629219820Sjeff if (!path) 630219820Sjeff return NULL; 631219820Sjeff 632219820Sjeff path->priv = priv; 633219820Sjeff 634219820Sjeff bzero(&path->queue, sizeof(path->queue)); 635219820Sjeff 636219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM 637219820Sjeff memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); 638219820Sjeff#endif 639219820Sjeff memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); 640219820Sjeff path->pathrec.sgid = priv->local_gid; 641219820Sjeff path->pathrec.pkey = cpu_to_be16(priv->pkey); 642219820Sjeff path->pathrec.numb_path = 1; 643219820Sjeff path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; 644219820Sjeff 645219820Sjeff return path; 646219820Sjeff} 647219820Sjeff 648219820Sjeffstatic int 649219820Sjeffpath_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) 650219820Sjeff{ 651219820Sjeff struct ifnet *dev = priv->dev; 652219820Sjeff 653219820Sjeff ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; 654219820Sjeff struct ib_sa_path_rec p_rec; 655219820Sjeff 656219820Sjeff p_rec = path->pathrec; 657219820Sjeff p_rec.mtu_selector = IB_SA_GT; 658219820Sjeff 659219820Sjeff switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { 660219820Sjeff case 512: 661219820Sjeff p_rec.mtu = IB_MTU_256; 662219820Sjeff break; 663219820Sjeff case 1024: 664219820Sjeff p_rec.mtu = IB_MTU_512; 665219820Sjeff break; 666219820Sjeff case 2048: 667219820Sjeff p_rec.mtu = IB_MTU_1024; 668219820Sjeff break; 669219820Sjeff case 4096: 670219820Sjeff p_rec.mtu = IB_MTU_2048; 671219820Sjeff break; 672219820Sjeff default: 673219820Sjeff /* Wildcard everything */ 674219820Sjeff comp_mask = 0; 675219820Sjeff p_rec.mtu = 0; 676219820Sjeff p_rec.mtu_selector = 0; 677219820Sjeff } 678219820Sjeff 679219820Sjeff ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", 680219820Sjeff p_rec.dgid.raw, ":", 681219820Sjeff comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); 682219820Sjeff 683219820Sjeff init_completion(&path->done); 684219820Sjeff 685219820Sjeff path->query_id = 686219820Sjeff ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, 687219820Sjeff &p_rec, comp_mask | 688219820Sjeff IB_SA_PATH_REC_DGID | 689219820Sjeff IB_SA_PATH_REC_SGID | 690219820Sjeff IB_SA_PATH_REC_NUMB_PATH | 691219820Sjeff IB_SA_PATH_REC_TRAFFIC_CLASS | 692219820Sjeff IB_SA_PATH_REC_PKEY, 693219820Sjeff 1000, GFP_ATOMIC, 694219820Sjeff path_rec_completion, 695219820Sjeff path, &path->query); 696219820Sjeff if (path->query_id < 0) { 697219820Sjeff ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); 698219820Sjeff path->query = NULL; 699219820Sjeff complete(&path->done); 700219820Sjeff return path->query_id; 701219820Sjeff } 702219820Sjeff 703219820Sjeff return 0; 704219820Sjeff} 705219820Sjeff 706219820Sjeffstatic void 707219820Sjeffipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) 708219820Sjeff{ 709219820Sjeff struct ipoib_path *path; 710219820Sjeff 711219820Sjeff path = __path_find(priv, eh->hwaddr + 4); 712219820Sjeff if (!path || !path->valid) { 713219820Sjeff int new_path = 0; 714219820Sjeff 715219820Sjeff if (!path) { 716219820Sjeff path = path_rec_create(priv, eh->hwaddr); 717219820Sjeff new_path = 1; 718219820Sjeff } 719219820Sjeff if (path) { 720331769Shselasky if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) 721331769Shselasky _IF_ENQUEUE(&path->queue, mb); 722331769Shselasky else { 723331769Shselasky if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); 724331769Shselasky m_freem(mb); 725331769Shselasky } 726331769Shselasky 727219820Sjeff if (!path->query && path_rec_start(priv, path)) { 728219820Sjeff if (new_path) 729219820Sjeff ipoib_path_free(priv, path); 730219820Sjeff return; 731219820Sjeff } else 732219820Sjeff __path_add(priv, path); 733219820Sjeff } else { 734272225Sglebius if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); 735219820Sjeff m_freem(mb); 736219820Sjeff } 737219820Sjeff 738219820Sjeff return; 739219820Sjeff } 740219820Sjeff 741219820Sjeff if (ipoib_cm_get(path) && ipoib_cm_up(path)) { 742219820Sjeff ipoib_cm_send(priv, mb, ipoib_cm_get(path)); 743219820Sjeff } else if (path->ah) { 744219820Sjeff ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); 745219820Sjeff } else if ((path->query || !path_rec_start(priv, path)) && 746219820Sjeff path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { 747219820Sjeff _IF_ENQUEUE(&path->queue, mb); 748219820Sjeff } else { 749272225Sglebius if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1); 750219820Sjeff m_freem(mb); 751219820Sjeff } 752219820Sjeff} 753219820Sjeff 754219820Sjeffstatic int 755219820Sjeffipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) 756219820Sjeff{ 757219820Sjeff struct ipoib_header *eh; 758219820Sjeff 759219820Sjeff eh = mtod(mb, struct ipoib_header *); 760219820Sjeff if (IPOIB_IS_MULTICAST(eh->hwaddr)) { 761219820Sjeff /* Add in the P_Key for multicast*/ 762219820Sjeff eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; 763219820Sjeff eh->hwaddr[9] = priv->pkey & 0xff; 764219820Sjeff 765219820Sjeff ipoib_mcast_send(priv, eh->hwaddr + 4, mb); 766219820Sjeff } else 767219820Sjeff ipoib_unicast_send(mb, priv, eh); 768219820Sjeff 769219820Sjeff return 0; 770219820Sjeff} 771219820Sjeff 772353183Shselaskyvoid 773353183Shselaskyipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv) 774219820Sjeff{ 775219820Sjeff struct mbuf *mb; 776219820Sjeff 777353183Shselasky assert_spin_locked(&priv->lock); 778219820Sjeff 779219820Sjeff while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && 780219820Sjeff (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 781219820Sjeff IFQ_DRV_DEQUEUE(&dev->if_snd, mb); 782219820Sjeff if (mb == NULL) 783219820Sjeff break; 784219820Sjeff IPOIB_MTAP(dev, mb); 785219820Sjeff ipoib_send_one(priv, mb); 786219820Sjeff } 787353183Shselasky} 788353183Shselasky 789353183Shselaskystatic void 790353183Shselasky_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) 791353183Shselasky{ 792353183Shselasky 793353183Shselasky if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 794353183Shselasky IFF_DRV_RUNNING) 795353183Shselasky return; 796353183Shselasky 797353183Shselasky spin_lock(&priv->lock); 798353183Shselasky ipoib_start_locked(dev, priv); 799219820Sjeff spin_unlock(&priv->lock); 800219820Sjeff} 801219820Sjeff 802219820Sjeffstatic void 803219820Sjeffipoib_start(struct ifnet *dev) 804219820Sjeff{ 805219820Sjeff _ipoib_start(dev, dev->if_softc); 806219820Sjeff} 807219820Sjeff 808219820Sjeffstatic void 809219820Sjeffipoib_vlan_start(struct ifnet *dev) 810219820Sjeff{ 811219820Sjeff struct ipoib_dev_priv *priv; 812219820Sjeff struct mbuf *mb; 813219820Sjeff 814219820Sjeff priv = VLAN_COOKIE(dev); 815219820Sjeff if (priv != NULL) 816219820Sjeff return _ipoib_start(dev, priv); 817219820Sjeff while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { 818219820Sjeff IFQ_DRV_DEQUEUE(&dev->if_snd, mb); 819219820Sjeff if (mb == NULL) 820219820Sjeff break; 821219820Sjeff m_freem(mb); 822272027Shselasky if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 823219820Sjeff } 824219820Sjeff} 825219820Sjeff 826219820Sjeffint 827219820Sjeffipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) 828219820Sjeff{ 829219820Sjeff 830219820Sjeff /* Allocate RX/TX "rings" to hold queued mbs */ 831219820Sjeff priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 832219820Sjeff GFP_KERNEL); 833219820Sjeff if (!priv->rx_ring) { 834219820Sjeff printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 835219820Sjeff ca->name, ipoib_recvq_size); 836219820Sjeff goto out; 837219820Sjeff } 838219820Sjeff 839219820Sjeff priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); 840219820Sjeff if (!priv->tx_ring) { 841219820Sjeff printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 842219820Sjeff ca->name, ipoib_sendq_size); 843219820Sjeff goto out_rx_ring_cleanup; 844219820Sjeff } 845219820Sjeff memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); 846219820Sjeff 847219820Sjeff /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ 848219820Sjeff 849219820Sjeff if (ipoib_ib_dev_init(priv, ca, port)) 850219820Sjeff goto out_tx_ring_cleanup; 851219820Sjeff 852219820Sjeff return 0; 853219820Sjeff 854219820Sjeffout_tx_ring_cleanup: 855219820Sjeff kfree(priv->tx_ring); 856219820Sjeff 857219820Sjeffout_rx_ring_cleanup: 858219820Sjeff kfree(priv->rx_ring); 859219820Sjeff 860219820Sjeffout: 861219820Sjeff return -ENOMEM; 862219820Sjeff} 863219820Sjeff 864219820Sjeffstatic void 865219820Sjeffipoib_detach(struct ipoib_dev_priv *priv) 866219820Sjeff{ 867219820Sjeff struct ifnet *dev; 868219820Sjeff 869219820Sjeff dev = priv->dev; 870219820Sjeff if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 871296909Shselasky priv->gone = 1; 872219820Sjeff bpfdetach(dev); 873219820Sjeff if_detach(dev); 874219820Sjeff if_free(dev); 875331769Shselasky free_unr(ipoib_unrhdr, priv->unit); 876219820Sjeff } else 877219820Sjeff VLAN_SETCOOKIE(priv->dev, NULL); 878219820Sjeff 879219820Sjeff free(priv, M_TEMP); 880219820Sjeff} 881219820Sjeff 882219820Sjeffvoid 883219820Sjeffipoib_dev_cleanup(struct ipoib_dev_priv *priv) 884219820Sjeff{ 885219820Sjeff struct ipoib_dev_priv *cpriv, *tcpriv; 886219820Sjeff 887219820Sjeff /* Delete any child interfaces first */ 888219820Sjeff list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 889219820Sjeff ipoib_dev_cleanup(cpriv); 890219820Sjeff ipoib_detach(cpriv); 891219820Sjeff } 892219820Sjeff 893219820Sjeff ipoib_ib_dev_cleanup(priv); 894219820Sjeff 895219820Sjeff kfree(priv->rx_ring); 896219820Sjeff kfree(priv->tx_ring); 897219820Sjeff 898219820Sjeff priv->rx_ring = NULL; 899219820Sjeff priv->tx_ring = NULL; 900219820Sjeff} 901219820Sjeff 902219820Sjeffstatic struct ipoib_dev_priv * 903219820Sjeffipoib_priv_alloc(void) 904219820Sjeff{ 905219820Sjeff struct ipoib_dev_priv *priv; 906219820Sjeff 907219820Sjeff priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); 908219820Sjeff spin_lock_init(&priv->lock); 909278886Shselasky spin_lock_init(&priv->drain_lock); 910219820Sjeff mutex_init(&priv->vlan_mutex); 911219820Sjeff INIT_LIST_HEAD(&priv->path_list); 912219820Sjeff INIT_LIST_HEAD(&priv->child_intfs); 913219820Sjeff INIT_LIST_HEAD(&priv->dead_ahs); 914219820Sjeff INIT_LIST_HEAD(&priv->multicast_list); 915219820Sjeff INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); 916219820Sjeff INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 917219820Sjeff INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); 918219820Sjeff INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); 919219820Sjeff INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); 920219820Sjeff INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); 921219820Sjeff INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 922219820Sjeff INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 923219820Sjeff memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); 924219820Sjeff 925219820Sjeff return (priv); 926219820Sjeff} 927219820Sjeff 928219820Sjeffstruct ipoib_dev_priv * 929219820Sjeffipoib_intf_alloc(const char *name) 930219820Sjeff{ 931219820Sjeff struct ipoib_dev_priv *priv; 932219820Sjeff struct sockaddr_dl *sdl; 933219820Sjeff struct ifnet *dev; 934219820Sjeff 935219820Sjeff priv = ipoib_priv_alloc(); 936219820Sjeff dev = priv->dev = if_alloc(IFT_INFINIBAND); 937219820Sjeff if (!dev) { 938219820Sjeff free(priv, M_TEMP); 939219820Sjeff return NULL; 940219820Sjeff } 941219820Sjeff dev->if_softc = priv; 942331769Shselasky priv->unit = alloc_unr(ipoib_unrhdr); 943331769Shselasky if (priv->unit == -1) { 944331769Shselasky if_free(dev); 945331769Shselasky free(priv, M_TEMP); 946331769Shselasky return NULL; 947331769Shselasky } 948331769Shselasky if_initname(dev, name, priv->unit); 949219820Sjeff dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; 950219820Sjeff dev->if_addrlen = INFINIBAND_ALEN; 951219820Sjeff dev->if_hdrlen = IPOIB_HEADER_LEN; 952219820Sjeff if_attach(dev); 953219820Sjeff dev->if_init = ipoib_init; 954219820Sjeff dev->if_ioctl = ipoib_ioctl; 955219820Sjeff dev->if_start = ipoib_start; 956219820Sjeff dev->if_output = ipoib_output; 957219820Sjeff dev->if_input = ipoib_input; 958219820Sjeff dev->if_resolvemulti = ipoib_resolvemulti; 959263102Sglebius dev->if_baudrate = IF_Gbps(10); 960219820Sjeff dev->if_broadcastaddr = priv->broadcastaddr; 961219820Sjeff dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; 962219820Sjeff sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; 963219820Sjeff sdl->sdl_type = IFT_INFINIBAND; 964219820Sjeff sdl->sdl_alen = dev->if_addrlen; 965219820Sjeff priv->dev = dev; 966219820Sjeff if_link_state_change(dev, LINK_STATE_DOWN); 967219820Sjeff bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); 968219820Sjeff 969219820Sjeff return dev->if_softc; 970219820Sjeff} 971219820Sjeff 972219820Sjeffint 973219820Sjeffipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) 974219820Sjeff{ 975331769Shselasky struct ib_device_attr *device_attr = &hca->attrs; 976219820Sjeff 977219820Sjeff priv->hca_caps = device_attr->device_cap_flags; 978219820Sjeff 979219820Sjeff priv->dev->if_hwassist = 0; 980219820Sjeff priv->dev->if_capabilities = 0; 981219820Sjeff 982219820Sjeff#ifndef CONFIG_INFINIBAND_IPOIB_CM 983219820Sjeff if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 984219820Sjeff set_bit(IPOIB_FLAG_CSUM, &priv->flags); 985219820Sjeff priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; 986219820Sjeff priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; 987219820Sjeff } 988219820Sjeff 989219820Sjeff#if 0 990220555Sbz if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { 991220555Sbz priv->dev->if_capabilities |= IFCAP_TSO4; 992220555Sbz priv->dev->if_hwassist |= CSUM_TSO; 993220555Sbz } 994219820Sjeff#endif 995219820Sjeff#endif 996219820Sjeff priv->dev->if_capabilities |= 997219820Sjeff IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 998219820Sjeff priv->dev->if_capenable = priv->dev->if_capabilities; 999219820Sjeff 1000219820Sjeff return 0; 1001219820Sjeff} 1002219820Sjeff 1003219820Sjeff 1004219820Sjeffstatic struct ifnet * 1005219820Sjeffipoib_add_port(const char *format, struct ib_device *hca, u8 port) 1006219820Sjeff{ 1007219820Sjeff struct ipoib_dev_priv *priv; 1008219820Sjeff struct ib_port_attr attr; 1009219820Sjeff int result = -ENOMEM; 1010219820Sjeff 1011219820Sjeff priv = ipoib_intf_alloc(format); 1012219820Sjeff if (!priv) 1013219820Sjeff goto alloc_mem_failed; 1014219820Sjeff 1015219820Sjeff if (!ib_query_port(hca, port, &attr)) 1016219820Sjeff priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); 1017219820Sjeff else { 1018219820Sjeff printk(KERN_WARNING "%s: ib_query_port %d failed\n", 1019219820Sjeff hca->name, port); 1020219820Sjeff goto device_init_failed; 1021219820Sjeff } 1022219820Sjeff 1023219820Sjeff /* MTU will be reset when mcast join happens */ 1024219820Sjeff priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 1025219820Sjeff priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; 1026219820Sjeff 1027219820Sjeff result = ib_query_pkey(hca, port, 0, &priv->pkey); 1028219820Sjeff if (result) { 1029219820Sjeff printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 1030219820Sjeff hca->name, port, result); 1031219820Sjeff goto device_init_failed; 1032219820Sjeff } 1033219820Sjeff 1034219820Sjeff if (ipoib_set_dev_features(priv, hca)) 1035219820Sjeff goto device_init_failed; 1036219820Sjeff 1037219820Sjeff /* 1038219820Sjeff * Set the full membership bit, so that we join the right 1039219820Sjeff * broadcast group, etc. 1040219820Sjeff */ 1041219820Sjeff priv->pkey |= 0x8000; 1042219820Sjeff 1043219820Sjeff priv->broadcastaddr[8] = priv->pkey >> 8; 1044219820Sjeff priv->broadcastaddr[9] = priv->pkey & 0xff; 1045219820Sjeff 1046331769Shselasky result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); 1047219820Sjeff if (result) { 1048219820Sjeff printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 1049219820Sjeff hca->name, port, result); 1050219820Sjeff goto device_init_failed; 1051219820Sjeff } 1052219820Sjeff memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); 1053219820Sjeff 1054219820Sjeff result = ipoib_dev_init(priv, hca, port); 1055219820Sjeff if (result < 0) { 1056219820Sjeff printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", 1057219820Sjeff hca->name, port, result); 1058219820Sjeff goto device_init_failed; 1059219820Sjeff } 1060219820Sjeff if (ipoib_cm_admin_enabled(priv)) 1061219820Sjeff priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); 1062219820Sjeff 1063219820Sjeff INIT_IB_EVENT_HANDLER(&priv->event_handler, 1064219820Sjeff priv->ca, ipoib_event); 1065219820Sjeff result = ib_register_event_handler(&priv->event_handler); 1066219820Sjeff if (result < 0) { 1067219820Sjeff printk(KERN_WARNING "%s: ib_register_event_handler failed for " 1068219820Sjeff "port %d (ret = %d)\n", 1069219820Sjeff hca->name, port, result); 1070219820Sjeff goto event_failed; 1071219820Sjeff } 1072219820Sjeff if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); 1073219820Sjeff 1074219820Sjeff return priv->dev; 1075219820Sjeff 1076219820Sjeffevent_failed: 1077219820Sjeff ipoib_dev_cleanup(priv); 1078219820Sjeff 1079219820Sjeffdevice_init_failed: 1080219820Sjeff ipoib_detach(priv); 1081219820Sjeff 1082219820Sjeffalloc_mem_failed: 1083219820Sjeff return ERR_PTR(result); 1084219820Sjeff} 1085219820Sjeff 1086219820Sjeffstatic void 1087219820Sjeffipoib_add_one(struct ib_device *device) 1088219820Sjeff{ 1089219820Sjeff struct list_head *dev_list; 1090219820Sjeff struct ifnet *dev; 1091219820Sjeff struct ipoib_dev_priv *priv; 1092219820Sjeff int s, e, p; 1093219820Sjeff 1094219820Sjeff if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 1095219820Sjeff return; 1096219820Sjeff 1097219820Sjeff dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 1098219820Sjeff if (!dev_list) 1099219820Sjeff return; 1100219820Sjeff 1101219820Sjeff INIT_LIST_HEAD(dev_list); 1102219820Sjeff 1103219820Sjeff if (device->node_type == RDMA_NODE_IB_SWITCH) { 1104219820Sjeff s = 0; 1105219820Sjeff e = 0; 1106219820Sjeff } else { 1107219820Sjeff s = 1; 1108219820Sjeff e = device->phys_port_cnt; 1109219820Sjeff } 1110219820Sjeff 1111219820Sjeff for (p = s; p <= e; ++p) { 1112219820Sjeff if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) 1113219820Sjeff continue; 1114219820Sjeff dev = ipoib_add_port("ib", device, p); 1115219820Sjeff if (!IS_ERR(dev)) { 1116219820Sjeff priv = dev->if_softc; 1117219820Sjeff list_add_tail(&priv->list, dev_list); 1118219820Sjeff } 1119219820Sjeff } 1120219820Sjeff 1121219820Sjeff ib_set_client_data(device, &ipoib_client, dev_list); 1122219820Sjeff} 1123219820Sjeff 1124219820Sjeffstatic void 1125331769Shselaskyipoib_remove_one(struct ib_device *device, void *client_data) 1126219820Sjeff{ 1127219820Sjeff struct ipoib_dev_priv *priv, *tmp; 1128331769Shselasky struct list_head *dev_list = client_data; 1129219820Sjeff 1130331769Shselasky if (!dev_list) 1131331769Shselasky return; 1132331769Shselasky 1133219820Sjeff if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 1134219820Sjeff return; 1135219820Sjeff 1136219820Sjeff list_for_each_entry_safe(priv, tmp, dev_list, list) { 1137219820Sjeff if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) 1138219820Sjeff continue; 1139219820Sjeff 1140254576Sjhb ipoib_stop(priv); 1141254576Sjhb 1142219820Sjeff ib_unregister_event_handler(&priv->event_handler); 1143219820Sjeff 1144219820Sjeff /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ 1145219820Sjeff 1146219820Sjeff flush_workqueue(ipoib_workqueue); 1147219820Sjeff 1148219820Sjeff ipoib_dev_cleanup(priv); 1149219820Sjeff ipoib_detach(priv); 1150219820Sjeff } 1151219820Sjeff 1152219820Sjeff kfree(dev_list); 1153219820Sjeff} 1154219820Sjeff 1155338556Shselaskystatic int 1156338556Shselaskyipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev) 1157338556Shselasky{ 1158338556Shselasky struct ifaddr *ifa; 1159338556Shselasky int retval = 0; 1160338556Shselasky 1161338556Shselasky CURVNET_SET(dev->if_vnet); 1162338556Shselasky IF_ADDR_RLOCK(dev); 1163338556Shselasky TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { 1164338556Shselasky if (ifa->ifa_addr == NULL || 1165338556Shselasky ifa->ifa_addr->sa_family != addr->sa_family || 1166338556Shselasky ifa->ifa_addr->sa_len != addr->sa_len) { 1167338556Shselasky continue; 1168338556Shselasky } 1169338556Shselasky if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) { 1170338556Shselasky retval = 1; 1171338556Shselasky break; 1172338556Shselasky } 1173338556Shselasky } 1174338556Shselasky IF_ADDR_RUNLOCK(dev); 1175338556Shselasky CURVNET_RESTORE(); 1176338556Shselasky 1177338556Shselasky return (retval); 1178338556Shselasky} 1179338556Shselasky 1180338556Shselasky/* 1181338556Shselasky * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on 1182338556Shselasky * top a given ipoib device matching a pkey_index and address, if one 1183338556Shselasky * exists. 1184338556Shselasky * 1185338556Shselasky * @found_net_dev: contains a matching net_device if the return value 1186338556Shselasky * >= 1, with a reference held. 1187338556Shselasky */ 1188338556Shselaskystatic int 1189338556Shselaskyipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, 1190338556Shselasky const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr, 1191338556Shselasky struct net_device **found_net_dev) 1192338556Shselasky{ 1193338556Shselasky struct ipoib_dev_priv *child_priv; 1194338556Shselasky int matches = 0; 1195338556Shselasky 1196338556Shselasky if (priv->pkey_index == pkey_index && 1197338556Shselasky (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { 1198338556Shselasky if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) { 1199338556Shselasky if (*found_net_dev == NULL) { 1200338556Shselasky struct net_device *net_dev; 1201338556Shselasky 1202338556Shselasky if (priv->parent != NULL) 1203338556Shselasky net_dev = priv->parent; 1204338556Shselasky else 1205338556Shselasky net_dev = priv->dev; 1206338556Shselasky *found_net_dev = net_dev; 1207338556Shselasky dev_hold(net_dev); 1208338556Shselasky } 1209338556Shselasky matches++; 1210338556Shselasky } 1211338556Shselasky } 1212338556Shselasky 1213338556Shselasky /* Check child interfaces */ 1214338556Shselasky mutex_lock(&priv->vlan_mutex); 1215338556Shselasky list_for_each_entry(child_priv, &priv->child_intfs, list) { 1216338556Shselasky matches += ipoib_match_gid_pkey_addr(child_priv, gid, 1217338556Shselasky pkey_index, addr, found_net_dev); 1218338556Shselasky if (matches > 1) 1219338556Shselasky break; 1220338556Shselasky } 1221338556Shselasky mutex_unlock(&priv->vlan_mutex); 1222338556Shselasky 1223338556Shselasky return matches; 1224338556Shselasky} 1225338556Shselasky 1226338556Shselasky/* 1227338556Shselasky * __ipoib_get_net_dev_by_params - returns the number of matching 1228338556Shselasky * net_devs found (between 0 and 2). Also return the matching 1229338556Shselasky * net_device in the @net_dev parameter, holding a reference to the 1230338556Shselasky * net_device, if the number of matches >= 1 1231338556Shselasky */ 1232338556Shselaskystatic int 1233338556Shselasky__ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, 1234338556Shselasky u16 pkey_index, const union ib_gid *gid, 1235338556Shselasky const struct sockaddr *addr, struct net_device **net_dev) 1236338556Shselasky{ 1237338556Shselasky struct ipoib_dev_priv *priv; 1238338556Shselasky int matches = 0; 1239338556Shselasky 1240338556Shselasky *net_dev = NULL; 1241338556Shselasky 1242338556Shselasky list_for_each_entry(priv, dev_list, list) { 1243338556Shselasky if (priv->port != port) 1244338556Shselasky continue; 1245338556Shselasky 1246338556Shselasky matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index, 1247338556Shselasky addr, net_dev); 1248338556Shselasky 1249338556Shselasky if (matches > 1) 1250338556Shselasky break; 1251338556Shselasky } 1252338556Shselasky 1253338556Shselasky return matches; 1254338556Shselasky} 1255338556Shselasky 1256338556Shselaskystatic struct net_device * 1257338556Shselaskyipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, 1258338556Shselasky const union ib_gid *gid, const struct sockaddr *addr, void *client_data) 1259338556Shselasky{ 1260338556Shselasky struct net_device *net_dev; 1261338556Shselasky struct list_head *dev_list = client_data; 1262338556Shselasky u16 pkey_index; 1263338556Shselasky int matches; 1264338556Shselasky int ret; 1265338556Shselasky 1266338556Shselasky if (!rdma_protocol_ib(dev, port)) 1267338556Shselasky return NULL; 1268338556Shselasky 1269338556Shselasky ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); 1270338556Shselasky if (ret) 1271338556Shselasky return NULL; 1272338556Shselasky 1273338556Shselasky if (!dev_list) 1274338556Shselasky return NULL; 1275338556Shselasky 1276338556Shselasky /* See if we can find a unique device matching the L2 parameters */ 1277338556Shselasky matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, 1278338556Shselasky gid, NULL, &net_dev); 1279338556Shselasky 1280338556Shselasky switch (matches) { 1281338556Shselasky case 0: 1282338556Shselasky return NULL; 1283338556Shselasky case 1: 1284338556Shselasky return net_dev; 1285338556Shselasky } 1286338556Shselasky 1287338556Shselasky dev_put(net_dev); 1288338556Shselasky 1289338556Shselasky /* Couldn't find a unique device with L2 parameters only. Use L3 1290338556Shselasky * address to uniquely match the net device */ 1291338556Shselasky matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, 1292338556Shselasky gid, addr, &net_dev); 1293338556Shselasky switch (matches) { 1294338556Shselasky case 0: 1295338556Shselasky return NULL; 1296338556Shselasky default: 1297338556Shselasky dev_warn_ratelimited(&dev->dev, 1298338556Shselasky "duplicate IP address detected\n"); 1299338556Shselasky /* Fall through */ 1300338556Shselasky case 1: 1301338556Shselasky return net_dev; 1302338556Shselasky } 1303338556Shselasky} 1304338556Shselasky 1305219820Sjeffstatic void 1306219820Sjeffipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 1307219820Sjeff{ 1308219820Sjeff struct ipoib_dev_priv *parent; 1309219820Sjeff struct ipoib_dev_priv *priv; 1310219820Sjeff struct ifnet *dev; 1311219820Sjeff uint16_t pkey; 1312219820Sjeff int error; 1313219820Sjeff 1314219820Sjeff if (ifp->if_type != IFT_INFINIBAND) 1315219820Sjeff return; 1316219820Sjeff dev = VLAN_DEVAT(ifp, vtag); 1317219820Sjeff if (dev == NULL) 1318219820Sjeff return; 1319219820Sjeff priv = NULL; 1320219820Sjeff error = 0; 1321219820Sjeff parent = ifp->if_softc; 1322219820Sjeff /* We only support 15 bits of pkey. */ 1323219820Sjeff if (vtag & 0x8000) 1324219820Sjeff return; 1325219820Sjeff pkey = vtag | 0x8000; /* Set full membership bit. */ 1326219820Sjeff if (pkey == parent->pkey) 1327219820Sjeff return; 1328219820Sjeff /* Check for dups */ 1329219820Sjeff mutex_lock(&parent->vlan_mutex); 1330219820Sjeff list_for_each_entry(priv, &parent->child_intfs, list) { 1331219820Sjeff if (priv->pkey == pkey) { 1332219820Sjeff priv = NULL; 1333219820Sjeff error = EBUSY; 1334219820Sjeff goto out; 1335219820Sjeff } 1336219820Sjeff } 1337219820Sjeff priv = ipoib_priv_alloc(); 1338219820Sjeff priv->dev = dev; 1339219820Sjeff priv->max_ib_mtu = parent->max_ib_mtu; 1340219820Sjeff priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; 1341219820Sjeff set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); 1342219820Sjeff error = ipoib_set_dev_features(priv, parent->ca); 1343219820Sjeff if (error) 1344219820Sjeff goto out; 1345219820Sjeff priv->pkey = pkey; 1346219820Sjeff priv->broadcastaddr[8] = pkey >> 8; 1347219820Sjeff priv->broadcastaddr[9] = pkey & 0xff; 1348219820Sjeff dev->if_broadcastaddr = priv->broadcastaddr; 1349219820Sjeff error = ipoib_dev_init(priv, parent->ca, parent->port); 1350219820Sjeff if (error) 1351219820Sjeff goto out; 1352219820Sjeff priv->parent = parent->dev; 1353219820Sjeff list_add_tail(&priv->list, &parent->child_intfs); 1354219820Sjeff VLAN_SETCOOKIE(dev, priv); 1355219820Sjeff dev->if_start = ipoib_vlan_start; 1356219820Sjeff dev->if_drv_flags &= ~IFF_DRV_RUNNING; 1357219820Sjeff dev->if_hdrlen = IPOIB_HEADER_LEN; 1358219820Sjeff if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1359219820Sjeff ipoib_open(priv); 1360219820Sjeff mutex_unlock(&parent->vlan_mutex); 1361219820Sjeff return; 1362219820Sjeffout: 1363219820Sjeff mutex_unlock(&parent->vlan_mutex); 1364219820Sjeff if (priv) 1365219820Sjeff free(priv, M_TEMP); 1366219820Sjeff if (error) 1367219820Sjeff ipoib_warn(parent, 1368219820Sjeff "failed to initialize subinterface: device %s, port %d vtag 0x%X", 1369219820Sjeff parent->ca->name, parent->port, vtag); 1370219820Sjeff return; 1371219820Sjeff} 1372219820Sjeff 1373219820Sjeffstatic void 1374219820Sjeffipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 1375219820Sjeff{ 1376219820Sjeff struct ipoib_dev_priv *parent; 1377219820Sjeff struct ipoib_dev_priv *priv; 1378219820Sjeff struct ifnet *dev; 1379219820Sjeff uint16_t pkey; 1380219820Sjeff 1381219820Sjeff if (ifp->if_type != IFT_INFINIBAND) 1382219820Sjeff return; 1383219820Sjeff 1384219820Sjeff dev = VLAN_DEVAT(ifp, vtag); 1385219820Sjeff if (dev) 1386219820Sjeff VLAN_SETCOOKIE(dev, NULL); 1387219820Sjeff pkey = vtag | 0x8000; 1388219820Sjeff parent = ifp->if_softc; 1389219820Sjeff mutex_lock(&parent->vlan_mutex); 1390219820Sjeff list_for_each_entry(priv, &parent->child_intfs, list) { 1391219820Sjeff if (priv->pkey == pkey) { 1392219820Sjeff ipoib_dev_cleanup(priv); 1393219820Sjeff list_del(&priv->list); 1394219820Sjeff break; 1395219820Sjeff } 1396219820Sjeff } 1397219820Sjeff mutex_unlock(&parent->vlan_mutex); 1398219820Sjeff} 1399219820Sjeff 1400219820Sjeffeventhandler_tag ipoib_vlan_attach; 1401219820Sjeffeventhandler_tag ipoib_vlan_detach; 1402219820Sjeff 1403219820Sjeffstatic int __init 1404219820Sjeffipoib_init_module(void) 1405219820Sjeff{ 1406219820Sjeff int ret; 1407219820Sjeff 1408219820Sjeff ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); 1409219820Sjeff ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); 1410219820Sjeff ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); 1411219820Sjeff 1412219820Sjeff ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 1413219820Sjeff ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1414219820Sjeff ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, 1415219820Sjeff IPOIB_MIN_QUEUE_SIZE)); 1416219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM 1417219820Sjeff ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1418219820Sjeff#endif 1419219820Sjeff 1420219820Sjeff ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 1421219820Sjeff ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); 1422219820Sjeff ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 1423219820Sjeff ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); 1424219820Sjeff 1425219820Sjeff /* 1426219820Sjeff * We create our own workqueue mainly because we want to be 1427219820Sjeff * able to flush it when devices are being removed. We can't 1428219820Sjeff * use schedule_work()/flush_scheduled_work() because both 1429219820Sjeff * unregister_netdev() and linkwatch_event take the rtnl lock, 1430219820Sjeff * so flush_scheduled_work() can deadlock during device 1431219820Sjeff * removal. 1432219820Sjeff */ 1433219820Sjeff ipoib_workqueue = create_singlethread_workqueue("ipoib"); 1434219820Sjeff if (!ipoib_workqueue) { 1435219820Sjeff ret = -ENOMEM; 1436219820Sjeff goto err_fs; 1437219820Sjeff } 1438219820Sjeff 1439219820Sjeff ib_sa_register_client(&ipoib_sa_client); 1440219820Sjeff 1441219820Sjeff ret = ib_register_client(&ipoib_client); 1442219820Sjeff if (ret) 1443219820Sjeff goto err_sa; 1444219820Sjeff 1445219820Sjeff return 0; 1446219820Sjeff 1447219820Sjefferr_sa: 1448219820Sjeff ib_sa_unregister_client(&ipoib_sa_client); 1449219820Sjeff destroy_workqueue(ipoib_workqueue); 1450219820Sjeff 1451219820Sjefferr_fs: 1452219820Sjeff return ret; 1453219820Sjeff} 1454219820Sjeff 1455219820Sjeffstatic void __exit 1456219820Sjeffipoib_cleanup_module(void) 1457219820Sjeff{ 1458219820Sjeff 1459219820Sjeff EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); 1460219820Sjeff EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); 1461219820Sjeff ib_unregister_client(&ipoib_client); 1462219820Sjeff ib_sa_unregister_client(&ipoib_sa_client); 1463219820Sjeff destroy_workqueue(ipoib_workqueue); 1464219820Sjeff} 1465219820Sjeff 1466219820Sjeff/* 1467219820Sjeff * Infiniband output routine. 1468219820Sjeff */ 1469219820Sjeffstatic int 1470219820Sjeffipoib_output(struct ifnet *ifp, struct mbuf *m, 1471249976Sglebius const struct sockaddr *dst, struct route *ro) 1472219820Sjeff{ 1473219820Sjeff u_char edst[INFINIBAND_ALEN]; 1474292978Smelifaro#if defined(INET) || defined(INET6) 1475219820Sjeff struct llentry *lle = NULL; 1476292978Smelifaro#endif 1477219820Sjeff struct ipoib_header *eh; 1478275196Smelifaro int error = 0, is_gw = 0; 1479219820Sjeff short type; 1480219820Sjeff 1481293544Smelifaro if (ro != NULL) 1482293544Smelifaro is_gw = (ro->ro_flags & RT_HAS_GW) != 0; 1483219820Sjeff#ifdef MAC 1484219820Sjeff error = mac_ifnet_check_transmit(ifp, m); 1485219820Sjeff if (error) 1486219820Sjeff goto bad; 1487219820Sjeff#endif 1488219820Sjeff 1489219820Sjeff M_PROFILE(m); 1490219820Sjeff if (ifp->if_flags & IFF_MONITOR) { 1491219820Sjeff error = ENETDOWN; 1492219820Sjeff goto bad; 1493219820Sjeff } 1494219820Sjeff if (!((ifp->if_flags & IFF_UP) && 1495219820Sjeff (ifp->if_drv_flags & IFF_DRV_RUNNING))) { 1496219820Sjeff error = ENETDOWN; 1497219820Sjeff goto bad; 1498219820Sjeff } 1499219820Sjeff 1500219820Sjeff switch (dst->sa_family) { 1501219820Sjeff#ifdef INET 1502219820Sjeff case AF_INET: 1503219820Sjeff if (lle != NULL && (lle->la_flags & LLE_VALID)) 1504292978Smelifaro memcpy(edst, lle->ll_addr, sizeof(edst)); 1505219820Sjeff else if (m->m_flags & M_MCAST) 1506219820Sjeff ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); 1507219820Sjeff else 1508301229Sgnn error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); 1509219820Sjeff if (error) 1510219820Sjeff return (error == EWOULDBLOCK ? 0 : error); 1511219820Sjeff type = htons(ETHERTYPE_IP); 1512219820Sjeff break; 1513219820Sjeff case AF_ARP: 1514219820Sjeff { 1515219820Sjeff struct arphdr *ah; 1516219820Sjeff ah = mtod(m, struct arphdr *); 1517219820Sjeff ah->ar_hrd = htons(ARPHRD_INFINIBAND); 1518219820Sjeff 1519219820Sjeff switch(ntohs(ah->ar_op)) { 1520219820Sjeff case ARPOP_REVREQUEST: 1521219820Sjeff case ARPOP_REVREPLY: 1522219820Sjeff type = htons(ETHERTYPE_REVARP); 1523219820Sjeff break; 1524219820Sjeff case ARPOP_REQUEST: 1525219820Sjeff case ARPOP_REPLY: 1526219820Sjeff default: 1527219820Sjeff type = htons(ETHERTYPE_ARP); 1528219820Sjeff break; 1529219820Sjeff } 1530219820Sjeff 1531219820Sjeff if (m->m_flags & M_BCAST) 1532219820Sjeff bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); 1533219820Sjeff else 1534219820Sjeff bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); 1535219820Sjeff 1536219820Sjeff } 1537219820Sjeff break; 1538219820Sjeff#endif 1539219820Sjeff#ifdef INET6 1540219820Sjeff case AF_INET6: 1541219820Sjeff if (lle != NULL && (lle->la_flags & LLE_VALID)) 1542292978Smelifaro memcpy(edst, lle->ll_addr, sizeof(edst)); 1543219820Sjeff else if (m->m_flags & M_MCAST) 1544219820Sjeff ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); 1545219820Sjeff else 1546301229Sgnn error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); 1547219820Sjeff if (error) 1548219820Sjeff return error; 1549219820Sjeff type = htons(ETHERTYPE_IPV6); 1550219820Sjeff break; 1551219820Sjeff#endif 1552219820Sjeff 1553219820Sjeff default: 1554219820Sjeff if_printf(ifp, "can't handle af%d\n", dst->sa_family); 1555219820Sjeff error = EAFNOSUPPORT; 1556219820Sjeff goto bad; 1557219820Sjeff } 1558219820Sjeff 1559219820Sjeff /* 1560219820Sjeff * Add local net header. If no space in first mbuf, 1561219820Sjeff * allocate another. 1562219820Sjeff */ 1563243882Sglebius M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); 1564219820Sjeff if (m == NULL) { 1565219820Sjeff error = ENOBUFS; 1566219820Sjeff goto bad; 1567219820Sjeff } 1568219820Sjeff eh = mtod(m, struct ipoib_header *); 1569219820Sjeff (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); 1570219820Sjeff (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); 1571219820Sjeff 1572219820Sjeff /* 1573219820Sjeff * Queue message on interface, update output statistics if 1574219820Sjeff * successful, and start output if interface not yet active. 1575219820Sjeff */ 1576219820Sjeff return ((ifp->if_transmit)(ifp, m)); 1577219820Sjeffbad: 1578219820Sjeff if (m != NULL) 1579219820Sjeff m_freem(m); 1580219820Sjeff return (error); 1581219820Sjeff} 1582219820Sjeff 1583219820Sjeff/* 1584219820Sjeff * Upper layer processing for a received Infiniband packet. 1585219820Sjeff */ 1586219820Sjeffvoid 1587219820Sjeffipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) 1588219820Sjeff{ 1589219820Sjeff int isr; 1590219820Sjeff 1591219820Sjeff#ifdef MAC 1592219820Sjeff /* 1593219820Sjeff * Tag the mbuf with an appropriate MAC label before any other 1594219820Sjeff * consumers can get to it. 1595219820Sjeff */ 1596219820Sjeff mac_ifnet_create_mbuf(ifp, m); 1597219820Sjeff#endif 1598219820Sjeff /* Allow monitor mode to claim this frame, after stats are updated. */ 1599219820Sjeff if (ifp->if_flags & IFF_MONITOR) { 1600219820Sjeff if_printf(ifp, "discard frame at IFF_MONITOR\n"); 1601219820Sjeff m_freem(m); 1602219820Sjeff return; 1603219820Sjeff } 1604367047Srpokala /* Direct packet to correct FIB based on interface config */ 1605367047Srpokala M_SETFIB(m, ifp->if_fib); 1606219820Sjeff /* 1607219820Sjeff * Dispatch frame to upper layer. 1608219820Sjeff */ 1609219820Sjeff switch (proto) { 1610219820Sjeff#ifdef INET 1611219820Sjeff case ETHERTYPE_IP: 1612219820Sjeff isr = NETISR_IP; 1613219820Sjeff break; 1614219820Sjeff 1615219820Sjeff case ETHERTYPE_ARP: 1616219820Sjeff if (ifp->if_flags & IFF_NOARP) { 1617219820Sjeff /* Discard packet if ARP is disabled on interface */ 1618219820Sjeff m_freem(m); 1619219820Sjeff return; 1620219820Sjeff } 1621219820Sjeff isr = NETISR_ARP; 1622219820Sjeff break; 1623219820Sjeff#endif 1624219820Sjeff#ifdef INET6 1625219820Sjeff case ETHERTYPE_IPV6: 1626219820Sjeff isr = NETISR_IPV6; 1627219820Sjeff break; 1628219820Sjeff#endif 1629219820Sjeff default: 1630219820Sjeff goto discard; 1631219820Sjeff } 1632219820Sjeff netisr_dispatch(isr, m); 1633219820Sjeff return; 1634219820Sjeff 1635219820Sjeffdiscard: 1636219820Sjeff m_freem(m); 1637219820Sjeff} 1638219820Sjeff 1639219820Sjeff/* 1640219820Sjeff * Process a received Infiniband packet. 1641219820Sjeff */ 1642219820Sjeffstatic void 1643219820Sjeffipoib_input(struct ifnet *ifp, struct mbuf *m) 1644219820Sjeff{ 1645219820Sjeff struct ipoib_header *eh; 1646219820Sjeff 1647219820Sjeff if ((ifp->if_flags & IFF_UP) == 0) { 1648219820Sjeff m_freem(m); 1649219820Sjeff return; 1650219820Sjeff } 1651219820Sjeff CURVNET_SET_QUIET(ifp->if_vnet); 1652219820Sjeff 1653219820Sjeff /* Let BPF have it before we strip the header. */ 1654219820Sjeff IPOIB_MTAP(ifp, m); 1655219820Sjeff eh = mtod(m, struct ipoib_header *); 1656219820Sjeff /* 1657219820Sjeff * Reset layer specific mbuf flags to avoid confusing upper layers. 1658219820Sjeff * Strip off Infiniband header. 1659219820Sjeff */ 1660219820Sjeff m->m_flags &= ~M_VLANTAG; 1661254523Sandre m_clrprotoflags(m); 1662219820Sjeff m_adj(m, IPOIB_HEADER_LEN); 1663219820Sjeff 1664219820Sjeff if (IPOIB_IS_MULTICAST(eh->hwaddr)) { 1665219820Sjeff if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, 1666219820Sjeff ifp->if_addrlen) == 0) 1667219820Sjeff m->m_flags |= M_BCAST; 1668219820Sjeff else 1669219820Sjeff m->m_flags |= M_MCAST; 1670272027Shselasky if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); 1671219820Sjeff } 1672219820Sjeff 1673219820Sjeff ipoib_demux(ifp, m, ntohs(eh->proto)); 1674219820Sjeff CURVNET_RESTORE(); 1675219820Sjeff} 1676219820Sjeff 1677219820Sjeffstatic int 1678219820Sjeffipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, 1679219820Sjeff struct sockaddr *sa) 1680219820Sjeff{ 1681219820Sjeff struct sockaddr_dl *sdl; 1682219820Sjeff#ifdef INET 1683219820Sjeff struct sockaddr_in *sin; 1684219820Sjeff#endif 1685219820Sjeff#ifdef INET6 1686219820Sjeff struct sockaddr_in6 *sin6; 1687219820Sjeff#endif 1688219820Sjeff u_char *e_addr; 1689219820Sjeff 1690219820Sjeff switch(sa->sa_family) { 1691219820Sjeff case AF_LINK: 1692219820Sjeff /* 1693219820Sjeff * No mapping needed. Just check that it's a valid MC address. 1694219820Sjeff */ 1695219820Sjeff sdl = (struct sockaddr_dl *)sa; 1696219820Sjeff e_addr = LLADDR(sdl); 1697219820Sjeff if (!IPOIB_IS_MULTICAST(e_addr)) 1698219820Sjeff return EADDRNOTAVAIL; 1699298046Spfg *llsa = NULL; 1700219820Sjeff return 0; 1701219820Sjeff 1702219820Sjeff#ifdef INET 1703219820Sjeff case AF_INET: 1704219820Sjeff sin = (struct sockaddr_in *)sa; 1705219820Sjeff if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 1706219820Sjeff return EADDRNOTAVAIL; 1707260870Smelifaro sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 1708219820Sjeff sdl->sdl_alen = INFINIBAND_ALEN; 1709219820Sjeff e_addr = LLADDR(sdl); 1710219820Sjeff ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, 1711219820Sjeff e_addr); 1712219820Sjeff *llsa = (struct sockaddr *)sdl; 1713219820Sjeff return 0; 1714219820Sjeff#endif 1715219820Sjeff#ifdef INET6 1716219820Sjeff case AF_INET6: 1717219820Sjeff sin6 = (struct sockaddr_in6 *)sa; 1718219820Sjeff /* 1719219820Sjeff * An IP6 address of 0 means listen to all 1720219820Sjeff * of the multicast address used for IP6. 1721219820Sjeff * This has no meaning in ipoib. 1722219820Sjeff */ 1723219820Sjeff if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 1724219820Sjeff return EADDRNOTAVAIL; 1725219820Sjeff if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 1726219820Sjeff return EADDRNOTAVAIL; 1727260870Smelifaro sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 1728219820Sjeff sdl->sdl_alen = INFINIBAND_ALEN; 1729219820Sjeff e_addr = LLADDR(sdl); 1730219820Sjeff ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); 1731219820Sjeff *llsa = (struct sockaddr *)sdl; 1732219820Sjeff return 0; 1733219820Sjeff#endif 1734219820Sjeff 1735219820Sjeff default: 1736219820Sjeff return EAFNOSUPPORT; 1737219820Sjeff } 1738219820Sjeff} 1739219820Sjeff 1740363151Shselaskymodule_init_order(ipoib_init_module, SI_ORDER_FIFTH); 1741363151Shselaskymodule_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH); 1742255932Salfred 1743255932Salfredstatic int 1744255932Salfredipoib_evhand(module_t mod, int event, void *arg) 1745255932Salfred{ 1746358932Shselasky return (0); 1747255932Salfred} 1748255932Salfred 1749255932Salfredstatic moduledata_t ipoib_mod = { 1750358932Shselasky .name = "ipoib", 1751358932Shselasky .evhand = ipoib_evhand, 1752255932Salfred}; 1753255932Salfred 1754296688SjhbDECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY); 1755255932SalfredMODULE_DEPEND(ipoib, ibcore, 1, 1, 1); 1756289749ShselaskyMODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1); 1757