1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/slab.h> 39#include <linux/workqueue.h> 40#include <linux/module.h> 41#include <linux/notifier.h> 42#include <net/route.h> 43#include <net/netevent.h> 44#include <rdma/ib_addr.h> 45#include <netinet/if_ether.h> 46#include <netinet6/scope6_var.h> 47 48 49MODULE_AUTHOR("Sean Hefty"); 50MODULE_DESCRIPTION("IB Address Translation"); 51MODULE_LICENSE("Dual BSD/GPL"); 52 53struct addr_req { 54 struct list_head list; 55 struct sockaddr_storage src_addr; 56 struct sockaddr_storage dst_addr; 57 struct rdma_dev_addr *addr; 58 struct rdma_addr_client *client; 59 void *context; 60 void (*callback)(int status, struct sockaddr *src_addr, 61 struct rdma_dev_addr *addr, void *context); 62 unsigned long timeout; 63 int status; 64}; 65 66static void process_req(struct work_struct *work); 67 68static DEFINE_MUTEX(lock); 69static LIST_HEAD(req_list); 70static struct delayed_work work; 71static struct workqueue_struct *addr_wq; 72 73static struct rdma_addr_client self; 74void rdma_addr_register_client(struct rdma_addr_client *client) 75{ 76 atomic_set(&client->refcount, 1); 77 init_completion(&client->comp); 78} 79EXPORT_SYMBOL(rdma_addr_register_client); 80 81static inline void put_client(struct rdma_addr_client *client) 82{ 83 if (atomic_dec_and_test(&client->refcount)) 84 complete(&client->comp); 85} 86 87void rdma_addr_unregister_client(struct rdma_addr_client *client) 88{ 89 put_client(client); 90 wait_for_completion(&client->comp); 91} 92EXPORT_SYMBOL(rdma_addr_unregister_client); 93 94int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 95 const unsigned char *dst_dev_addr) 96{ 97 if (dev->if_type == IFT_INFINIBAND) 98 dev_addr->dev_type = ARPHRD_INFINIBAND; 99 else if (dev->if_type == IFT_ETHER) 100 dev_addr->dev_type = ARPHRD_ETHER; 101 else 102 dev_addr->dev_type = 0; 103 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 104 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 105 dev->if_addrlen); 106 if (dst_dev_addr) 107 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 108 dev_addr->bound_dev_if = dev->if_index; 109 return 0; 110} 111EXPORT_SYMBOL(rdma_copy_addr); 112 113#define SCOPE_ID_CACHE(_scope_id, _addr6) do { \ 114 (_addr6)->sin6_addr.s6_addr[3] = (_scope_id); \ 115 (_addr6)->sin6_scope_id = 0; } while (0) 116 117#define SCOPE_ID_RESTORE(_scope_id, _addr6) do { \ 118 (_addr6)->sin6_scope_id = (_scope_id); \ 119 (_addr6)->sin6_addr.s6_addr[3] = 0; } while (0) 120 121int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 122 u16 *vlan_id) 123{ 124 struct net_device *dev; 125 int ret = -EADDRNOTAVAIL; 126 127 if (dev_addr->bound_dev_if) { 128 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 129 if (!dev) 130 return -ENODEV; 131 ret = rdma_copy_addr(dev_addr, dev, NULL); 132 dev_put(dev); 133 return ret; 134 } 135 136 switch (addr->sa_family) { 137 case AF_INET: 138 dev = ip_dev_find(&init_net, 139 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 140 141 if (!dev) 142 return ret; 143 144 ret = rdma_copy_addr(dev_addr, dev, NULL); 145 if (vlan_id) 146 *vlan_id = rdma_vlan_dev_vlan_id(dev); 147 dev_put(dev); 148 break; 149 150#if defined(INET6) 151 case AF_INET6: 152 { 153 struct sockaddr_in6 *sin6; 154 struct ifaddr *ifa; 155 in_port_t port; 156 uint32_t scope_id; 157 158 sin6 = (struct sockaddr_in6 *)addr; 159 port = sin6->sin6_port; 160 sin6->sin6_port = 0; 161 scope_id = sin6->sin6_scope_id; 162 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 163 SCOPE_ID_CACHE(scope_id, sin6); 164 ifa = ifa_ifwithaddr(addr); 165 sin6->sin6_port = port; 166 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 167 SCOPE_ID_RESTORE(scope_id, sin6); 168 if (ifa == NULL) { 169 ret = -ENODEV; 170 break; 171 } 172 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 173 if (vlan_id) 174 *vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp); 175 ifa_free(ifa); 176 break; 177 } 178#endif 179 default: 180 break; 181 } 182 return ret; 183} 184EXPORT_SYMBOL(rdma_translate_ip); 185 186static void set_timeout(unsigned long time) 187{ 188 unsigned long delay; 189 190 delay = time - jiffies; 191 if ((long)delay <= 0) 192 delay = 1; 193 194 mod_delayed_work(addr_wq, &work, delay); 195} 196 197static void queue_req(struct addr_req *req) 198{ 199 struct addr_req *temp_req; 200 201 mutex_lock(&lock); 202 list_for_each_entry_reverse(temp_req, &req_list, list) { 203 if (time_after_eq(req->timeout, temp_req->timeout)) 204 break; 205 } 206 207 list_add(&req->list, &temp_req->list); 208 209 if (req_list.next == &req->list) 210 set_timeout(req->timeout); 211 mutex_unlock(&lock); 212} 213 214static int addr_resolve(struct sockaddr *src_in, 215 struct sockaddr *dst_in, 216 struct rdma_dev_addr *addr) 217{ 218 struct sockaddr_in *sin; 219 struct sockaddr_in6 *sin6; 220 struct ifaddr *ifa; 221 struct ifnet *ifp; 222 struct rtentry *rte; 223#if defined(INET) || defined(INET6) 224 in_port_t port; 225#endif 226#ifdef INET6 227 uint32_t scope_id; 228#endif 229 u_char edst[MAX_ADDR_LEN]; 230 int multi; 231 int bcast; 232 int is_gw = 0; 233 int error = 0; 234 /* 235 * Determine whether the address is unicast, multicast, or broadcast 236 * and whether the source interface is valid. 237 */ 238 multi = 0; 239 bcast = 0; 240 sin = NULL; 241 sin6 = NULL; 242 ifp = NULL; 243 rte = NULL; 244 ifa = NULL; 245 ifp = NULL; 246 memset(edst, 0, sizeof(edst)); 247#ifdef INET6 248 scope_id = -1U; 249#endif 250 251 switch (dst_in->sa_family) { 252#ifdef INET 253 case AF_INET: 254 sin = (struct sockaddr_in *)dst_in; 255 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 256 bcast = 1; 257 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 258 multi = 1; 259 sin = (struct sockaddr_in *)src_in; 260 if (sin->sin_addr.s_addr != INADDR_ANY) { 261 /* 262 * Address comparison fails if the port is set 263 * cache it here to be restored later. 264 */ 265 port = sin->sin_port; 266 sin->sin_port = 0; 267 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 268 269 /* 270 * If we have a source address to use look it 271 * up first and verify that it is a local 272 * interface: 273 */ 274 ifa = ifa_ifwithaddr(src_in); 275 sin->sin_port = port; 276 if (ifa == NULL) { 277 error = ENETUNREACH; 278 goto done; 279 } 280 ifp = ifa->ifa_ifp; 281 ifa_free(ifa); 282 if (bcast || multi) 283 goto mcast; 284 } 285 break; 286#endif 287#ifdef INET6 288 case AF_INET6: 289 sin6 = (struct sockaddr_in6 *)dst_in; 290 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 291 multi = 1; 292 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { 293 /* 294 * The IB address comparison fails if the 295 * scope ID is set and not part of the addr: 296 */ 297 scope_id = sin6->sin6_scope_id; 298 if (scope_id < 256) 299 SCOPE_ID_CACHE(scope_id, sin6); 300 } 301 sin6 = (struct sockaddr_in6 *)src_in; 302 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 303 port = sin6->sin6_port; 304 sin6->sin6_port = 0; 305 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { 306 if (scope_id < 256) 307 SCOPE_ID_CACHE(scope_id, sin6); 308 } 309 310 /* 311 * If we have a source address to use look it 312 * up first and verify that it is a local 313 * interface: 314 */ 315 ifa = ifa_ifwithaddr(src_in); 316 sin6->sin6_port = port; 317 if (ifa == NULL) { 318 error = ENETUNREACH; 319 goto done; 320 } 321 ifp = ifa->ifa_ifp; 322 ifa_free(ifa); 323 if (bcast || multi) 324 goto mcast; 325 } 326 break; 327#endif 328 default: 329 error = EINVAL; 330 goto done; 331 } 332 /* 333 * Make sure the route exists and has a valid link. 334 */ 335 rte = rtalloc1(dst_in, 1, 0); 336 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 337 if (rte) 338 RTFREE_LOCKED(rte); 339 error = EHOSTUNREACH; 340 goto done; 341 } 342 if (rte->rt_flags & RTF_GATEWAY) 343 is_gw = 1; 344 /* 345 * If it's not multicast or broadcast and the route doesn't match the 346 * requested interface return unreachable. Otherwise fetch the 347 * correct interface pointer and unlock the route. 348 */ 349 if (multi || bcast) { 350 if (ifp == NULL) { 351 ifp = rte->rt_ifp; 352 /* rt_ifa holds the route answer source address */ 353 ifa = rte->rt_ifa; 354 } 355 RTFREE_LOCKED(rte); 356 } else if (ifp && ifp != rte->rt_ifp) { 357 RTFREE_LOCKED(rte); 358 error = ENETUNREACH; 359 goto done; 360 } else { 361 if (ifp == NULL) { 362 ifp = rte->rt_ifp; 363 ifa = rte->rt_ifa; 364 } 365 RT_UNLOCK(rte); 366 } 367#if defined(INET) || defined(INET6) 368mcast: 369#endif 370 if (bcast) { 371 memcpy(edst, ifp->if_broadcastaddr, ifp->if_addrlen); 372 goto done; 373 } else if (multi) { 374 struct sockaddr *llsa; 375 struct sockaddr_dl sdl; 376 377 sdl.sdl_len = sizeof(sdl); 378 llsa = (struct sockaddr *)&sdl; 379 380 if (ifp->if_resolvemulti == NULL) { 381 error = EOPNOTSUPP; 382 goto done; 383 } 384 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 385 if (error == 0) { 386 memcpy(edst, LLADDR((struct sockaddr_dl *)llsa), 387 ifp->if_addrlen); 388 } 389 goto done; 390 } 391 /* 392 * Resolve the link local address. 393 */ 394 switch (dst_in->sa_family) { 395#ifdef INET 396 case AF_INET: 397 error = arpresolve(ifp, is_gw, NULL, 398 is_gw ? rte->rt_gateway : dst_in, edst, NULL, NULL); 399 break; 400#endif 401#ifdef INET6 402 case AF_INET6: 403 error = nd6_resolve(ifp, is_gw, NULL, 404 is_gw ? rte->rt_gateway : dst_in, edst, NULL, NULL); 405 break; 406#endif 407 default: 408 KASSERT(0, ("rdma_addr_resolve: Unreachable")); 409 error = EINVAL; 410 break; 411 } 412 RTFREE(rte); 413done: 414 if (error == 0) 415 error = -rdma_copy_addr(addr, ifp, edst); 416 if (error == 0) 417 memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr)); 418#ifdef INET6 419 if (scope_id < 256) { 420 sin6 = (struct sockaddr_in6 *)src_in; 421 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 422 SCOPE_ID_RESTORE(scope_id, sin6); 423 sin6 = (struct sockaddr_in6 *)dst_in; 424 SCOPE_ID_RESTORE(scope_id, sin6); 425 } 426#endif 427 if (error == EWOULDBLOCK) 428 error = ENODATA; 429 return -error; 430} 431 432static void process_req(struct work_struct *work) 433{ 434 struct addr_req *req, *temp_req; 435 struct sockaddr *src_in, *dst_in; 436 struct list_head done_list; 437 438 INIT_LIST_HEAD(&done_list); 439 440 mutex_lock(&lock); 441 list_for_each_entry_safe(req, temp_req, &req_list, list) { 442 if (req->status == -ENODATA) { 443 src_in = (struct sockaddr *) &req->src_addr; 444 dst_in = (struct sockaddr *) &req->dst_addr; 445 req->status = addr_resolve(src_in, dst_in, req->addr); 446 if (req->status && time_after_eq(jiffies, req->timeout)) 447 req->status = -ETIMEDOUT; 448 else if (req->status == -ENODATA) 449 continue; 450 } 451 list_move_tail(&req->list, &done_list); 452 } 453 454 if (!list_empty(&req_list)) { 455 req = list_entry(req_list.next, struct addr_req, list); 456 set_timeout(req->timeout); 457 } 458 mutex_unlock(&lock); 459 460 list_for_each_entry_safe(req, temp_req, &done_list, list) { 461 list_del(&req->list); 462 req->callback(req->status, (struct sockaddr *) &req->src_addr, 463 req->addr, req->context); 464 put_client(req->client); 465 kfree(req); 466 } 467} 468 469int rdma_resolve_ip(struct rdma_addr_client *client, 470 struct sockaddr *src_addr, struct sockaddr *dst_addr, 471 struct rdma_dev_addr *addr, int timeout_ms, 472 void (*callback)(int status, struct sockaddr *src_addr, 473 struct rdma_dev_addr *addr, void *context), 474 void *context) 475{ 476 struct sockaddr *src_in, *dst_in; 477 struct addr_req *req; 478 int ret = 0; 479 480 req = kzalloc(sizeof *req, GFP_KERNEL); 481 if (!req) 482 return -ENOMEM; 483 484 src_in = (struct sockaddr *) &req->src_addr; 485 dst_in = (struct sockaddr *) &req->dst_addr; 486 487 if (src_addr) { 488 if (src_addr->sa_family != dst_addr->sa_family) { 489 ret = -EINVAL; 490 goto err; 491 } 492 493 memcpy(src_in, src_addr, ip_addr_size(src_addr)); 494 } else { 495 src_in->sa_family = dst_addr->sa_family; 496 } 497 498 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 499 req->addr = addr; 500 req->callback = callback; 501 req->context = context; 502 req->client = client; 503 atomic_inc(&client->refcount); 504 505 req->status = addr_resolve(src_in, dst_in, addr); 506 switch (req->status) { 507 case 0: 508 req->timeout = jiffies; 509 queue_req(req); 510 break; 511 case -ENODATA: 512 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 513 queue_req(req); 514 break; 515 default: 516 ret = req->status; 517 atomic_dec(&client->refcount); 518 goto err; 519 } 520 return ret; 521err: 522 kfree(req); 523 return ret; 524} 525EXPORT_SYMBOL(rdma_resolve_ip); 526 527void rdma_addr_cancel(struct rdma_dev_addr *addr) 528{ 529 struct addr_req *req, *temp_req; 530 531 mutex_lock(&lock); 532 list_for_each_entry_safe(req, temp_req, &req_list, list) { 533 if (req->addr == addr) { 534 req->status = -ECANCELED; 535 req->timeout = jiffies; 536 list_move(&req->list, &req_list); 537 set_timeout(req->timeout); 538 break; 539 } 540 } 541 mutex_unlock(&lock); 542} 543EXPORT_SYMBOL(rdma_addr_cancel); 544 545struct resolve_cb_context { 546 struct rdma_dev_addr *addr; 547 struct completion comp; 548}; 549 550static void resolve_cb(int status, struct sockaddr *src_addr, 551 struct rdma_dev_addr *addr, void *context) 552{ 553 memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct 554 rdma_dev_addr)); 555 complete(&((struct resolve_cb_context *)context)->comp); 556} 557 558int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, 559 u16 *vlan_id, u32 scope_id) 560{ 561 int ret = 0; 562 struct rdma_dev_addr dev_addr; 563 struct resolve_cb_context ctx; 564 struct net_device *dev; 565 566 union { 567 struct sockaddr _sockaddr; 568 struct sockaddr_in _sockaddr_in; 569 struct sockaddr_in6 _sockaddr_in6; 570 } sgid_addr, dgid_addr; 571 572 573 ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid, scope_id); 574 if (ret) 575 return ret; 576 577 ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid, scope_id); 578 if (ret) 579 return ret; 580 581 memset(&dev_addr, 0, sizeof(dev_addr)); 582 583 ctx.addr = &dev_addr; 584 init_completion(&ctx.comp); 585 ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, 586 &dev_addr, 1000, resolve_cb, &ctx); 587 if (ret) 588 return ret; 589 590 wait_for_completion(&ctx.comp); 591 592 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); 593 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); 594 if (!dev) 595 return -ENODEV; 596 if (vlan_id) 597 *vlan_id = rdma_vlan_dev_vlan_id(dev); 598 dev_put(dev); 599 return ret; 600} 601EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); 602 603u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num) 604{ 605#ifdef INET6 606 struct ifnet *ifp; 607 if (ib->get_netdev == NULL) 608 return (-1U); 609 ifp = ib->get_netdev(ib, port_num); 610 if (ifp == NULL) 611 return (-1U); 612 return (in6_getscopezone(ifp, IPV6_ADDR_SCOPE_LINKLOCAL)); 613#else 614 return (-1U); 615#endif 616} 617 618int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id, 619 u32 scope_id) 620{ 621 int ret = 0; 622 struct rdma_dev_addr dev_addr; 623 union { 624 struct sockaddr _sockaddr; 625 struct sockaddr_in _sockaddr_in; 626 struct sockaddr_in6 _sockaddr_in6; 627 } gid_addr; 628 629 ret = rdma_gid2ip(&gid_addr._sockaddr, sgid, scope_id); 630 if (ret) 631 return ret; 632 memset(&dev_addr, 0, sizeof(dev_addr)); 633 ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); 634 if (ret) 635 return ret; 636 637 memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); 638 return ret; 639} 640EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); 641 642static int netevent_callback(struct notifier_block *self, unsigned long event, 643 void *ctx) 644{ 645 if (event == NETEVENT_NEIGH_UPDATE) { 646 set_timeout(jiffies); 647 } 648 return 0; 649} 650 651static struct notifier_block nb = { 652 .notifier_call = netevent_callback 653}; 654 655static int __init addr_init(void) 656{ 657 INIT_DELAYED_WORK(&work, process_req); 658 addr_wq = create_singlethread_workqueue("ib_addr"); 659 if (!addr_wq) 660 return -ENOMEM; 661 662 register_netevent_notifier(&nb); 663 rdma_addr_register_client(&self); 664 return 0; 665} 666 667static void __exit addr_cleanup(void) 668{ 669 rdma_addr_unregister_client(&self); 670 unregister_netevent_notifier(&nb); 671 destroy_workqueue(addr_wq); 672} 673 674module_init(addr_init); 675module_exit(addr_cleanup); 676