1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 
34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/slab.h> 39#include <linux/workqueue.h> 40#include <linux/module.h> 41#include <linux/notifier.h> 42#include <net/route.h> 43#include <net/netevent.h> 44#include <rdma/ib_addr.h> 45#include <netinet/if_ether.h> 46 47 48MODULE_AUTHOR("Sean Hefty"); 49MODULE_DESCRIPTION("IB Address Translation"); 50MODULE_LICENSE("Dual BSD/GPL"); 51 52struct addr_req { 53 struct list_head list; 54 struct sockaddr_storage src_addr; 55 struct sockaddr_storage dst_addr; 56 struct rdma_dev_addr *addr; 57 struct rdma_addr_client *client; 58 void *context; 59 void (*callback)(int status, struct sockaddr *src_addr, 60 struct rdma_dev_addr *addr, void *context); 61 unsigned long timeout; 62 int status; 63}; 64 65static void process_req(struct work_struct *work); 66 67static DEFINE_MUTEX(lock); 68static LIST_HEAD(req_list); 69static struct delayed_work work; 70static struct workqueue_struct *addr_wq; 71 72void rdma_addr_register_client(struct rdma_addr_client *client) 73{ 74 atomic_set(&client->refcount, 1); 75 init_completion(&client->comp); 76} 77EXPORT_SYMBOL(rdma_addr_register_client); 78 79static inline void put_client(struct rdma_addr_client *client) 80{ 81 if (atomic_dec_and_test(&client->refcount)) 82 complete(&client->comp); 83} 84 85void rdma_addr_unregister_client(struct rdma_addr_client *client) 86{ 87 put_client(client); 88 wait_for_completion(&client->comp); 89} 90EXPORT_SYMBOL(rdma_addr_unregister_client); 91 92#ifdef __linux__ 93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 94 const unsigned char *dst_dev_addr) 95{ 96 dev_addr->dev_type = dev->type; 97 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 98 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 99 if (dst_dev_addr) 100 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 101 dev_addr->bound_dev_if = dev->ifindex; 102 return 0; 103} 104#else 105int rdma_copy_addr(struct 
rdma_dev_addr *dev_addr, struct ifnet *dev, 106 const unsigned char *dst_dev_addr) 107{ 108 if (dev->if_type == IFT_INFINIBAND) 109 dev_addr->dev_type = ARPHRD_INFINIBAND; 110 else if (dev->if_type == IFT_ETHER) 111 dev_addr->dev_type = ARPHRD_ETHER; 112 else 113 dev_addr->dev_type = 0; 114 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 115 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 116 dev->if_addrlen); 117 if (dst_dev_addr) 118 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 119 dev_addr->bound_dev_if = dev->if_index; 120 return 0; 121} 122#endif 123EXPORT_SYMBOL(rdma_copy_addr); 124 125int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 126{ 127 struct net_device *dev; 128 int ret = -EADDRNOTAVAIL; 129 130 if (dev_addr->bound_dev_if) { 131 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 132 if (!dev) 133 return -ENODEV; 134 ret = rdma_copy_addr(dev_addr, dev, NULL); 135 dev_put(dev); 136 return ret; 137 } 138 139 switch (addr->sa_family) { 140#ifdef INET 141 case AF_INET: 142 dev = ip_dev_find(NULL, 143 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 144 145 if (!dev) 146 return ret; 147 148 ret = rdma_copy_addr(dev_addr, dev, NULL); 149 dev_put(dev); 150 break; 151#endif 152 153#if defined(INET6) 154 case AF_INET6: 155#ifdef __linux__ 156 read_lock(&dev_base_lock); 157 for_each_netdev(&init_net, dev) { 158 if (ipv6_chk_addr(&init_net, 159 &((struct sockaddr_in6 *) addr)->sin6_addr, 160 dev, 1)) { 161 ret = rdma_copy_addr(dev_addr, dev, NULL); 162 break; 163 } 164 } 165 read_unlock(&dev_base_lock); 166#else 167 { 168 struct sockaddr_in6 *sin6; 169 struct ifaddr *ifa; 170 in_port_t port; 171 172 sin6 = (struct sockaddr_in6 *)addr; 173 port = sin6->sin6_port; 174 sin6->sin6_port = 0; 175 ifa = ifa_ifwithaddr(addr); 176 sin6->sin6_port = port; 177 if (ifa == NULL) { 178 ret = -ENODEV; 179 break; 180 } 181 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 182 
ifa_free(ifa); 183 break; 184 } 185#endif 186 break; 187#endif 188 } 189 return ret; 190} 191EXPORT_SYMBOL(rdma_translate_ip); 192 193static void set_timeout(unsigned long time) 194{ 195 unsigned long delay; 196 197 delay = time - jiffies; 198 if ((long)delay <= 0) 199 delay = 1; 200 201 mod_delayed_work(addr_wq, &work, delay); 202} 203 204static void queue_req(struct addr_req *req) 205{ 206 struct addr_req *temp_req; 207 208 mutex_lock(&lock); 209 list_for_each_entry_reverse(temp_req, &req_list, list) { 210 if (time_after_eq(req->timeout, temp_req->timeout)) 211 break; 212 } 213 214 list_add(&req->list, &temp_req->list); 215 216 if (req_list.next == &req->list) 217 set_timeout(req->timeout); 218 mutex_unlock(&lock); 219} 220 221#ifdef __linux__ 222static int addr4_resolve(struct sockaddr_in *src_in, 223 struct sockaddr_in *dst_in, 224 struct rdma_dev_addr *addr) 225{ 226 __be32 src_ip = src_in->sin_addr.s_addr; 227 __be32 dst_ip = dst_in->sin_addr.s_addr; 228 struct flowi fl; 229 struct rtable *rt; 230 struct neighbour *neigh; 231 int ret; 232 233 memset(&fl, 0, sizeof fl); 234 fl.nl_u.ip4_u.daddr = dst_ip; 235 fl.nl_u.ip4_u.saddr = src_ip; 236 fl.oif = addr->bound_dev_if; 237 238 ret = ip_route_output_key(&init_net, &rt, &fl); 239 if (ret) 240 goto out; 241 242 src_in->sin_family = AF_INET; 243 src_in->sin_addr.s_addr = rt->rt_src; 244 245 if (rt->idev->dev->flags & IFF_LOOPBACK) { 246 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 247 if (!ret) 248 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 249 goto put; 250 } 251 252 /* If the device does ARP internally, return 'done' */ 253 if (rt->idev->dev->flags & IFF_NOARP) { 254 rdma_copy_addr(addr, rt->idev->dev, NULL); 255 goto put; 256 } 257 258 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 259 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 260 neigh_event_send(rt->u.dst.neighbour, NULL); 261 ret = -ENODATA; 262 if (neigh) 263 goto release; 264 goto put; 265 } 266 267 
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 268release: 269 neigh_release(neigh); 270put: 271 ip_rt_put(rt); 272out: 273 return ret; 274} 275 276#if defined(INET6) 277static int addr6_resolve(struct sockaddr_in6 *src_in, 278 struct sockaddr_in6 *dst_in, 279 struct rdma_dev_addr *addr) 280{ 281 struct flowi fl; 282 struct neighbour *neigh; 283 struct dst_entry *dst; 284 int ret; 285 286 memset(&fl, 0, sizeof fl); 287 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); 288 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); 289 fl.oif = addr->bound_dev_if; 290 291 dst = ip6_route_output(&init_net, NULL, &fl); 292 if ((ret = dst->error)) 293 goto put; 294 295 if (ipv6_addr_any(&fl.fl6_src)) { 296 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 297 &fl.fl6_dst, 0, &fl.fl6_src); 298 if (ret) 299 goto put; 300 301 src_in->sin6_family = AF_INET6; 302 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); 303 } 304 305 if (dst->dev->flags & IFF_LOOPBACK) { 306 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 307 if (!ret) 308 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 309 goto put; 310 } 311 312 /* If the device does ARP internally, return 'done' */ 313 if (dst->dev->flags & IFF_NOARP) { 314 ret = rdma_copy_addr(addr, dst->dev, NULL); 315 goto put; 316 } 317 318 neigh = dst->neighbour; 319 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 320 neigh_event_send(dst->neighbour, NULL); 321 ret = -ENODATA; 322 goto put; 323 } 324 325 ret = rdma_copy_addr(addr, dst->dev, neigh->ha); 326put: 327 dst_release(dst); 328 return ret; 329} 330#else 331static int addr6_resolve(struct sockaddr_in6 *src_in, 332 struct sockaddr_in6 *dst_in, 333 struct rdma_dev_addr *addr) 334{ 335 return -EADDRNOTAVAIL; 336} 337#endif 338 339#else 340#include <netinet/if_ether.h> 341 342static int addr_resolve(struct sockaddr *src_in, 343 struct sockaddr *dst_in, 344 struct rdma_dev_addr *addr) 345{ 346 struct sockaddr_in *sin; 347 struct sockaddr_in6 *sin6; 348 struct 
ifaddr *ifa; 349 struct ifnet *ifp; 350#if defined(INET) || defined(INET6) 351 struct llentry *lle; 352#endif 353 struct rtentry *rte; 354 in_port_t port; 355 u_char edst[MAX_ADDR_LEN]; 356 int multi; 357 int bcast; 358 int error = 0; 359 360 /* 361 * Determine whether the address is unicast, multicast, or broadcast 362 * and whether the source interface is valid. 363 */ 364 multi = 0; 365 bcast = 0; 366 sin = NULL; 367 sin6 = NULL; 368 ifp = NULL; 369 rte = NULL; 370 switch (dst_in->sa_family) { 371#ifdef INET 372 case AF_INET: 373 sin = (struct sockaddr_in *)dst_in; 374 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 375 bcast = 1; 376 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 377 multi = 1; 378 sin = (struct sockaddr_in *)src_in; 379 if (sin->sin_addr.s_addr != INADDR_ANY) { 380 /* 381 * Address comparison fails if the port is set 382 * cache it here to be restored later. 383 */ 384 port = sin->sin_port; 385 sin->sin_port = 0; 386 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 387 } else 388 src_in = NULL; 389 break; 390#endif 391#ifdef INET6 392 case AF_INET6: 393 sin6 = (struct sockaddr_in6 *)dst_in; 394 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 395 multi = 1; 396 sin6 = (struct sockaddr_in6 *)src_in; 397 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 398 port = sin6->sin6_port; 399 sin6->sin6_port = 0; 400 } else 401 src_in = NULL; 402 break; 403#endif 404 default: 405 return -EINVAL; 406 } 407 /* 408 * If we have a source address to use look it up first and verify 409 * that it is a local interface. 410 */ 411 if (src_in) { 412 ifa = ifa_ifwithaddr(src_in); 413 if (sin) 414 sin->sin_port = port; 415 if (sin6) 416 sin6->sin6_port = port; 417 if (ifa == NULL) 418 return -ENETUNREACH; 419 ifp = ifa->ifa_ifp; 420 ifa_free(ifa); 421 if (bcast || multi) 422 goto mcast; 423 } 424 /* 425 * Make sure the route exists and has a valid link. 
426 */ 427 rte = rtalloc1(dst_in, 1, 0); 428 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 429 if (rte) 430 RTFREE_LOCKED(rte); 431 return -EHOSTUNREACH; 432 } 433 /* 434 * If it's not multicast or broadcast and the route doesn't match the 435 * requested interface return unreachable. Otherwise fetch the 436 * correct interface pointer and unlock the route. 437 */ 438 if (multi || bcast) { 439 if (ifp == NULL) 440 ifp = rte->rt_ifp; 441 RTFREE_LOCKED(rte); 442 } else if (ifp && ifp != rte->rt_ifp) { 443 RTFREE_LOCKED(rte); 444 return -ENETUNREACH; 445 } else { 446 if (ifp == NULL) 447 ifp = rte->rt_ifp; 448 RT_UNLOCK(rte); 449 } 450mcast: 451 if (bcast) 452 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 453 if (multi) { 454 struct sockaddr *llsa; 455 456 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 457 if (error) 458 return -error; 459 error = rdma_copy_addr(addr, ifp, 460 LLADDR((struct sockaddr_dl *)llsa)); 461 free(llsa, M_IFMADDR); 462 return error; 463 } 464 /* 465 * Resolve the link local address. 466 */ 467 switch (dst_in->sa_family) { 468#ifdef INET 469 case AF_INET: 470 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); 471 break; 472#endif 473#ifdef INET6 474 case AF_INET6: 475 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); 476 break; 477#endif 478 default: 479 /* XXX: Shouldn't happen. 
*/ 480 error = -EINVAL; 481 } 482 RTFREE(rte); 483 if (error == 0) 484 return rdma_copy_addr(addr, ifp, edst); 485 if (error == EWOULDBLOCK) 486 return -ENODATA; 487 return -error; 488} 489 490#endif 491 492static void process_req(struct work_struct *work) 493{ 494 struct addr_req *req, *temp_req; 495 struct sockaddr *src_in, *dst_in; 496 struct list_head done_list; 497 498 INIT_LIST_HEAD(&done_list); 499 500 mutex_lock(&lock); 501 list_for_each_entry_safe(req, temp_req, &req_list, list) { 502 if (req->status == -ENODATA) { 503 src_in = (struct sockaddr *) &req->src_addr; 504 dst_in = (struct sockaddr *) &req->dst_addr; 505 req->status = addr_resolve(src_in, dst_in, req->addr); 506 if (req->status && time_after_eq(jiffies, req->timeout)) 507 req->status = -ETIMEDOUT; 508 else if (req->status == -ENODATA) 509 continue; 510 } 511 list_move_tail(&req->list, &done_list); 512 } 513 514 if (!list_empty(&req_list)) { 515 req = list_entry(req_list.next, struct addr_req, list); 516 set_timeout(req->timeout); 517 } 518 mutex_unlock(&lock); 519 520 list_for_each_entry_safe(req, temp_req, &done_list, list) { 521 list_del(&req->list); 522 req->callback(req->status, (struct sockaddr *) &req->src_addr, 523 req->addr, req->context); 524 put_client(req->client); 525 kfree(req); 526 } 527} 528 529int rdma_resolve_ip(struct rdma_addr_client *client, 530 struct sockaddr *src_addr, struct sockaddr *dst_addr, 531 struct rdma_dev_addr *addr, int timeout_ms, 532 void (*callback)(int status, struct sockaddr *src_addr, 533 struct rdma_dev_addr *addr, void *context), 534 void *context) 535{ 536 struct sockaddr *src_in, *dst_in; 537 struct addr_req *req; 538 int ret = 0; 539 540 req = kzalloc(sizeof *req, GFP_KERNEL); 541 if (!req) 542 return -ENOMEM; 543 544 src_in = (struct sockaddr *) &req->src_addr; 545 dst_in = (struct sockaddr *) &req->dst_addr; 546 547 if (src_addr) { 548 if (src_addr->sa_family != dst_addr->sa_family) { 549 ret = -EINVAL; 550 goto err; 551 } 552 553 memcpy(src_in, 
src_addr, ip_addr_size(src_addr)); 554 } else { 555 src_in->sa_family = dst_addr->sa_family; 556 } 557 558 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 559 req->addr = addr; 560 req->callback = callback; 561 req->context = context; 562 req->client = client; 563 atomic_inc(&client->refcount); 564 565 req->status = addr_resolve(src_in, dst_in, addr); 566 switch (req->status) { 567 case 0: 568 req->timeout = jiffies; 569 queue_req(req); 570 break; 571 case -ENODATA: 572 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 573 queue_req(req); 574 break; 575 default: 576 ret = req->status; 577 atomic_dec(&client->refcount); 578 goto err; 579 } 580 return ret; 581err: 582 kfree(req); 583 return ret; 584} 585EXPORT_SYMBOL(rdma_resolve_ip); 586 587void rdma_addr_cancel(struct rdma_dev_addr *addr) 588{ 589 struct addr_req *req, *temp_req; 590 591 mutex_lock(&lock); 592 list_for_each_entry_safe(req, temp_req, &req_list, list) { 593 if (req->addr == addr) { 594 req->status = -ECANCELED; 595 req->timeout = jiffies; 596 list_move(&req->list, &req_list); 597 set_timeout(req->timeout); 598 break; 599 } 600 } 601 mutex_unlock(&lock); 602} 603EXPORT_SYMBOL(rdma_addr_cancel); 604 605static int netevent_callback(struct notifier_block *self, unsigned long event, 606 void *ctx) 607{ 608 if (event == NETEVENT_NEIGH_UPDATE) { 609#ifdef __linux__ 610 struct neighbour *neigh = ctx; 611 612 if (neigh->nud_state & NUD_VALID) { 613 set_timeout(jiffies); 614 } 615#else 616 set_timeout(jiffies); 617#endif 618 } 619 return 0; 620} 621 622static struct notifier_block nb = { 623 .notifier_call = netevent_callback 624}; 625 626static int __init addr_init(void) 627{ 628 INIT_DELAYED_WORK(&work, process_req); 629 addr_wq = create_singlethread_workqueue("ib_addr"); 630 if (!addr_wq) 631 return -ENOMEM; 632 633 register_netevent_notifier(&nb); 634 return 0; 635} 636 637static void __exit addr_cleanup(void) 638{ 639 unregister_netevent_notifier(&nb); 640 destroy_workqueue(addr_wq); 641} 642 
/* Register the module's load and unload entry points. */
module_init(addr_init);
module_exit(addr_cleanup);