addr.c revision 256281
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/workqueue.h> 39#include <net/arp.h> 40#include <net/neighbour.h> 41#include <net/route.h> 42#include <net/netevent.h> 43#include <net/addrconf.h> 44#include <net/ip6_route.h> 45#include <rdma/ib_addr.h> 46 47MODULE_AUTHOR("Sean Hefty"); 48MODULE_DESCRIPTION("IB Address Translation"); 49MODULE_LICENSE("Dual BSD/GPL"); 50 51struct addr_req { 52 struct list_head list; 53 struct sockaddr_storage src_addr; 54 struct sockaddr_storage dst_addr; 55 struct rdma_dev_addr *addr; 56 struct rdma_addr_client *client; 57 void *context; 58 void (*callback)(int status, struct sockaddr *src_addr, 59 struct rdma_dev_addr *addr, void *context); 60 unsigned long timeout; 61 int status; 62}; 63 64static void process_req(struct work_struct *work); 65 66static DEFINE_MUTEX(lock); 67static LIST_HEAD(req_list); 68static struct delayed_work work; 69static struct workqueue_struct *addr_wq; 70 71void rdma_addr_register_client(struct rdma_addr_client *client) 72{ 73 atomic_set(&client->refcount, 1); 74 init_completion(&client->comp); 75} 76EXPORT_SYMBOL(rdma_addr_register_client); 77 78static inline void put_client(struct rdma_addr_client *client) 79{ 80 if (atomic_dec_and_test(&client->refcount)) 81 complete(&client->comp); 82} 83 84void rdma_addr_unregister_client(struct rdma_addr_client *client) 85{ 86 put_client(client); 87 wait_for_completion(&client->comp); 88} 89EXPORT_SYMBOL(rdma_addr_unregister_client); 90 91#ifdef __linux__ 92int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 93 const unsigned char *dst_dev_addr) 94{ 95 dev_addr->dev_type = dev->type; 96 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 97 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 98 if (dst_dev_addr) 99 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 100 dev_addr->bound_dev_if = dev->ifindex; 101 return 0; 102} 103#else 104int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 105 const unsigned char *dst_dev_addr) 106{ 107 if (dev->if_type == IFT_INFINIBAND) 108 dev_addr->dev_type = ARPHRD_INFINIBAND; 109 else if (dev->if_type == IFT_ETHER) 110 dev_addr->dev_type = ARPHRD_ETHER; 111 else 112 dev_addr->dev_type = 0; 113 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 114 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 115 dev->if_addrlen); 116 if (dst_dev_addr) 117 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 118 dev_addr->bound_dev_if = dev->if_index; 119 return 0; 120} 121#endif 122EXPORT_SYMBOL(rdma_copy_addr); 123 124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 125{ 126 struct net_device *dev; 127 int ret = -EADDRNOTAVAIL; 128 129 if (dev_addr->bound_dev_if) { 130 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 131 if (!dev) 132 return -ENODEV; 133 ret = rdma_copy_addr(dev_addr, dev, NULL); 134 dev_put(dev); 135 return ret; 136 } 137 138 switch (addr->sa_family) { 139#ifdef INET 140 case AF_INET: 141 dev = ip_dev_find(NULL, 142 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 143 144 if (!dev) 145 return ret; 146 147 ret = rdma_copy_addr(dev_addr, dev, NULL); 148 dev_put(dev); 149 break; 150#endif 151 152#if defined(INET6) 153 case AF_INET6: 154#ifdef __linux__ 155 read_lock(&dev_base_lock); 156 for_each_netdev(&init_net, dev) { 157 if (ipv6_chk_addr(&init_net, 158 &((struct sockaddr_in6 *) addr)->sin6_addr, 159 dev, 1)) { 160 ret = rdma_copy_addr(dev_addr, dev, NULL); 161 break; 162 } 163 } 164 read_unlock(&dev_base_lock); 165#else 166 { 167 struct sockaddr_in6 *sin6; 168 struct ifaddr *ifa; 169 in_port_t port; 170 171 sin6 = (struct sockaddr_in6 *)addr; 172 port = sin6->sin6_port; 173 sin6->sin6_port = 0; 174 ifa = ifa_ifwithaddr(addr); 175 sin6->sin6_port = port; 176 if (ifa == NULL) { 177 ret = -ENODEV; 178 break; 179 } 180 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 181 ifa_free(ifa); 182 break; 183 } 184#endif 185 break; 186#endif 187 } 188 return ret; 189} 190EXPORT_SYMBOL(rdma_translate_ip); 191 192static void set_timeout(unsigned long time) 193{ 194 unsigned long delay; 195 196 cancel_delayed_work(&work); 197 198 delay = time - jiffies; 199 if ((long)delay <= 0) 200 delay = 1; 201 202 queue_delayed_work(addr_wq, &work, delay); 203} 204 205static void queue_req(struct addr_req *req) 206{ 207 struct addr_req *temp_req; 208 209 mutex_lock(&lock); 210 list_for_each_entry_reverse(temp_req, &req_list, list) { 211 if (time_after_eq(req->timeout, temp_req->timeout)) 212 break; 213 } 214 215 list_add(&req->list, &temp_req->list); 216 217 if (req_list.next == &req->list) 218 set_timeout(req->timeout); 219 mutex_unlock(&lock); 220} 221 222#ifdef __linux__ 223static int addr4_resolve(struct sockaddr_in *src_in, 224 struct sockaddr_in *dst_in, 225 struct rdma_dev_addr *addr) 226{ 227 __be32 src_ip = src_in->sin_addr.s_addr; 228 __be32 dst_ip = dst_in->sin_addr.s_addr; 229 struct flowi fl; 230 struct rtable *rt; 231 struct neighbour *neigh; 232 int ret; 233 234 memset(&fl, 0, sizeof fl); 235 fl.nl_u.ip4_u.daddr = dst_ip; 236 fl.nl_u.ip4_u.saddr = src_ip; 237 fl.oif = addr->bound_dev_if; 238 239 ret = ip_route_output_key(&init_net, &rt, &fl); 240 if (ret) 241 goto out; 242 243 src_in->sin_family = AF_INET; 244 src_in->sin_addr.s_addr = rt->rt_src; 245 246 if (rt->idev->dev->flags & IFF_LOOPBACK) { 247 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 248 if (!ret) 249 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 250 goto put; 251 } 252 253 /* If the device does ARP internally, return 'done' */ 254 if (rt->idev->dev->flags & IFF_NOARP) { 255 rdma_copy_addr(addr, rt->idev->dev, NULL); 256 goto put; 257 } 258 259 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 260 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 261 neigh_event_send(rt->u.dst.neighbour, NULL); 262 ret = -ENODATA; 263 if (neigh) 264 goto release; 265 goto put; 266 } 267 268 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 269release: 270 neigh_release(neigh); 271put: 272 ip_rt_put(rt); 273out: 274 return ret; 275} 276 277#if defined(INET6) 278static int addr6_resolve(struct sockaddr_in6 *src_in, 279 struct sockaddr_in6 *dst_in, 280 struct rdma_dev_addr *addr) 281{ 282 struct flowi fl; 283 struct neighbour *neigh; 284 struct dst_entry *dst; 285 int ret; 286 287 memset(&fl, 0, sizeof fl); 288 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); 289 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); 290 fl.oif = addr->bound_dev_if; 291 292 dst = ip6_route_output(&init_net, NULL, &fl); 293 if ((ret = dst->error)) 294 goto put; 295 296 if (ipv6_addr_any(&fl.fl6_src)) { 297 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 298 &fl.fl6_dst, 0, &fl.fl6_src); 299 if (ret) 300 goto put; 301 302 src_in->sin6_family = AF_INET6; 303 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); 304 } 305 306 if (dst->dev->flags & IFF_LOOPBACK) { 307 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 308 if (!ret) 309 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 310 goto put; 311 } 312 313 /* If the device does ARP internally, return 'done' */ 314 if (dst->dev->flags & IFF_NOARP) { 315 ret = rdma_copy_addr(addr, dst->dev, NULL); 316 goto put; 317 } 318 319 neigh = dst->neighbour; 320 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 321 neigh_event_send(dst->neighbour, NULL); 322 ret = -ENODATA; 323 goto put; 324 } 325 326 ret = rdma_copy_addr(addr, dst->dev, neigh->ha); 327put: 328 dst_release(dst); 329 return ret; 330} 331#else 332static int addr6_resolve(struct sockaddr_in6 *src_in, 333 struct sockaddr_in6 *dst_in, 334 struct rdma_dev_addr *addr) 335{ 336 return -EADDRNOTAVAIL; 337} 338#endif 339 340#else 341#include <netinet/if_ether.h> 342 343static int addr_resolve(struct sockaddr *src_in, 344 struct sockaddr *dst_in, 345 struct rdma_dev_addr *addr) 346{ 347 struct sockaddr_in *sin; 348 struct sockaddr_in6 *sin6; 349 struct ifaddr *ifa; 350 struct ifnet *ifp; 351#if defined(INET) || defined(INET6) 352 struct llentry *lle; 353#endif 354 struct rtentry *rte; 355 in_port_t port; 356 u_char edst[MAX_ADDR_LEN]; 357 int multi; 358 int bcast; 359 int error = 0; 360 361 /* 362 * Determine whether the address is unicast, multicast, or broadcast 363 * and whether the source interface is valid. 364 */ 365 multi = 0; 366 bcast = 0; 367 sin = NULL; 368 sin6 = NULL; 369 ifp = NULL; 370 rte = NULL; 371 switch (dst_in->sa_family) { 372#ifdef INET 373 case AF_INET: 374 sin = (struct sockaddr_in *)dst_in; 375 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 376 bcast = 1; 377 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 378 multi = 1; 379 sin = (struct sockaddr_in *)src_in; 380 if (sin->sin_addr.s_addr != INADDR_ANY) { 381 /* 382 * Address comparison fails if the port is set 383 * cache it here to be restored later. 384 */ 385 port = sin->sin_port; 386 sin->sin_port = 0; 387 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 388 } else 389 src_in = NULL; 390 break; 391#endif 392#ifdef INET6 393 case AF_INET6: 394 sin6 = (struct sockaddr_in6 *)dst_in; 395 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 396 multi = 1; 397 sin6 = (struct sockaddr_in6 *)src_in; 398 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 399 port = sin6->sin6_port; 400 sin6->sin6_port = 0; 401 } else 402 src_in = NULL; 403 break; 404#endif 405 default: 406 return -EINVAL; 407 } 408 /* 409 * If we have a source address to use look it up first and verify 410 * that it is a local interface. 411 */ 412 if (src_in) { 413 ifa = ifa_ifwithaddr(src_in); 414 if (sin) 415 sin->sin_port = port; 416 if (sin6) 417 sin6->sin6_port = port; 418 if (ifa == NULL) 419 return -ENETUNREACH; 420 ifp = ifa->ifa_ifp; 421 ifa_free(ifa); 422 if (bcast || multi) 423 goto mcast; 424 } 425 /* 426 * Make sure the route exists and has a valid link. 427 */ 428 rte = rtalloc1(dst_in, 1, 0); 429 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 430 if (rte) 431 RTFREE_LOCKED(rte); 432 return -EHOSTUNREACH; 433 } 434 /* 435 * If it's not multicast or broadcast and the route doesn't match the 436 * requested interface return unreachable. Otherwise fetch the 437 * correct interface pointer and unlock the route. 438 */ 439 if (multi || bcast) { 440 if (ifp == NULL) 441 ifp = rte->rt_ifp; 442 RTFREE_LOCKED(rte); 443 } else if (ifp && ifp != rte->rt_ifp) { 444 RTFREE_LOCKED(rte); 445 return -ENETUNREACH; 446 } else { 447 if (ifp == NULL) 448 ifp = rte->rt_ifp; 449 RT_UNLOCK(rte); 450 } 451mcast: 452 if (bcast) 453 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 454 if (multi) { 455 struct sockaddr *llsa; 456 457 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 458 if (error) 459 return -error; 460 error = rdma_copy_addr(addr, ifp, 461 LLADDR((struct sockaddr_dl *)llsa)); 462 free(llsa, M_IFMADDR); 463 return error; 464 } 465 /* 466 * Resolve the link local address. 467 */ 468 switch (dst_in->sa_family) { 469#ifdef INET 470 case AF_INET: 471 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); 472 break; 473#endif 474#ifdef INET6 475 case AF_INET6: 476 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); 477 break; 478#endif 479 default: 480 /* XXX: Shouldn't happen. */ 481 error = -EINVAL; 482 } 483 RTFREE(rte); 484 if (error == 0) 485 return rdma_copy_addr(addr, ifp, edst); 486 if (error == EWOULDBLOCK) 487 return -ENODATA; 488 return -error; 489} 490 491#endif 492 493static void process_req(struct work_struct *work) 494{ 495 struct addr_req *req, *temp_req; 496 struct sockaddr *src_in, *dst_in; 497 struct list_head done_list; 498 499 INIT_LIST_HEAD(&done_list); 500 501 mutex_lock(&lock); 502 list_for_each_entry_safe(req, temp_req, &req_list, list) { 503 if (req->status == -ENODATA) { 504 src_in = (struct sockaddr *) &req->src_addr; 505 dst_in = (struct sockaddr *) &req->dst_addr; 506 req->status = addr_resolve(src_in, dst_in, req->addr); 507 if (req->status && time_after_eq(jiffies, req->timeout)) 508 req->status = -ETIMEDOUT; 509 else if (req->status == -ENODATA) 510 continue; 511 } 512 list_move_tail(&req->list, &done_list); 513 } 514 515 if (!list_empty(&req_list)) { 516 req = list_entry(req_list.next, struct addr_req, list); 517 set_timeout(req->timeout); 518 } 519 mutex_unlock(&lock); 520 521 list_for_each_entry_safe(req, temp_req, &done_list, list) { 522 list_del(&req->list); 523 req->callback(req->status, (struct sockaddr *) &req->src_addr, 524 req->addr, req->context); 525 put_client(req->client); 526 kfree(req); 527 } 528} 529 530int rdma_resolve_ip(struct rdma_addr_client *client, 531 struct sockaddr *src_addr, struct sockaddr *dst_addr, 532 struct rdma_dev_addr *addr, int timeout_ms, 533 void (*callback)(int status, struct sockaddr *src_addr, 534 struct rdma_dev_addr *addr, void *context), 535 void *context) 536{ 537 struct sockaddr *src_in, *dst_in; 538 struct addr_req *req; 539 int ret = 0; 540 541 req = kzalloc(sizeof *req, GFP_KERNEL); 542 if (!req) 543 return -ENOMEM; 544 545 src_in = (struct sockaddr *) &req->src_addr; 546 dst_in = (struct sockaddr *) &req->dst_addr; 547 548 if (src_addr) { 549 if (src_addr->sa_family != dst_addr->sa_family) { 550 ret = -EINVAL; 551 goto err; 552 } 553 554 memcpy(src_in, src_addr, ip_addr_size(src_addr)); 555 } else { 556 src_in->sa_family = dst_addr->sa_family; 557 } 558 559 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 560 req->addr = addr; 561 req->callback = callback; 562 req->context = context; 563 req->client = client; 564 atomic_inc(&client->refcount); 565 566 req->status = addr_resolve(src_in, dst_in, addr); 567 switch (req->status) { 568 case 0: 569 req->timeout = jiffies; 570 queue_req(req); 571 break; 572 case -ENODATA: 573 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 574 queue_req(req); 575 break; 576 default: 577 ret = req->status; 578 atomic_dec(&client->refcount); 579 goto err; 580 } 581 return ret; 582err: 583 kfree(req); 584 return ret; 585} 586EXPORT_SYMBOL(rdma_resolve_ip); 587 588void rdma_addr_cancel(struct rdma_dev_addr *addr) 589{ 590 struct addr_req *req, *temp_req; 591 592 mutex_lock(&lock); 593 list_for_each_entry_safe(req, temp_req, &req_list, list) { 594 if (req->addr == addr) { 595 req->status = -ECANCELED; 596 req->timeout = jiffies; 597 list_move(&req->list, &req_list); 598 set_timeout(req->timeout); 599 break; 600 } 601 } 602 mutex_unlock(&lock); 603} 604EXPORT_SYMBOL(rdma_addr_cancel); 605 606static int netevent_callback(struct notifier_block *self, unsigned long event, 607 void *ctx) 608{ 609 if (event == NETEVENT_NEIGH_UPDATE) { 610#ifdef __linux__ 611 struct neighbour *neigh = ctx; 612 613 if (neigh->nud_state & NUD_VALID) { 614 set_timeout(jiffies); 615 } 616#else 617 set_timeout(jiffies); 618#endif 619 } 620 return 0; 621} 622 623static struct notifier_block nb = { 624 .notifier_call = netevent_callback 625}; 626 627static int addr_init(void) 628{ 629 INIT_DELAYED_WORK(&work, process_req); 630 addr_wq = create_singlethread_workqueue("ib_addr"); 631 if (!addr_wq) 632 return -ENOMEM; 633 634 register_netevent_notifier(&nb); 635 return 0; 636} 637 638static void addr_cleanup(void) 639{ 640 unregister_netevent_notifier(&nb); 641 destroy_workqueue(addr_wq); 642} 643 644module_init(addr_init); 645module_exit(addr_cleanup); 646