addr.c revision 233040
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/workqueue.h> 39#include <net/arp.h> 40#include <net/neighbour.h> 41#include <net/route.h> 42#include <net/netevent.h> 43#include <net/addrconf.h> 44#include <net/ip6_route.h> 45#include <rdma/ib_addr.h> 46 47MODULE_AUTHOR("Sean Hefty"); 48MODULE_DESCRIPTION("IB Address Translation"); 49MODULE_LICENSE("Dual BSD/GPL"); 50 51struct addr_req { 52 struct list_head list; 53 struct sockaddr_storage src_addr; 54 struct sockaddr_storage dst_addr; 55 struct rdma_dev_addr *addr; 56 struct rdma_addr_client *client; 57 void *context; 58 void (*callback)(int status, struct sockaddr *src_addr, 59 struct rdma_dev_addr *addr, void *context); 60 unsigned long timeout; 61 int status; 62}; 63 64static void process_req(struct work_struct *work); 65 66static DEFINE_MUTEX(lock); 67static LIST_HEAD(req_list); 68static struct delayed_work work; 69static struct workqueue_struct *addr_wq; 70 71void rdma_addr_register_client(struct rdma_addr_client *client) 72{ 73 atomic_set(&client->refcount, 1); 74 init_completion(&client->comp); 75} 76EXPORT_SYMBOL(rdma_addr_register_client); 77 78static inline void put_client(struct rdma_addr_client *client) 79{ 80 if (atomic_dec_and_test(&client->refcount)) 81 complete(&client->comp); 82} 83 84void rdma_addr_unregister_client(struct rdma_addr_client *client) 85{ 86 put_client(client); 87 wait_for_completion(&client->comp); 88} 89EXPORT_SYMBOL(rdma_addr_unregister_client); 90 91#ifdef __linux__ 92int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 93 const unsigned char *dst_dev_addr) 94{ 95 dev_addr->dev_type = dev->type; 96 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 97 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 98 if (dst_dev_addr) 99 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 100 dev_addr->bound_dev_if = dev->ifindex; 101 return 0; 102} 103#else 104int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 105 const unsigned char *dst_dev_addr) 106{ 107 if (dev->if_type == IFT_INFINIBAND) 108 dev_addr->dev_type = ARPHRD_INFINIBAND; 109 else if (dev->if_type == IFT_ETHER) 110 dev_addr->dev_type = ARPHRD_ETHER; 111 else 112 dev_addr->dev_type = 0; 113 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 114 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 115 dev->if_addrlen); 116 if (dst_dev_addr) 117 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 118 dev_addr->bound_dev_if = dev->if_index; 119 return 0; 120} 121#endif 122EXPORT_SYMBOL(rdma_copy_addr); 123 124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 125{ 126 struct net_device *dev; 127 int ret = -EADDRNOTAVAIL; 128 129 if (dev_addr->bound_dev_if) { 130 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 131 if (!dev) 132 return -ENODEV; 133 ret = rdma_copy_addr(dev_addr, dev, NULL); 134 dev_put(dev); 135 return ret; 136 } 137 138 switch (addr->sa_family) { 139 case AF_INET: 140 dev = ip_dev_find(NULL, 141 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 142 143 if (!dev) 144 return ret; 145 146 ret = rdma_copy_addr(dev_addr, dev, NULL); 147 dev_put(dev); 148 break; 149 150#if defined(INET6) 151 case AF_INET6: 152#ifdef __linux__ 153 read_lock(&dev_base_lock); 154 for_each_netdev(&init_net, dev) { 155 if (ipv6_chk_addr(&init_net, 156 &((struct sockaddr_in6 *) addr)->sin6_addr, 157 dev, 1)) { 158 ret = rdma_copy_addr(dev_addr, dev, NULL); 159 break; 160 } 161 } 162 read_unlock(&dev_base_lock); 163#else 164 { 165 struct sockaddr_in6 *sin6; 166 struct ifaddr *ifa; 167 in_port_t port; 168 169 sin6 = (struct sockaddr_in6 *)addr; 170 port = sin6->sin6_port; 171 sin6->sin6_port = 0; 172 ifa = ifa_ifwithaddr(addr); 173 sin6->sin6_port = port; 174 if (ifa == NULL) { 175 ret = -ENODEV; 176 break; 177 } 178 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 179 ifa_free(ifa); 180 break; 181 } 182#endif 183 break; 184#endif 185 } 186 return ret; 187} 188EXPORT_SYMBOL(rdma_translate_ip); 189 190static void set_timeout(unsigned long time) 191{ 192 unsigned long delay; 193 194 cancel_delayed_work(&work); 195 196 delay = time - jiffies; 197 if ((long)delay <= 0) 198 delay = 1; 199 200 queue_delayed_work(addr_wq, &work, delay); 201} 202 203static void queue_req(struct addr_req *req) 204{ 205 struct addr_req *temp_req; 206 207 mutex_lock(&lock); 208 list_for_each_entry_reverse(temp_req, &req_list, list) { 209 if (time_after_eq(req->timeout, temp_req->timeout)) 210 break; 211 } 212 213 list_add(&req->list, &temp_req->list); 214 215 if (req_list.next == &req->list) 216 set_timeout(req->timeout); 217 mutex_unlock(&lock); 218} 219 220#ifdef __linux__ 221static int addr4_resolve(struct sockaddr_in *src_in, 222 struct sockaddr_in *dst_in, 223 struct rdma_dev_addr *addr) 224{ 225 __be32 src_ip = src_in->sin_addr.s_addr; 226 __be32 dst_ip = dst_in->sin_addr.s_addr; 227 struct flowi fl; 228 struct rtable *rt; 229 struct neighbour *neigh; 230 int ret; 231 232 memset(&fl, 0, sizeof fl); 233 fl.nl_u.ip4_u.daddr = dst_ip; 234 fl.nl_u.ip4_u.saddr = src_ip; 235 fl.oif = addr->bound_dev_if; 236 237 ret = ip_route_output_key(&init_net, &rt, &fl); 238 if (ret) 239 goto out; 240 241 src_in->sin_family = AF_INET; 242 src_in->sin_addr.s_addr = rt->rt_src; 243 244 if (rt->idev->dev->flags & IFF_LOOPBACK) { 245 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 246 if (!ret) 247 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 248 goto put; 249 } 250 251 /* If the device does ARP internally, return 'done' */ 252 if (rt->idev->dev->flags & IFF_NOARP) { 253 rdma_copy_addr(addr, rt->idev->dev, NULL); 254 goto put; 255 } 256 257 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 258 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 259 neigh_event_send(rt->u.dst.neighbour, NULL); 260 ret = -ENODATA; 261 if (neigh) 262 goto release; 263 goto put; 264 } 265 266 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 267release: 268 neigh_release(neigh); 269put: 270 ip_rt_put(rt); 271out: 272 return ret; 273} 274 275#if defined(INET6) 276static int addr6_resolve(struct sockaddr_in6 *src_in, 277 struct sockaddr_in6 *dst_in, 278 struct rdma_dev_addr *addr) 279{ 280 struct flowi fl; 281 struct neighbour *neigh; 282 struct dst_entry *dst; 283 int ret; 284 285 memset(&fl, 0, sizeof fl); 286 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); 287 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); 288 fl.oif = addr->bound_dev_if; 289 290 dst = ip6_route_output(&init_net, NULL, &fl); 291 if ((ret = dst->error)) 292 goto put; 293 294 if (ipv6_addr_any(&fl.fl6_src)) { 295 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 296 &fl.fl6_dst, 0, &fl.fl6_src); 297 if (ret) 298 goto put; 299 300 src_in->sin6_family = AF_INET6; 301 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); 302 } 303 304 if (dst->dev->flags & IFF_LOOPBACK) { 305 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 306 if (!ret) 307 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 308 goto put; 309 } 310 311 /* If the device does ARP internally, return 'done' */ 312 if (dst->dev->flags & IFF_NOARP) { 313 ret = rdma_copy_addr(addr, dst->dev, NULL); 314 goto put; 315 } 316 317 neigh = dst->neighbour; 318 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 319 neigh_event_send(dst->neighbour, NULL); 320 ret = -ENODATA; 321 goto put; 322 } 323 324 ret = rdma_copy_addr(addr, dst->dev, neigh->ha); 325put: 326 dst_release(dst); 327 return ret; 328} 329#else 330static int addr6_resolve(struct sockaddr_in6 *src_in, 331 struct sockaddr_in6 *dst_in, 332 struct rdma_dev_addr *addr) 333{ 334 return -EADDRNOTAVAIL; 335} 336#endif 337 338#else 339#include <netinet/if_ether.h> 340 341static int addr_resolve(struct sockaddr *src_in, 342 struct sockaddr *dst_in, 343 struct rdma_dev_addr *addr) 344{ 345 struct sockaddr_in *sin; 346 struct sockaddr_in6 *sin6; 347 struct ifaddr *ifa; 348 struct ifnet *ifp; 349 struct llentry *lle; 350 struct rtentry *rte; 351 in_port_t port; 352 u_char edst[MAX_ADDR_LEN]; 353 int multi; 354 int bcast; 355 int error; 356 357 /* 358 * Determine whether the address is unicast, multicast, or broadcast 359 * and whether the source interface is valid. 360 */ 361 multi = 0; 362 bcast = 0; 363 sin = NULL; 364 sin6 = NULL; 365 ifp = NULL; 366 rte = NULL; 367 switch (dst_in->sa_family) { 368 case AF_INET: 369 sin = (struct sockaddr_in *)dst_in; 370 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 371 bcast = 1; 372 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 373 multi = 1; 374 sin = (struct sockaddr_in *)src_in; 375 if (sin->sin_addr.s_addr != INADDR_ANY) { 376 /* 377 * Address comparison fails if the port is set 378 * cache it here to be restored later. 379 */ 380 port = sin->sin_port; 381 sin->sin_port = 0; 382 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 383 } else 384 src_in = NULL; 385 break; 386#ifdef INET6 387 case AF_INET6: 388 sin6 = (struct sockaddr_in6 *)dst_in; 389 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 390 multi = 1; 391 sin6 = (struct sockaddr_in6 *)src_in; 392 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 393 port = sin6->sin6_port; 394 sin6->sin6_port = 0; 395 } else 396 src_in = NULL; 397 break; 398#endif 399 default: 400 return -EINVAL; 401 } 402 /* 403 * If we have a source address to use look it up first and verify 404 * that it is a local interface. 405 */ 406 if (src_in) { 407 ifa = ifa_ifwithaddr(src_in); 408 if (sin) 409 sin->sin_port = port; 410 if (sin6) 411 sin6->sin6_port = port; 412 if (ifa == NULL) 413 return -ENETUNREACH; 414 ifp = ifa->ifa_ifp; 415 ifa_free(ifa); 416 if (bcast || multi) 417 goto mcast; 418 } 419 /* 420 * Make sure the route exists and has a valid link. 421 */ 422 rte = rtalloc1(dst_in, 1, 0); 423 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 424 if (rte) 425 RTFREE_LOCKED(rte); 426 return -EHOSTUNREACH; 427 } 428 /* 429 * If it's not multicast or broadcast and the route doesn't match the 430 * requested interface return unreachable. Otherwise fetch the 431 * correct interface pointer and unlock the route. 432 */ 433 if (multi || bcast) { 434 if (ifp == NULL) 435 ifp = rte->rt_ifp; 436 RTFREE_LOCKED(rte); 437 } else if (ifp && ifp != rte->rt_ifp) { 438 RTFREE_LOCKED(rte); 439 return -ENETUNREACH; 440 } else { 441 if (ifp == NULL) 442 ifp = rte->rt_ifp; 443 RT_UNLOCK(rte); 444 } 445mcast: 446 if (bcast) 447 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 448 if (multi) { 449 struct sockaddr *llsa; 450 451 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 452 if (error) 453 return -error; 454 error = rdma_copy_addr(addr, ifp, 455 LLADDR((struct sockaddr_dl *)llsa)); 456 free(llsa, M_IFMADDR); 457 return error; 458 } 459 /* 460 * Resolve the link local address. 461 */ 462#ifdef INET6 463 if (dst_in->sa_family == AF_INET6) 464 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); 465 else 466#endif 467 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); 468 RTFREE(rte); 469 if (error == 0) 470 return rdma_copy_addr(addr, ifp, edst); 471 if (error == EWOULDBLOCK) 472 return -ENODATA; 473 return -error; 474} 475 476#endif 477 478static void process_req(struct work_struct *work) 479{ 480 struct addr_req *req, *temp_req; 481 struct sockaddr *src_in, *dst_in; 482 struct list_head done_list; 483 484 INIT_LIST_HEAD(&done_list); 485 486 mutex_lock(&lock); 487 list_for_each_entry_safe(req, temp_req, &req_list, list) { 488 if (req->status == -ENODATA) { 489 src_in = (struct sockaddr *) &req->src_addr; 490 dst_in = (struct sockaddr *) &req->dst_addr; 491 req->status = addr_resolve(src_in, dst_in, req->addr); 492 if (req->status && time_after_eq(jiffies, req->timeout)) 493 req->status = -ETIMEDOUT; 494 else if (req->status == -ENODATA) 495 continue; 496 } 497 list_move_tail(&req->list, &done_list); 498 } 499 500 if (!list_empty(&req_list)) { 501 req = list_entry(req_list.next, struct addr_req, list); 502 set_timeout(req->timeout); 503 } 504 mutex_unlock(&lock); 505 506 list_for_each_entry_safe(req, temp_req, &done_list, list) { 507 list_del(&req->list); 508 req->callback(req->status, (struct sockaddr *) &req->src_addr, 509 req->addr, req->context); 510 put_client(req->client); 511 kfree(req); 512 } 513} 514 515int rdma_resolve_ip(struct rdma_addr_client *client, 516 struct sockaddr *src_addr, struct sockaddr *dst_addr, 517 struct rdma_dev_addr *addr, int timeout_ms, 518 void (*callback)(int status, struct sockaddr *src_addr, 519 struct rdma_dev_addr *addr, void *context), 520 void *context) 521{ 522 struct sockaddr *src_in, *dst_in; 523 struct addr_req *req; 524 int ret = 0; 525 526 req = kzalloc(sizeof *req, GFP_KERNEL); 527 if (!req) 528 return -ENOMEM; 529 530 src_in = (struct sockaddr *) &req->src_addr; 531 dst_in = (struct sockaddr *) &req->dst_addr; 532 533 if (src_addr) { 534 if (src_addr->sa_family != dst_addr->sa_family) { 535 ret = -EINVAL; 536 goto err; 537 } 538 539 memcpy(src_in, src_addr, ip_addr_size(src_addr)); 540 } else { 541 src_in->sa_family = dst_addr->sa_family; 542 } 543 544 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 545 req->addr = addr; 546 req->callback = callback; 547 req->context = context; 548 req->client = client; 549 atomic_inc(&client->refcount); 550 551 req->status = addr_resolve(src_in, dst_in, addr); 552 switch (req->status) { 553 case 0: 554 req->timeout = jiffies; 555 queue_req(req); 556 break; 557 case -ENODATA: 558 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 559 queue_req(req); 560 break; 561 default: 562 ret = req->status; 563 atomic_dec(&client->refcount); 564 goto err; 565 } 566 return ret; 567err: 568 kfree(req); 569 return ret; 570} 571EXPORT_SYMBOL(rdma_resolve_ip); 572 573void rdma_addr_cancel(struct rdma_dev_addr *addr) 574{ 575 struct addr_req *req, *temp_req; 576 577 mutex_lock(&lock); 578 list_for_each_entry_safe(req, temp_req, &req_list, list) { 579 if (req->addr == addr) { 580 req->status = -ECANCELED; 581 req->timeout = jiffies; 582 list_move(&req->list, &req_list); 583 set_timeout(req->timeout); 584 break; 585 } 586 } 587 mutex_unlock(&lock); 588} 589EXPORT_SYMBOL(rdma_addr_cancel); 590 591static int netevent_callback(struct notifier_block *self, unsigned long event, 592 void *ctx) 593{ 594 if (event == NETEVENT_NEIGH_UPDATE) { 595#ifdef __linux__ 596 struct neighbour *neigh = ctx; 597 598 if (neigh->nud_state & NUD_VALID) { 599 set_timeout(jiffies); 600 } 601#else 602 set_timeout(jiffies); 603#endif 604 } 605 return 0; 606} 607 608static struct notifier_block nb = { 609 .notifier_call = netevent_callback 610}; 611 612static int addr_init(void) 613{ 614 INIT_DELAYED_WORK(&work, process_req); 615 addr_wq = create_singlethread_workqueue("ib_addr"); 616 if (!addr_wq) 617 return -ENOMEM; 618 619 register_netevent_notifier(&nb); 620 return 0; 621} 622 623static void addr_cleanup(void) 624{ 625 unregister_netevent_notifier(&nb); 626 destroy_workqueue(addr_wq); 627} 628 629module_init(addr_init); 630module_exit(addr_cleanup); 631