addr.c revision 287862
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/slab.h> 39#include <linux/workqueue.h> 40#include <linux/module.h> 41#include <linux/notifier.h> 42#include <net/route.h> 43#include <net/netevent.h> 44#include <rdma/ib_addr.h> 45#include <netinet/if_ether.h> 46 47 48MODULE_AUTHOR("Sean Hefty"); 49MODULE_DESCRIPTION("IB Address Translation"); 50MODULE_LICENSE("Dual BSD/GPL"); 51 52struct addr_req { 53 struct list_head list; 54 struct sockaddr_storage src_addr; 55 struct sockaddr_storage dst_addr; 56 struct rdma_dev_addr *addr; 57 struct rdma_addr_client *client; 58 void *context; 59 void (*callback)(int status, struct sockaddr *src_addr, 60 struct rdma_dev_addr *addr, void *context); 61 unsigned long timeout; 62 int status; 63}; 64 65static void process_req(struct work_struct *work); 66 67static DEFINE_MUTEX(lock); 68static LIST_HEAD(req_list); 69static struct delayed_work work; 70static struct workqueue_struct *addr_wq; 71 72static struct rdma_addr_client self; 73void rdma_addr_register_client(struct rdma_addr_client *client) 74{ 75 atomic_set(&client->refcount, 1); 76 init_completion(&client->comp); 77} 78EXPORT_SYMBOL(rdma_addr_register_client); 79 80static inline void put_client(struct rdma_addr_client *client) 81{ 82 if (atomic_dec_and_test(&client->refcount)) 83 complete(&client->comp); 84} 85 86void rdma_addr_unregister_client(struct rdma_addr_client *client) 87{ 88 put_client(client); 89 wait_for_completion(&client->comp); 90} 91EXPORT_SYMBOL(rdma_addr_unregister_client); 92 93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 94 const unsigned char *dst_dev_addr) 95{ 96 if (dev->if_type == IFT_INFINIBAND) 97 dev_addr->dev_type = ARPHRD_INFINIBAND; 98 else if (dev->if_type == IFT_ETHER) 99 dev_addr->dev_type = ARPHRD_ETHER; 100 else 101 dev_addr->dev_type = 0; 102 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 103 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 104 dev->if_addrlen); 105 if (dst_dev_addr) 106 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 107 dev_addr->bound_dev_if = dev->if_index; 108 return 0; 109} 110EXPORT_SYMBOL(rdma_copy_addr); 111 112int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 113 u16 *vlan_id) 114{ 115 struct net_device *dev; 116 int ret = -EADDRNOTAVAIL; 117 118 if (dev_addr->bound_dev_if) { 119 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 120 if (!dev) 121 return -ENODEV; 122 ret = rdma_copy_addr(dev_addr, dev, NULL); 123 dev_put(dev); 124 return ret; 125 } 126 127 switch (addr->sa_family) { 128 case AF_INET: 129 dev = ip_dev_find(&init_net, 130 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 131 132 if (!dev) 133 return ret; 134 135 ret = rdma_copy_addr(dev_addr, dev, NULL); 136 if (vlan_id) 137 *vlan_id = rdma_vlan_dev_vlan_id(dev); 138 dev_put(dev); 139 break; 140 141#if defined(INET6) 142 case AF_INET6: 143 { 144 struct sockaddr_in6 *sin6; 145 struct ifaddr *ifa; 146 in_port_t port; 147 148 sin6 = (struct sockaddr_in6 *)addr; 149 port = sin6->sin6_port; 150 sin6->sin6_port = 0; 151 ifa = ifa_ifwithaddr(addr); 152 sin6->sin6_port = port; 153 if (ifa == NULL) { 154 ret = -ENODEV; 155 break; 156 } 157 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 158 if (vlan_id) 159 *vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp); 160 ifa_free(ifa); 161 break; 162 } 163#endif 164 } 165 return ret; 166} 167EXPORT_SYMBOL(rdma_translate_ip); 168 169static void set_timeout(unsigned long time) 170{ 171 unsigned long delay; 172 173 delay = time - jiffies; 174 if ((long)delay <= 0) 175 delay = 1; 176 177 mod_delayed_work(addr_wq, &work, delay); 178} 179 180static void queue_req(struct addr_req *req) 181{ 182 struct addr_req *temp_req; 183 184 mutex_lock(&lock); 185 list_for_each_entry_reverse(temp_req, &req_list, list) { 186 if (time_after_eq(req->timeout, temp_req->timeout)) 187 break; 188 } 189 190 list_add(&req->list, &temp_req->list); 191 192 if (req_list.next == &req->list) 193 set_timeout(req->timeout); 194 mutex_unlock(&lock); 195} 196 197static int addr_resolve(struct sockaddr *src_in, 198 struct sockaddr *dst_in, 199 struct rdma_dev_addr *addr) 200{ 201 struct sockaddr_in *sin; 202 struct sockaddr_in6 *sin6; 203 struct ifaddr *ifa; 204 struct ifnet *ifp; 205 struct rtentry *rte; 206 in_port_t port; 207 u_char edst[MAX_ADDR_LEN]; 208 int multi; 209 int bcast; 210 int is_gw = 0; 211 int error = 0; 212 /* 213 * Determine whether the address is unicast, multicast, or broadcast 214 * and whether the source interface is valid. 215 */ 216 multi = 0; 217 bcast = 0; 218 sin = NULL; 219 sin6 = NULL; 220 ifp = NULL; 221 rte = NULL; 222 switch (dst_in->sa_family) { 223#ifdef INET 224 case AF_INET: 225 sin = (struct sockaddr_in *)dst_in; 226 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 227 bcast = 1; 228 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 229 multi = 1; 230 sin = (struct sockaddr_in *)src_in; 231 if (sin->sin_addr.s_addr != INADDR_ANY) { 232 /* 233 * Address comparison fails if the port is set 234 * cache it here to be restored later. 235 */ 236 port = sin->sin_port; 237 sin->sin_port = 0; 238 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 239 } 240 break; 241#endif 242#ifdef INET6 243 case AF_INET6: 244 sin6 = (struct sockaddr_in6 *)dst_in; 245 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 246 multi = 1; 247 sin6 = (struct sockaddr_in6 *)src_in; 248 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 249 port = sin6->sin6_port; 250 sin6->sin6_port = 0; 251 } else 252 src_in = NULL; 253 break; 254#endif 255 default: 256 return -EINVAL; 257 } 258 /* 259 * If we have a source address to use look it up first and verify 260 * that it is a local interface. 261 */ 262 if (sin->sin_addr.s_addr != INADDR_ANY) { 263 ifa = ifa_ifwithaddr(src_in); 264 if (sin) 265 sin->sin_port = port; 266 if (sin6) 267 sin6->sin6_port = port; 268 if (ifa == NULL) 269 return -ENETUNREACH; 270 ifp = ifa->ifa_ifp; 271 ifa_free(ifa); 272 if (bcast || multi) 273 goto mcast; 274 } 275 /* 276 * Make sure the route exists and has a valid link. 277 */ 278 rte = rtalloc1(dst_in, 1, 0); 279 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 280 if (rte) 281 RTFREE_LOCKED(rte); 282 return -EHOSTUNREACH; 283 } 284 if (rte->rt_flags & RTF_GATEWAY) 285 is_gw = 1; 286 /* 287 * If it's not multicast or broadcast and the route doesn't match the 288 * requested interface return unreachable. Otherwise fetch the 289 * correct interface pointer and unlock the route. 290 */ 291 if (multi || bcast) { 292 if (ifp == NULL) { 293 ifp = rte->rt_ifp; 294 /* rt_ifa holds the route answer source address */ 295 ifa = rte->rt_ifa; 296 } 297 RTFREE_LOCKED(rte); 298 } else if (ifp && ifp != rte->rt_ifp) { 299 RTFREE_LOCKED(rte); 300 return -ENETUNREACH; 301 } else { 302 if (ifp == NULL) { 303 ifp = rte->rt_ifp; 304 ifa = rte->rt_ifa; 305 } 306 RT_UNLOCK(rte); 307 } 308mcast: 309 if (bcast) 310 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 311 if (multi) { 312 struct sockaddr *llsa; 313 314 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 315 if (error) 316 return -error; 317 error = rdma_copy_addr(addr, ifp, 318 LLADDR((struct sockaddr_dl *)llsa)); 319 free(llsa, M_IFMADDR); 320 if (error == 0) 321 memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr)); 322 return error; 323 } 324 /* 325 * Resolve the link local address. 326 */ 327 switch (dst_in->sa_family) { 328#ifdef INET 329 case AF_INET: 330 error = arpresolve(ifp, is_gw, NULL, dst_in, edst, NULL); 331 break; 332#endif 333#ifdef INET6 334 case AF_INET6: 335 error = nd6_resolve(ifp, is_gw, NULL, dst_in, edst, NULL); 336 break; 337#endif 338 default: 339 /* XXX: Shouldn't happen. */ 340 error = -EINVAL; 341 } 342 RTFREE(rte); 343 if (error == 0) { 344 memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr)); 345 return rdma_copy_addr(addr, ifp, edst); 346 } 347 if (error == EWOULDBLOCK) 348 return -ENODATA; 349 return -error; 350} 351 352static void process_req(struct work_struct *work) 353{ 354 struct addr_req *req, *temp_req; 355 struct sockaddr *src_in, *dst_in; 356 struct list_head done_list; 357 358 INIT_LIST_HEAD(&done_list); 359 360 mutex_lock(&lock); 361 list_for_each_entry_safe(req, temp_req, &req_list, list) { 362 if (req->status == -ENODATA) { 363 src_in = (struct sockaddr *) &req->src_addr; 364 dst_in = (struct sockaddr *) &req->dst_addr; 365 req->status = addr_resolve(src_in, dst_in, req->addr); 366 if (req->status && time_after_eq(jiffies, req->timeout)) 367 req->status = -ETIMEDOUT; 368 else if (req->status == -ENODATA) 369 continue; 370 } 371 list_move_tail(&req->list, &done_list); 372 } 373 374 if (!list_empty(&req_list)) { 375 req = list_entry(req_list.next, struct addr_req, list); 376 set_timeout(req->timeout); 377 } 378 mutex_unlock(&lock); 379 380 list_for_each_entry_safe(req, temp_req, &done_list, list) { 381 list_del(&req->list); 382 req->callback(req->status, (struct sockaddr *) &req->src_addr, 383 req->addr, req->context); 384 put_client(req->client); 385 kfree(req); 386 } 387} 388 389int rdma_resolve_ip(struct rdma_addr_client *client, 390 struct sockaddr *src_addr, struct sockaddr *dst_addr, 391 struct rdma_dev_addr *addr, int timeout_ms, 392 void (*callback)(int status, struct sockaddr *src_addr, 393 struct rdma_dev_addr *addr, void *context), 394 void *context) 395{ 396 struct sockaddr *src_in, *dst_in; 397 struct addr_req *req; 398 int ret = 0; 399 400 req = kzalloc(sizeof *req, GFP_KERNEL); 401 if (!req) 402 return -ENOMEM; 403 404 src_in = (struct sockaddr *) &req->src_addr; 405 dst_in = (struct sockaddr *) &req->dst_addr; 406 407 if (src_addr) { 408 if (src_addr->sa_family != dst_addr->sa_family) { 409 ret = -EINVAL; 410 goto err; 411 } 412 413 memcpy(src_in, src_addr, ip_addr_size(src_addr)); 414 } else { 415 src_in->sa_family = dst_addr->sa_family; 416 } 417 418 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 419 req->addr = addr; 420 req->callback = callback; 421 req->context = context; 422 req->client = client; 423 atomic_inc(&client->refcount); 424 425 req->status = addr_resolve(src_in, dst_in, addr); 426 switch (req->status) { 427 case 0: 428 req->timeout = jiffies; 429 queue_req(req); 430 break; 431 case -ENODATA: 432 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 433 queue_req(req); 434 break; 435 default: 436 ret = req->status; 437 atomic_dec(&client->refcount); 438 goto err; 439 } 440 return ret; 441err: 442 kfree(req); 443 return ret; 444} 445EXPORT_SYMBOL(rdma_resolve_ip); 446 447void rdma_addr_cancel(struct rdma_dev_addr *addr) 448{ 449 struct addr_req *req, *temp_req; 450 451 mutex_lock(&lock); 452 list_for_each_entry_safe(req, temp_req, &req_list, list) { 453 if (req->addr == addr) { 454 req->status = -ECANCELED; 455 req->timeout = jiffies; 456 list_move(&req->list, &req_list); 457 set_timeout(req->timeout); 458 break; 459 } 460 } 461 mutex_unlock(&lock); 462} 463EXPORT_SYMBOL(rdma_addr_cancel); 464 465struct resolve_cb_context { 466 struct rdma_dev_addr *addr; 467 struct completion comp; 468}; 469 470static void resolve_cb(int status, struct sockaddr *src_addr, 471 struct rdma_dev_addr *addr, void *context) 472{ 473 memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct 474 rdma_dev_addr)); 475 complete(&((struct resolve_cb_context *)context)->comp); 476} 477 478int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, 479 u16 *vlan_id) 480{ 481 int ret = 0; 482 struct rdma_dev_addr dev_addr; 483 struct resolve_cb_context ctx; 484 struct net_device *dev; 485 486 union { 487 struct sockaddr _sockaddr; 488 struct sockaddr_in _sockaddr_in; 489 struct sockaddr_in6 _sockaddr_in6; 490 } sgid_addr, dgid_addr; 491 492 493 ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); 494 if (ret) 495 return ret; 496 497 ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); 498 if (ret) 499 return ret; 500 501 memset(&dev_addr, 0, sizeof(dev_addr)); 502 503 ctx.addr = &dev_addr; 504 init_completion(&ctx.comp); 505 ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, 506 &dev_addr, 1000, resolve_cb, &ctx); 507 if (ret) 508 return ret; 509 510 wait_for_completion(&ctx.comp); 511 512 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); 513 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); 514 if (!dev) 515 return -ENODEV; 516 if (vlan_id) 517 *vlan_id = rdma_vlan_dev_vlan_id(dev); 518 dev_put(dev); 519 return ret; 520} 521EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); 522 523int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) 524{ 525 int ret = 0; 526 struct rdma_dev_addr dev_addr; 527 union { 528 struct sockaddr _sockaddr; 529 struct sockaddr_in _sockaddr_in; 530 struct sockaddr_in6 _sockaddr_in6; 531 } gid_addr; 532 533 ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); 534 535 if (ret) 536 return ret; 537 memset(&dev_addr, 0, sizeof(dev_addr)); 538 ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); 539 if (ret) 540 return ret; 541 542 memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); 543 return ret; 544} 545EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); 546 547static int netevent_callback(struct notifier_block *self, unsigned long event, 548 void *ctx) 549{ 550 if (event == NETEVENT_NEIGH_UPDATE) { 551 set_timeout(jiffies); 552 } 553 return 0; 554} 555 556static struct notifier_block nb = { 557 .notifier_call = netevent_callback 558}; 559 560static int __init addr_init(void) 561{ 562 INIT_DELAYED_WORK(&work, process_req); 563 addr_wq = create_singlethread_workqueue("ib_addr"); 564 if (!addr_wq) 565 return -ENOMEM; 566 567 register_netevent_notifier(&nb); 568 rdma_addr_register_client(&self); 569 return 0; 570} 571 572static void __exit addr_cleanup(void) 573{ 574 rdma_addr_unregister_client(&self); 575 unregister_netevent_notifier(&nb); 576 destroy_workqueue(addr_wq); 577} 578 579module_init(addr_init); 580module_exit(addr_cleanup); 581