addr.c revision 256281
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
38#include <linux/workqueue.h>
39#include <net/arp.h>
40#include <net/neighbour.h>
41#include <net/route.h>
42#include <net/netevent.h>
43#include <net/addrconf.h>
44#include <net/ip6_route.h>
45#include <rdma/ib_addr.h>
46
47MODULE_AUTHOR("Sean Hefty");
48MODULE_DESCRIPTION("IB Address Translation");
49MODULE_LICENSE("Dual BSD/GPL");
50
51struct addr_req {
52	struct list_head list;
53	struct sockaddr_storage src_addr;
54	struct sockaddr_storage dst_addr;
55	struct rdma_dev_addr *addr;
56	struct rdma_addr_client *client;
57	void *context;
58	void (*callback)(int status, struct sockaddr *src_addr,
59			 struct rdma_dev_addr *addr, void *context);
60	unsigned long timeout;
61	int status;
62};
63
64static void process_req(struct work_struct *work);
65
66static DEFINE_MUTEX(lock);
67static LIST_HEAD(req_list);
68static struct delayed_work work;
69static struct workqueue_struct *addr_wq;
70
71void rdma_addr_register_client(struct rdma_addr_client *client)
72{
73	atomic_set(&client->refcount, 1);
74	init_completion(&client->comp);
75}
76EXPORT_SYMBOL(rdma_addr_register_client);
77
78static inline void put_client(struct rdma_addr_client *client)
79{
80	if (atomic_dec_and_test(&client->refcount))
81		complete(&client->comp);
82}
83
84void rdma_addr_unregister_client(struct rdma_addr_client *client)
85{
86	put_client(client);
87	wait_for_completion(&client->comp);
88}
89EXPORT_SYMBOL(rdma_addr_unregister_client);
90
91#ifdef __linux__
92int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
93		     const unsigned char *dst_dev_addr)
94{
95	dev_addr->dev_type = dev->type;
96	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
97	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
98	if (dst_dev_addr)
99		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
100	dev_addr->bound_dev_if = dev->ifindex;
101	return 0;
102}
103#else
104int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
105		     const unsigned char *dst_dev_addr)
106{
107	if (dev->if_type == IFT_INFINIBAND)
108		dev_addr->dev_type = ARPHRD_INFINIBAND;
109	else if (dev->if_type == IFT_ETHER)
110		dev_addr->dev_type = ARPHRD_ETHER;
111	else
112		dev_addr->dev_type = 0;
113	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
114	memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
115	    dev->if_addrlen);
116	if (dst_dev_addr)
117		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
118	dev_addr->bound_dev_if = dev->if_index;
119	return 0;
120}
121#endif
122EXPORT_SYMBOL(rdma_copy_addr);
123
124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
125{
126	struct net_device *dev;
127	int ret = -EADDRNOTAVAIL;
128
129	if (dev_addr->bound_dev_if) {
130		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
131		if (!dev)
132			return -ENODEV;
133		ret = rdma_copy_addr(dev_addr, dev, NULL);
134		dev_put(dev);
135		return ret;
136	}
137
138	switch (addr->sa_family) {
139#ifdef INET
140	case AF_INET:
141		dev = ip_dev_find(NULL,
142			((struct sockaddr_in *) addr)->sin_addr.s_addr);
143
144		if (!dev)
145			return ret;
146
147		ret = rdma_copy_addr(dev_addr, dev, NULL);
148		dev_put(dev);
149		break;
150#endif
151
152#if defined(INET6)
153	case AF_INET6:
154#ifdef __linux__
155		read_lock(&dev_base_lock);
156		for_each_netdev(&init_net, dev) {
157			if (ipv6_chk_addr(&init_net,
158					  &((struct sockaddr_in6 *) addr)->sin6_addr,
159					  dev, 1)) {
160				ret = rdma_copy_addr(dev_addr, dev, NULL);
161				break;
162			}
163		}
164		read_unlock(&dev_base_lock);
165#else
166		{
167			struct sockaddr_in6 *sin6;
168			struct ifaddr *ifa;
169			in_port_t port;
170
171			sin6 = (struct sockaddr_in6 *)addr;
172			port = sin6->sin6_port;
173			sin6->sin6_port = 0;
174			ifa = ifa_ifwithaddr(addr);
175			sin6->sin6_port = port;
176			if (ifa == NULL) {
177				ret = -ENODEV;
178				break;
179			}
180			ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
181			ifa_free(ifa);
182			break;
183		}
184#endif
185		break;
186#endif
187	}
188	return ret;
189}
190EXPORT_SYMBOL(rdma_translate_ip);
191
192static void set_timeout(unsigned long time)
193{
194	unsigned long delay;
195
196	cancel_delayed_work(&work);
197
198	delay = time - jiffies;
199	if ((long)delay <= 0)
200		delay = 1;
201
202	queue_delayed_work(addr_wq, &work, delay);
203}
204
205static void queue_req(struct addr_req *req)
206{
207	struct addr_req *temp_req;
208
209	mutex_lock(&lock);
210	list_for_each_entry_reverse(temp_req, &req_list, list) {
211		if (time_after_eq(req->timeout, temp_req->timeout))
212			break;
213	}
214
215	list_add(&req->list, &temp_req->list);
216
217	if (req_list.next == &req->list)
218		set_timeout(req->timeout);
219	mutex_unlock(&lock);
220}
221
222#ifdef __linux__
223static int addr4_resolve(struct sockaddr_in *src_in,
224			 struct sockaddr_in *dst_in,
225			 struct rdma_dev_addr *addr)
226{
227	__be32 src_ip = src_in->sin_addr.s_addr;
228	__be32 dst_ip = dst_in->sin_addr.s_addr;
229	struct flowi fl;
230	struct rtable *rt;
231	struct neighbour *neigh;
232	int ret;
233
234	memset(&fl, 0, sizeof fl);
235	fl.nl_u.ip4_u.daddr = dst_ip;
236	fl.nl_u.ip4_u.saddr = src_ip;
237	fl.oif = addr->bound_dev_if;
238
239	ret = ip_route_output_key(&init_net, &rt, &fl);
240	if (ret)
241		goto out;
242
243	src_in->sin_family = AF_INET;
244	src_in->sin_addr.s_addr = rt->rt_src;
245
246	if (rt->idev->dev->flags & IFF_LOOPBACK) {
247		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
248		if (!ret)
249			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
250		goto put;
251	}
252
253	/* If the device does ARP internally, return 'done' */
254	if (rt->idev->dev->flags & IFF_NOARP) {
255		rdma_copy_addr(addr, rt->idev->dev, NULL);
256		goto put;
257	}
258
259	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
260	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
261		neigh_event_send(rt->u.dst.neighbour, NULL);
262		ret = -ENODATA;
263		if (neigh)
264			goto release;
265		goto put;
266	}
267
268	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
269release:
270	neigh_release(neigh);
271put:
272	ip_rt_put(rt);
273out:
274	return ret;
275}
276
277#if defined(INET6)
278static int addr6_resolve(struct sockaddr_in6 *src_in,
279			 struct sockaddr_in6 *dst_in,
280			 struct rdma_dev_addr *addr)
281{
282	struct flowi fl;
283	struct neighbour *neigh;
284	struct dst_entry *dst;
285	int ret;
286
287	memset(&fl, 0, sizeof fl);
288	ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
289	ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
290	fl.oif = addr->bound_dev_if;
291
292	dst = ip6_route_output(&init_net, NULL, &fl);
293	if ((ret = dst->error))
294		goto put;
295
296	if (ipv6_addr_any(&fl.fl6_src)) {
297		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
298					 &fl.fl6_dst, 0, &fl.fl6_src);
299		if (ret)
300			goto put;
301
302		src_in->sin6_family = AF_INET6;
303		ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
304	}
305
306	if (dst->dev->flags & IFF_LOOPBACK) {
307		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
308		if (!ret)
309			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
310		goto put;
311	}
312
313	/* If the device does ARP internally, return 'done' */
314	if (dst->dev->flags & IFF_NOARP) {
315		ret = rdma_copy_addr(addr, dst->dev, NULL);
316		goto put;
317	}
318
319	neigh = dst->neighbour;
320	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
321		neigh_event_send(dst->neighbour, NULL);
322		ret = -ENODATA;
323		goto put;
324	}
325
326	ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
327put:
328	dst_release(dst);
329	return ret;
330}
331#else
/*
 * IPv6 support is compiled out: every IPv6 resolution attempt fails with
 * -EADDRNOTAVAIL and none of the arguments are examined.
 */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr)
{
	const int unsupported = -EADDRNOTAVAIL;

	return unsupported;
}
338#endif
339
340#else
341#include <netinet/if_ether.h>
342
343static int addr_resolve(struct sockaddr *src_in,
344			struct sockaddr *dst_in,
345			struct rdma_dev_addr *addr)
346{
347	struct sockaddr_in *sin;
348	struct sockaddr_in6 *sin6;
349	struct ifaddr *ifa;
350	struct ifnet *ifp;
351#if defined(INET) || defined(INET6)
352	struct llentry *lle;
353#endif
354	struct rtentry *rte;
355	in_port_t port;
356	u_char edst[MAX_ADDR_LEN];
357	int multi;
358	int bcast;
359	int error = 0;
360
361	/*
362	 * Determine whether the address is unicast, multicast, or broadcast
363	 * and whether the source interface is valid.
364	 */
365	multi = 0;
366	bcast = 0;
367	sin = NULL;
368	sin6 = NULL;
369	ifp = NULL;
370	rte = NULL;
371	switch (dst_in->sa_family) {
372#ifdef INET
373	case AF_INET:
374		sin = (struct sockaddr_in *)dst_in;
375		if (sin->sin_addr.s_addr == INADDR_BROADCAST)
376			bcast = 1;
377		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
378			multi = 1;
379		sin = (struct sockaddr_in *)src_in;
380		if (sin->sin_addr.s_addr != INADDR_ANY) {
381			/*
382			 * Address comparison fails if the port is set
383			 * cache it here to be restored later.
384			 */
385			port = sin->sin_port;
386			sin->sin_port = 0;
387			memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
388		} else
389			src_in = NULL;
390		break;
391#endif
392#ifdef INET6
393	case AF_INET6:
394		sin6 = (struct sockaddr_in6 *)dst_in;
395		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
396			multi = 1;
397		sin6 = (struct sockaddr_in6 *)src_in;
398		if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
399			port = sin6->sin6_port;
400			sin6->sin6_port = 0;
401		} else
402			src_in = NULL;
403		break;
404#endif
405	default:
406		return -EINVAL;
407	}
408	/*
409	 * If we have a source address to use look it up first and verify
410	 * that it is a local interface.
411	 */
412	if (src_in) {
413		ifa = ifa_ifwithaddr(src_in);
414		if (sin)
415			sin->sin_port = port;
416		if (sin6)
417			sin6->sin6_port = port;
418		if (ifa == NULL)
419			return -ENETUNREACH;
420		ifp = ifa->ifa_ifp;
421		ifa_free(ifa);
422		if (bcast || multi)
423			goto mcast;
424	}
425	/*
426	 * Make sure the route exists and has a valid link.
427	 */
428	rte = rtalloc1(dst_in, 1, 0);
429	if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
430		if (rte)
431			RTFREE_LOCKED(rte);
432		return -EHOSTUNREACH;
433	}
434	/*
435	 * If it's not multicast or broadcast and the route doesn't match the
436	 * requested interface return unreachable.  Otherwise fetch the
437	 * correct interface pointer and unlock the route.
438	 */
439	if (multi || bcast) {
440		if (ifp == NULL)
441			ifp = rte->rt_ifp;
442		RTFREE_LOCKED(rte);
443	} else if (ifp && ifp != rte->rt_ifp) {
444		RTFREE_LOCKED(rte);
445		return -ENETUNREACH;
446	} else {
447		if (ifp == NULL)
448			ifp = rte->rt_ifp;
449		RT_UNLOCK(rte);
450	}
451mcast:
452	if (bcast)
453		return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
454	if (multi) {
455		struct sockaddr *llsa;
456
457		error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
458		if (error)
459			return -error;
460		error = rdma_copy_addr(addr, ifp,
461		    LLADDR((struct sockaddr_dl *)llsa));
462		free(llsa, M_IFMADDR);
463		return error;
464	}
465	/*
466	 * Resolve the link local address.
467	 */
468	switch (dst_in->sa_family) {
469#ifdef INET
470	case AF_INET:
471		error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle);
472		break;
473#endif
474#ifdef INET6
475	case AF_INET6:
476		error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle);
477		break;
478#endif
479	default:
480		/* XXX: Shouldn't happen. */
481		error = -EINVAL;
482	}
483	RTFREE(rte);
484	if (error == 0)
485		return rdma_copy_addr(addr, ifp, edst);
486	if (error == EWOULDBLOCK)
487		return -ENODATA;
488	return -error;
489}
490
491#endif
492
493static void process_req(struct work_struct *work)
494{
495	struct addr_req *req, *temp_req;
496	struct sockaddr *src_in, *dst_in;
497	struct list_head done_list;
498
499	INIT_LIST_HEAD(&done_list);
500
501	mutex_lock(&lock);
502	list_for_each_entry_safe(req, temp_req, &req_list, list) {
503		if (req->status == -ENODATA) {
504			src_in = (struct sockaddr *) &req->src_addr;
505			dst_in = (struct sockaddr *) &req->dst_addr;
506			req->status = addr_resolve(src_in, dst_in, req->addr);
507			if (req->status && time_after_eq(jiffies, req->timeout))
508				req->status = -ETIMEDOUT;
509			else if (req->status == -ENODATA)
510				continue;
511		}
512		list_move_tail(&req->list, &done_list);
513	}
514
515	if (!list_empty(&req_list)) {
516		req = list_entry(req_list.next, struct addr_req, list);
517		set_timeout(req->timeout);
518	}
519	mutex_unlock(&lock);
520
521	list_for_each_entry_safe(req, temp_req, &done_list, list) {
522		list_del(&req->list);
523		req->callback(req->status, (struct sockaddr *) &req->src_addr,
524			req->addr, req->context);
525		put_client(req->client);
526		kfree(req);
527	}
528}
529
530int rdma_resolve_ip(struct rdma_addr_client *client,
531		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
532		    struct rdma_dev_addr *addr, int timeout_ms,
533		    void (*callback)(int status, struct sockaddr *src_addr,
534				     struct rdma_dev_addr *addr, void *context),
535		    void *context)
536{
537	struct sockaddr *src_in, *dst_in;
538	struct addr_req *req;
539	int ret = 0;
540
541	req = kzalloc(sizeof *req, GFP_KERNEL);
542	if (!req)
543		return -ENOMEM;
544
545	src_in = (struct sockaddr *) &req->src_addr;
546	dst_in = (struct sockaddr *) &req->dst_addr;
547
548	if (src_addr) {
549		if (src_addr->sa_family != dst_addr->sa_family) {
550			ret = -EINVAL;
551			goto err;
552		}
553
554		memcpy(src_in, src_addr, ip_addr_size(src_addr));
555	} else {
556		src_in->sa_family = dst_addr->sa_family;
557	}
558
559	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
560	req->addr = addr;
561	req->callback = callback;
562	req->context = context;
563	req->client = client;
564	atomic_inc(&client->refcount);
565
566	req->status = addr_resolve(src_in, dst_in, addr);
567	switch (req->status) {
568	case 0:
569		req->timeout = jiffies;
570		queue_req(req);
571		break;
572	case -ENODATA:
573		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
574		queue_req(req);
575		break;
576	default:
577		ret = req->status;
578		atomic_dec(&client->refcount);
579		goto err;
580	}
581	return ret;
582err:
583	kfree(req);
584	return ret;
585}
586EXPORT_SYMBOL(rdma_resolve_ip);
587
588void rdma_addr_cancel(struct rdma_dev_addr *addr)
589{
590	struct addr_req *req, *temp_req;
591
592	mutex_lock(&lock);
593	list_for_each_entry_safe(req, temp_req, &req_list, list) {
594		if (req->addr == addr) {
595			req->status = -ECANCELED;
596			req->timeout = jiffies;
597			list_move(&req->list, &req_list);
598			set_timeout(req->timeout);
599			break;
600		}
601	}
602	mutex_unlock(&lock);
603}
604EXPORT_SYMBOL(rdma_addr_cancel);
605
606static int netevent_callback(struct notifier_block *self, unsigned long event,
607	void *ctx)
608{
609	if (event == NETEVENT_NEIGH_UPDATE) {
610#ifdef __linux__
611		struct neighbour *neigh = ctx;
612
613		if (neigh->nud_state & NUD_VALID) {
614			set_timeout(jiffies);
615		}
616#else
617		set_timeout(jiffies);
618#endif
619	}
620	return 0;
621}
622
623static struct notifier_block nb = {
624	.notifier_call = netevent_callback
625};
626
627static int addr_init(void)
628{
629	INIT_DELAYED_WORK(&work, process_req);
630	addr_wq = create_singlethread_workqueue("ib_addr");
631	if (!addr_wq)
632		return -ENOMEM;
633
634	register_netevent_notifier(&nb);
635	return 0;
636}
637
638static void addr_cleanup(void)
639{
640	unregister_netevent_notifier(&nb);
641	destroy_workqueue(addr_wq);
642}
643
/* Module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);
646