addr.c revision 287862
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
38#include <linux/slab.h>
39#include <linux/workqueue.h>
40#include <linux/module.h>
41#include <linux/notifier.h>
42#include <net/route.h>
43#include <net/netevent.h>
44#include <rdma/ib_addr.h>
45#include <netinet/if_ether.h>
46
47
48MODULE_AUTHOR("Sean Hefty");
49MODULE_DESCRIPTION("IB Address Translation");
50MODULE_LICENSE("Dual BSD/GPL");
51
52struct addr_req {
53	struct list_head list;
54	struct sockaddr_storage src_addr;
55	struct sockaddr_storage dst_addr;
56	struct rdma_dev_addr *addr;
57	struct rdma_addr_client *client;
58	void *context;
59	void (*callback)(int status, struct sockaddr *src_addr,
60			 struct rdma_dev_addr *addr, void *context);
61	unsigned long timeout;
62	int status;
63};
64
65static void process_req(struct work_struct *work);
66
67static DEFINE_MUTEX(lock);
68static LIST_HEAD(req_list);
69static struct delayed_work work;
70static struct workqueue_struct *addr_wq;
71
72static struct rdma_addr_client self;
73void rdma_addr_register_client(struct rdma_addr_client *client)
74{
75	atomic_set(&client->refcount, 1);
76	init_completion(&client->comp);
77}
78EXPORT_SYMBOL(rdma_addr_register_client);
79
80static inline void put_client(struct rdma_addr_client *client)
81{
82	if (atomic_dec_and_test(&client->refcount))
83		complete(&client->comp);
84}
85
86void rdma_addr_unregister_client(struct rdma_addr_client *client)
87{
88	put_client(client);
89	wait_for_completion(&client->comp);
90}
91EXPORT_SYMBOL(rdma_addr_unregister_client);
92
93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
94		     const unsigned char *dst_dev_addr)
95{
96	if (dev->if_type == IFT_INFINIBAND)
97		dev_addr->dev_type = ARPHRD_INFINIBAND;
98	else if (dev->if_type == IFT_ETHER)
99		dev_addr->dev_type = ARPHRD_ETHER;
100	else
101		dev_addr->dev_type = 0;
102	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
103	memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
104	    dev->if_addrlen);
105	if (dst_dev_addr)
106		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
107	dev_addr->bound_dev_if = dev->if_index;
108	return 0;
109}
110EXPORT_SYMBOL(rdma_copy_addr);
111
112int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
113		      u16 *vlan_id)
114{
115	struct net_device *dev;
116	int ret = -EADDRNOTAVAIL;
117
118	if (dev_addr->bound_dev_if) {
119		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
120		if (!dev)
121			return -ENODEV;
122		ret = rdma_copy_addr(dev_addr, dev, NULL);
123		dev_put(dev);
124		return ret;
125	}
126
127	switch (addr->sa_family) {
128	case AF_INET:
129		dev = ip_dev_find(&init_net,
130			((struct sockaddr_in *) addr)->sin_addr.s_addr);
131
132		if (!dev)
133			return ret;
134
135		ret = rdma_copy_addr(dev_addr, dev, NULL);
136		if (vlan_id)
137			*vlan_id = rdma_vlan_dev_vlan_id(dev);
138		dev_put(dev);
139		break;
140
141#if defined(INET6)
142	case AF_INET6:
143		{
144			struct sockaddr_in6 *sin6;
145			struct ifaddr *ifa;
146			in_port_t port;
147
148			sin6 = (struct sockaddr_in6 *)addr;
149			port = sin6->sin6_port;
150			sin6->sin6_port = 0;
151			ifa = ifa_ifwithaddr(addr);
152			sin6->sin6_port = port;
153			if (ifa == NULL) {
154				ret = -ENODEV;
155				break;
156			}
157			ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
158			if (vlan_id)
159				*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
160			ifa_free(ifa);
161			break;
162		}
163#endif
164	}
165	return ret;
166}
167EXPORT_SYMBOL(rdma_translate_ip);
168
169static void set_timeout(unsigned long time)
170{
171	unsigned long delay;
172
173	delay = time - jiffies;
174	if ((long)delay <= 0)
175		delay = 1;
176
177	mod_delayed_work(addr_wq, &work, delay);
178}
179
180static void queue_req(struct addr_req *req)
181{
182	struct addr_req *temp_req;
183
184	mutex_lock(&lock);
185	list_for_each_entry_reverse(temp_req, &req_list, list) {
186		if (time_after_eq(req->timeout, temp_req->timeout))
187			break;
188	}
189
190	list_add(&req->list, &temp_req->list);
191
192	if (req_list.next == &req->list)
193		set_timeout(req->timeout);
194	mutex_unlock(&lock);
195}
196
197static int addr_resolve(struct sockaddr *src_in,
198			struct sockaddr *dst_in,
199			struct rdma_dev_addr *addr)
200{
201	struct sockaddr_in *sin;
202	struct sockaddr_in6 *sin6;
203	struct ifaddr *ifa;
204	struct ifnet *ifp;
205	struct rtentry *rte;
206	in_port_t port;
207	u_char edst[MAX_ADDR_LEN];
208	int multi;
209	int bcast;
210	int is_gw = 0;
211	int error = 0;
212	/*
213	 * Determine whether the address is unicast, multicast, or broadcast
214	 * and whether the source interface is valid.
215	 */
216	multi = 0;
217	bcast = 0;
218	sin = NULL;
219	sin6 = NULL;
220	ifp = NULL;
221	rte = NULL;
222	switch (dst_in->sa_family) {
223#ifdef INET
224	case AF_INET:
225		sin = (struct sockaddr_in *)dst_in;
226		if (sin->sin_addr.s_addr == INADDR_BROADCAST)
227			bcast = 1;
228		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
229			multi = 1;
230		sin = (struct sockaddr_in *)src_in;
231		if (sin->sin_addr.s_addr != INADDR_ANY) {
232			/*
233			 * Address comparison fails if the port is set
234			 * cache it here to be restored later.
235			 */
236			port = sin->sin_port;
237			sin->sin_port = 0;
238			memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
239		}
240		break;
241#endif
242#ifdef INET6
243	case AF_INET6:
244		sin6 = (struct sockaddr_in6 *)dst_in;
245		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
246			multi = 1;
247		sin6 = (struct sockaddr_in6 *)src_in;
248		if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
249			port = sin6->sin6_port;
250			sin6->sin6_port = 0;
251		} else
252			src_in = NULL;
253		break;
254#endif
255	default:
256		return -EINVAL;
257	}
258	/*
259	 * If we have a source address to use look it up first and verify
260	 * that it is a local interface.
261	 */
262	if (sin->sin_addr.s_addr != INADDR_ANY) {
263		ifa = ifa_ifwithaddr(src_in);
264		if (sin)
265			sin->sin_port = port;
266		if (sin6)
267			sin6->sin6_port = port;
268		if (ifa == NULL)
269			return -ENETUNREACH;
270		ifp = ifa->ifa_ifp;
271		ifa_free(ifa);
272		if (bcast || multi)
273			goto mcast;
274	}
275	/*
276	 * Make sure the route exists and has a valid link.
277	 */
278	rte = rtalloc1(dst_in, 1, 0);
279	if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
280		if (rte)
281			RTFREE_LOCKED(rte);
282		return -EHOSTUNREACH;
283	}
284	if (rte->rt_flags & RTF_GATEWAY)
285		is_gw = 1;
286	/*
287	 * If it's not multicast or broadcast and the route doesn't match the
288	 * requested interface return unreachable.  Otherwise fetch the
289	 * correct interface pointer and unlock the route.
290	 */
291	if (multi || bcast) {
292		if (ifp == NULL) {
293			ifp = rte->rt_ifp;
294			/* rt_ifa holds the route answer source address */
295			ifa = rte->rt_ifa;
296		}
297		RTFREE_LOCKED(rte);
298	} else if (ifp && ifp != rte->rt_ifp) {
299		RTFREE_LOCKED(rte);
300		return -ENETUNREACH;
301	} else {
302		if (ifp == NULL) {
303			ifp = rte->rt_ifp;
304			ifa = rte->rt_ifa;
305		}
306		RT_UNLOCK(rte);
307	}
308mcast:
309	if (bcast)
310		return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
311	if (multi) {
312		struct sockaddr *llsa;
313
314		error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
315		if (error)
316			return -error;
317		error = rdma_copy_addr(addr, ifp,
318		    LLADDR((struct sockaddr_dl *)llsa));
319		free(llsa, M_IFMADDR);
320		if (error == 0)
321			memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
322		return error;
323	}
324	/*
325	 * Resolve the link local address.
326	 */
327	switch (dst_in->sa_family) {
328#ifdef INET
329	case AF_INET:
330		error = arpresolve(ifp, is_gw, NULL, dst_in, edst, NULL);
331		break;
332#endif
333#ifdef INET6
334	case AF_INET6:
335		error = nd6_resolve(ifp, is_gw, NULL, dst_in, edst, NULL);
336		break;
337#endif
338	default:
339		/* XXX: Shouldn't happen. */
340		error = -EINVAL;
341	}
342	RTFREE(rte);
343	if (error == 0) {
344		memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
345		return rdma_copy_addr(addr, ifp, edst);
346	}
347	if (error == EWOULDBLOCK)
348		return -ENODATA;
349	return -error;
350}
351
352static void process_req(struct work_struct *work)
353{
354	struct addr_req *req, *temp_req;
355	struct sockaddr *src_in, *dst_in;
356	struct list_head done_list;
357
358	INIT_LIST_HEAD(&done_list);
359
360	mutex_lock(&lock);
361	list_for_each_entry_safe(req, temp_req, &req_list, list) {
362		if (req->status == -ENODATA) {
363			src_in = (struct sockaddr *) &req->src_addr;
364			dst_in = (struct sockaddr *) &req->dst_addr;
365			req->status = addr_resolve(src_in, dst_in, req->addr);
366			if (req->status && time_after_eq(jiffies, req->timeout))
367				req->status = -ETIMEDOUT;
368			else if (req->status == -ENODATA)
369				continue;
370		}
371		list_move_tail(&req->list, &done_list);
372	}
373
374	if (!list_empty(&req_list)) {
375		req = list_entry(req_list.next, struct addr_req, list);
376		set_timeout(req->timeout);
377	}
378	mutex_unlock(&lock);
379
380	list_for_each_entry_safe(req, temp_req, &done_list, list) {
381		list_del(&req->list);
382		req->callback(req->status, (struct sockaddr *) &req->src_addr,
383			req->addr, req->context);
384		put_client(req->client);
385		kfree(req);
386	}
387}
388
389int rdma_resolve_ip(struct rdma_addr_client *client,
390		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
391		    struct rdma_dev_addr *addr, int timeout_ms,
392		    void (*callback)(int status, struct sockaddr *src_addr,
393				     struct rdma_dev_addr *addr, void *context),
394		    void *context)
395{
396	struct sockaddr *src_in, *dst_in;
397	struct addr_req *req;
398	int ret = 0;
399
400	req = kzalloc(sizeof *req, GFP_KERNEL);
401	if (!req)
402		return -ENOMEM;
403
404	src_in = (struct sockaddr *) &req->src_addr;
405	dst_in = (struct sockaddr *) &req->dst_addr;
406
407	if (src_addr) {
408		if (src_addr->sa_family != dst_addr->sa_family) {
409			ret = -EINVAL;
410			goto err;
411		}
412
413		memcpy(src_in, src_addr, ip_addr_size(src_addr));
414	} else {
415		src_in->sa_family = dst_addr->sa_family;
416	}
417
418	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
419	req->addr = addr;
420	req->callback = callback;
421	req->context = context;
422	req->client = client;
423	atomic_inc(&client->refcount);
424
425	req->status = addr_resolve(src_in, dst_in, addr);
426	switch (req->status) {
427	case 0:
428		req->timeout = jiffies;
429		queue_req(req);
430		break;
431	case -ENODATA:
432		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
433		queue_req(req);
434		break;
435	default:
436		ret = req->status;
437		atomic_dec(&client->refcount);
438		goto err;
439	}
440	return ret;
441err:
442	kfree(req);
443	return ret;
444}
445EXPORT_SYMBOL(rdma_resolve_ip);
446
447void rdma_addr_cancel(struct rdma_dev_addr *addr)
448{
449	struct addr_req *req, *temp_req;
450
451	mutex_lock(&lock);
452	list_for_each_entry_safe(req, temp_req, &req_list, list) {
453		if (req->addr == addr) {
454			req->status = -ECANCELED;
455			req->timeout = jiffies;
456			list_move(&req->list, &req_list);
457			set_timeout(req->timeout);
458			break;
459		}
460	}
461	mutex_unlock(&lock);
462}
463EXPORT_SYMBOL(rdma_addr_cancel);
464
465struct resolve_cb_context {
466	struct rdma_dev_addr *addr;
467	struct completion comp;
468};
469
470static void resolve_cb(int status, struct sockaddr *src_addr,
471	     struct rdma_dev_addr *addr, void *context)
472{
473	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
474				rdma_dev_addr));
475	complete(&((struct resolve_cb_context *)context)->comp);
476}
477
478int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
479			       u16 *vlan_id)
480{
481	int ret = 0;
482	struct rdma_dev_addr dev_addr;
483	struct resolve_cb_context ctx;
484	struct net_device *dev;
485
486	union {
487		struct sockaddr     _sockaddr;
488		struct sockaddr_in  _sockaddr_in;
489		struct sockaddr_in6 _sockaddr_in6;
490	} sgid_addr, dgid_addr;
491
492
493	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
494	if (ret)
495		return ret;
496
497	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
498	if (ret)
499		return ret;
500
501	memset(&dev_addr, 0, sizeof(dev_addr));
502
503	ctx.addr = &dev_addr;
504	init_completion(&ctx.comp);
505	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
506			&dev_addr, 1000, resolve_cb, &ctx);
507	if (ret)
508		return ret;
509
510	wait_for_completion(&ctx.comp);
511
512	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
513	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
514	if (!dev)
515		return -ENODEV;
516	if (vlan_id)
517		*vlan_id = rdma_vlan_dev_vlan_id(dev);
518	dev_put(dev);
519	return ret;
520}
521EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
522
523int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
524{
525	int ret = 0;
526	struct rdma_dev_addr dev_addr;
527	union {
528		struct sockaddr     _sockaddr;
529		struct sockaddr_in  _sockaddr_in;
530		struct sockaddr_in6 _sockaddr_in6;
531	} gid_addr;
532
533	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
534
535	if (ret)
536		return ret;
537	memset(&dev_addr, 0, sizeof(dev_addr));
538	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
539	if (ret)
540		return ret;
541
542	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
543	return ret;
544}
545EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
546
547static int netevent_callback(struct notifier_block *self, unsigned long event,
548	void *ctx)
549{
550	if (event == NETEVENT_NEIGH_UPDATE) {
551			set_timeout(jiffies);
552		}
553	return 0;
554}
555
556static struct notifier_block nb = {
557	.notifier_call = netevent_callback
558};
559
560static int __init addr_init(void)
561{
562	INIT_DELAYED_WORK(&work, process_req);
563	addr_wq = create_singlethread_workqueue("ib_addr");
564	if (!addr_wq)
565		return -ENOMEM;
566
567	register_netevent_notifier(&nb);
568	rdma_addr_register_client(&self);
569	return 0;
570}
571
572static void __exit addr_cleanup(void)
573{
574	rdma_addr_unregister_client(&self);
575	unregister_netevent_notifier(&nb);
576	destroy_workqueue(addr_wq);
577}
578
579module_init(addr_init);
580module_exit(addr_cleanup);
581