ib_roce_gid_mgmt.c revision 331769
/*
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

static struct workqueue_struct *roce_gid_mgmt_wq;

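/*
 * All GID table updates are funneled through this single ordered
 * workqueue (created in roce_gid_mgmt_init() below), so add, delete
 * and rescan events are serialized with respect to each other.
 */
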
enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;
};

struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

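/*
 * Each entry maps a port capability check to the GID type it implies:
 * plain Ethernet encapsulation is RoCE v1 (IB_GID_TYPE_ROCE), UDP
 * encapsulation is RoCE v2 (IB_GID_TYPE_ROCE_UDP_ENCAP).
 */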
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);
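
/*
 * Illustrative sketch (not part of the original file): a consumer of
 * roce_gid_type_mask_support() can walk the returned mask to act on
 * each supported GID type, where handle_gid_type() stands in for a
 * hypothetical per-type helper:
 *
 *	unsigned long mask = roce_gid_type_mask_support(ib_dev, port);
 *	int i;
 *
 *	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
 *		if (mask & (1UL << i))
 *			handle_gid_type(i);
 *	}
 */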
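/*
 * Apply a single add/del operation to every GID type the port
 * supports, so e.g. a port that speaks both RoCE v1 and v2 gains (or
 * loses) one GID table entry per type.
 */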
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, &gid_attr);
				break;
			}
		}
	}
}
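/*
 * Filter callbacks for ib_enum_all_roce_netdevs() and
 * ib_enum_roce_netdev(): the first matches only the netdev passed in
 * the cookie, the second matches any port with a netdev attached.
 */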
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	struct net_device *ndev = (struct net_device *)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

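/*
 * Install the per-netdev default GIDs and return how many table
 * entries they occupy (one per supported GID type); the caller uses
 * that count as the first index that can hold an address-derived GID.
 */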
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct net_device *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

#define ETH_IPOIB_DRV_NAME	"ib"

static inline int
is_eth_ipoib_intf(struct net_device *dev)
{
	if (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME))
		return 0;
	return 1;
}
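/*
 * Resynchronize the GID table of one (device, port, netdev) tuple
 * with the stack's current address list: first snapshot every
 * IPv4/IPv6 address of the netdev and of its VLAN children, then add
 * GIDs missing from the cache, and finally delete cached GIDs whose
 * address is gone.
 */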
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry)	entry;
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	CURVNET_SET(ndev->if_vnet);
	IFNET_RLOCK();
	TAILQ_FOREACH(idev, &V_ifnet, if_link) {
		if (idev != ndev) {
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		IF_ADDR_RLOCK(idev);
#if defined(INET)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);

			/* strip the IPv6 scope ID: recover the embedded form, then zero it */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		IF_ADDR_RUNLOCK(idev);
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, ndev, &index_num) == 0)
				break;
		}
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, NULL) == 0; i++) {
		union ipx_addr ipx;

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}
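
/*
 * For reference (illustrative note, not from this file): rdma_ip2gid()
 * turns an AF_INET address into an IPv4-mapped IPv6 GID, e.g. 192.0.2.1
 * becomes ::ffff:192.0.2.1 (0000:0000:0000:0000:0000:ffff:c000:0201),
 * while an AF_INET6 address is used as the GID verbatim. rdma_gid2ip()
 * is the inverse mapping, which is why the add and remove passes above
 * can compare cached GIDs against the snapshot of sockaddrs.
 */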

static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}
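/*
 * Defer a rescan of the given netdev to the ordered workqueue. VLAN
 * interfaces are unwrapped to their parent Ethernet device first, and
 * IPoIB-style interfaces are ignored here since their GIDs are not
 * derived from IP addresses.
 */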
static void
roce_gid_queue_scan_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	if (is_eth_ipoib_intf(ndev))
		return;

	if (ndev->if_type != IFT_ETHER) {
		if (ndev->if_type == IFT_L2VLAN) {
			ndev = rdma_vlan_dev_real_dev(ndev);
			if (ndev != NULL)
				goto retry;
		}
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_delete_all_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for delete_all_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);
}
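/*
 * Single notifier callback handling both netdevice and address
 * events: interface removal flushes every GID learned from that
 * netdev, while registration and address changes trigger a rescan.
 */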
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *ndev = ptr;

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}
/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}
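
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * hardware driver that attaches its netdev after device registration
 * could refresh the GID tables like this:
 *
 *	if (roce_rescan_device(ibdev) != 0)
 *		pr_warn("failed to schedule RoCE GID rescan\n");
 *
 * The rescan runs asynchronously on roce_gid_mgmt_wq, so, per the
 * comment above, the caller must make sure the queued work has
 * completed before the ib_device is removed.
 */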
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	return 0;
}
void __exit roce_gid_mgmt_cleanup(void)
{
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all GID deletion tasks complete before we go down,
	 * to avoid any reference to freed memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so there is no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}