/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
#include "core_priv.h"
#include <sys/eventhandler.h>

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	if_t ndev;
};

struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

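/*
 * Compute the set of GID types supported by the given port. The
 * result is a bitmask indexed by enum ib_gid_type: ports that do not
 * speak RoCE report only IB_GID_TYPE_IB, while RoCE ports report
 * RoCE v1 (ethernet encapsulation) and/or RoCE v2 (UDP encapsulation)
 * according to the port capabilities tested above.
 */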
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

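/*
 * Add or delete the given GID to/from the GID cache of the given
 * port, once for every GID type the port supports.
 */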
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, if_t ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, &gid_attr);
				break;
			}
		}
	}
}

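/*
 * Filter callbacks for the RoCE netdev enumeration helpers:
 * roce_gid_match_netdev() matches only the network device passed via
 * the cookie, while roce_gid_match_all() matches every port that has
 * a network device attached.
 */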
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if_t ndev = (if_t)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

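/*
 * Install the default GIDs for the given port and network device and
 * return how many default entries were set, one per supported GID
 * type.
 */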
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, if_t idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

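/*
 * Temporary list entry holding one IPv4 or IPv6 interface address,
 * plus the network device it belongs to, collected while walking the
 * system's interface address lists.
 */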
struct ipx_entry {
	STAILQ_ENTRY(ipx_entry)	entry;
	union ipx_addr {
		struct sockaddr sa[0];
		struct sockaddr_in v4;
		struct sockaddr_in6 v6;
	} ipx_addr;
	if_t ndev;
};

STAILQ_HEAD(ipx_queue, ipx_entry);

#ifdef INET
static u_int
roce_gid_update_addr_ifa4_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_ifa4_cb: "
		    "couldn't allocate entry for IPv4 update\n");
		return (0);
	}
	entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;
	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

#ifdef INET6
static u_int
roce_gid_update_addr_ifa6_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_ifa6_cb: "
		    "couldn't allocate entry for IPv6 update\n");
		return (0);
	}
	entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;

	/* recover the embedded IPv6 scope ID, then discard it */
	sa6_recoverscope(&entry->ipx_addr.v6);
	entry->ipx_addr.v6.sin6_scope_id = 0;

	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

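/*
 * Resynchronize the GID table of the given port with the IPv4 and
 * IPv6 addresses currently configured on the matching network
 * device, including any VLAN devices stacked on top of it: first make
 * sure the default GIDs are present, then snapshot all relevant
 * interface addresses across all VNETs, add any GIDs missing from the
 * cache, and finally delete cached GIDs whose address is no longer
 * configured.
 */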
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    if_t ndev, void *cookie)
{
	struct epoch_tracker et;
	struct if_iter iter;
	struct ipx_entry *entry;
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	if_t ifp;
	int default_gids;
	u16 index_num;
	int i;

	struct ipx_queue ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
	    CURVNET_SET(vnet_iter);
	    NET_EPOCH_ENTER(et);
	    for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) {
		if (ifp != ndev) {
			if (if_gettype(ifp) != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(ifp))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
#if defined(INET)
		if_foreach_addr_type(ifp, AF_INET, roce_gid_update_addr_ifa4_cb, &ipx_head);
#endif
#if defined(INET6)
		if_foreach_addr_type(ifp, AF_INET6, roce_gid_update_addr_ifa6_cb, &ipx_head);
#endif
	    }
	    NET_EPOCH_EXIT(et);
	    CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

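/*
 * Defer a GID table rescan for the given network device to the
 * management workqueue. VLAN devices are resolved to their parent
 * device first; anything that is not an ethernet device is ignored.
 */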
static void
roce_gid_queue_scan_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

retry:
	switch (if_gettype(ndev)) {
	case IFT_ETHER:
		break;
	case IFT_L2VLAN:
		ndev = rdma_vlan_dev_real_dev(ndev);
		if (ndev != NULL)
			goto retry;
		/* FALLTHROUGH */
	default:
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

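/*
 * Queue deletion of all GIDs referencing the given network device and
 * wait for the work item to finish, so that the caller can assume the
 * GID tables no longer point at the device when this returns.
 */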
static void
roce_gid_delete_all_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

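/*
 * Notifier callback invoked on network device and address changes:
 * drop all GIDs when a device unregisters, and schedule a rescan when
 * a device registers or changes its link-layer or IP addresses.
 */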
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	if_t ndev = netdev_notifier_info_to_ifp(ptr);

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static eventhandler_tag eh_ifnet_event;

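/*
 * Reload the GID table of a physical interface when its PCP
 * (priority code point) setting changes; VLAN devices are skipped.
 */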
static void
roce_ifnet_event(void *arg, if_t ifp, int event)
{
	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
		return;

	/* make sure GID table is reloaded */
	roce_gid_delete_all_event(ifp);
	roce_gid_queue_scan_event(ifp);
}

static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

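/*
 * Module initialization: create the ordered management workqueue and
 * register for inet address, netdevice and ifnet events, so that the
 * GID tables track the network configuration.
 */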
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register this notifier last to make
	 * sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

	return 0;
}


void __exit roce_gid_mgmt_cleanup(void)
{

	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all GID deletion tasks complete before we go down,
	 * to avoid any reference to freed memory. By the time ib-core
	 * is removed, all physical devices have been removed, so there
	 * is no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}