1/*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 
*/

#include <sys/cdefs.h>
#include "core_priv.h"
#include <sys/eventhandler.h>

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

/* Ordered workqueue that serializes all RoCE GID table maintenance. */
static struct workqueue_struct *roce_gid_mgmt_wq;

/* Operation selector for update_gid(). */
enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

/* Deferred work bound to one network interface (held via dev_hold()). */
struct roce_netdev_event_work {
	struct work_struct work;
	if_t ndev;
};

/* Deferred work requesting a GID rescan of one ib_device. */
struct roce_rescan_work {
	struct work_struct work;
	struct ib_device *ib_dev;
};

/*
 * Maps a per-port capability predicate to the GID type that capability
 * enables; consumed by roce_gid_type_mask_support() below.
 */
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

/*
 * Return a bitmask of ib_gid_type values supported by the given device
 * port.  Non-RoCE ports report only the plain IB GID type; RoCE ports
 * report one bit per matching entry of PORT_CAP_TO_GID_TYPE.
 */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

/*
 * Add or delete the given GID in the device's GID cache, once for every
 * GID type the port supports.  The ifnet pointer is recorded in the GID
 * attributes so cached entries can later be matched back to their
 * network interface.
 */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, if_t ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
				    gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
				    gid, &gid_attr);
				break;
			}
		}
	}
}
/*
 * ib_enum_all_roce_netdevs() filter: match only the ifnet passed in the
 * cookie.  Returns non-zero when idev is the interface of interest.
 */
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if_t ndev = (if_t )cookie;
	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

/*
 * ib_enum_roce_netdev() filter: match every port that has an associated
 * ifnet (cookie is unused).
 */
static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

/*
 * Install the default GIDs for this port/ifnet pair and return how many
 * default entries exist (one per supported GID type).
 */
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, if_t idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
	    IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

/*
 * One snapshot of an interface address (IPv4 or IPv6) collected while
 * walking the VNETs; queued on an ipx_queue for later reconciliation.
 */
struct ipx_entry {
	STAILQ_ENTRY(ipx_entry)	entry;
	union ipx_addr {
		struct sockaddr sa[0];	/* generic view of v4/v6 below */
		struct sockaddr_in v4;
		struct sockaddr_in6 v6;
	} ipx_addr;
	if_t ndev;
};

STAILQ_HEAD(ipx_queue, ipx_entry);

#ifdef INET
/*
 * if_foreach_addr_type() callback: clone one IPv4 ifaddr into the
 * ipx_queue passed in arg.  Returns 1 on success, 0 on allocation
 * failure (the walk continues either way).
 */
static u_int
roce_gid_update_addr_ifa4_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	/* GFP_ATOMIC: called under the network epoch section below */
	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_callback: "
		    "couldn't allocate entry for IPv4 update\n");
		return (0);
	}
	entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;
	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

#ifdef INET6
/*
 * if_foreach_addr_type() callback: clone one IPv6 ifaddr into the
 * ipx_queue passed in arg, stripping the embedded scope ID so the
 * address compares equal to GID-derived addresses later on.
 */
static u_int
roce_gid_update_addr_ifa6_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	/* GFP_ATOMIC: called under the network epoch section below */
	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_callback: "
		    "couldn't allocate entry for IPv6 update\n");
		return (0);
	}
	entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;

	/* trash IPv6 scope ID */
	sa6_recoverscope(&entry->ipx_addr.v6);
	entry->ipx_addr.v6.sin6_scope_id = 0;

	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

/*
 * Reconcile the GID table of (device, port) against the current set of
 * IP addresses on ndev and any VLAN interfaces stacked on it:
 *
 *  1. (re)install the default GIDs;
 *  2. snapshot all IPv4/IPv6 addresses across every VNET into ipx_head;
 *  3. add GIDs for snapshot addresses missing from the cache;
 *  4. delete cached non-default GIDs whose address/ifnet pair no longer
 *     appears in the snapshot.
 *
 * Lock order is VNET list rlock -> CURVNET -> network epoch; the cache
 * updates are performed after all locks are dropped.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    if_t ndev, void *cookie)
{
	struct epoch_tracker et;
	struct if_iter iter;
	struct ipx_entry *entry;
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	if_t ifp;
	int default_gids;
	u16 index_num;
	int i;

	struct ipx_queue ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		NET_EPOCH_ENTER(et);
		for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) {
			/* accept ndev itself and any VLAN on top of it */
			if (ifp != ndev) {
				if (if_gettype(ifp) != IFT_L2VLAN)
					continue;
				if (ndev != rdma_vlan_dev_real_dev(ifp))
					continue;
			}

			/* clone address information for IPv4 and IPv6 */
#if defined(INET)
			if_foreach_addr_type(ifp, AF_INET, roce_gid_update_addr_ifa4_cb, &ipx_head);
#endif
#if defined(INET6)
			if_foreach_addr_type(ifp, AF_INET6, roce_gid_update_addr_ifa6_cb, &ipx_head);
#endif
		}
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		/* i == IB_GID_TYPE_SIZE means no cached entry matched */
		if (i != IB_GID_TYPE_SIZE)
			continue;

		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/*
	 * Remove stale GIDs, if any.  Indices below default_gids hold the
	 * default entries installed above and are skipped.
	 */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		/*
		 * Drop the ifnet reference obtained through the cached
		 * lookup (NOTE(review): assumed from the dev_put() pairing
		 * here; confirm against ib_get_cached_gid()).
		 */
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* free the address snapshot */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

/*
 * Workqueue handler: rescan GIDs on every RoCE port backed by the work
 * item's ifnet, then drop the reference taken at enqueue time.
 */
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

/*
 * Queue an asynchronous GID rescan for ndev.  VLAN interfaces are
 * resolved to their parent; anything that does not end up as an
 * Ethernet interface is ignored.
 */
static void
roce_gid_queue_scan_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

retry:
	switch (if_gettype(ndev)) {
	case IFT_ETHER:
		break;
	case IFT_L2VLAN:
		ndev = rdma_vlan_dev_real_dev(ndev);
		if (ndev != NULL)
			goto retry;
		/* FALLTHROUGH */
	default:
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	/* keep the ifnet alive until the handler runs */
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

/*
 * Workqueue handler: purge every cached GID referring to the work
 * item's ifnet, then drop the reference taken at enqueue time.
 */
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

/*
 * Synchronously delete all GIDs referring to ndev: the deletion is
 * queued and the workqueue is flushed before returning, so callers may
 * safely tear the interface down afterwards.
 */
static void
roce_gid_delete_all_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

/*
 * Notifier callback shared by the inetaddr and netdevice notifier
 * chains: purge GIDs on interface unregistration, rescan on
 * registration and address changes.
 */
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	if_t ndev = netdev_notifier_info_to_ifp(ptr);

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static eventhandler_tag eh_ifnet_event;

/*
 * ifnet_event handler: on a PCP change of a non-VLAN interface, rebuild
 * that interface's GID entries from scratch.
 */
static void
roce_ifnet_event(void *arg, if_t ifp, int event)
{
	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
		return;

	/* make sure GID table is reloaded */
	roce_gid_delete_all_event(ifp);
	roce_gid_queue_scan_event(ifp);
}

/*
 * Workqueue handler for roce_rescan_device(): refresh the GID tables of
 * every RoCE port on the work item's ib_device.
 */
static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/*
Caller must flush system workqueue before removing the ib_device */
/*
 * Queue an asynchronous rescan of all RoCE ports on ib_dev.  Returns 0
 * on success or -ENOMEM if the work item cannot be allocated.
 */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

/*
 * Module init: create the management workqueue and hook up the address,
 * netdevice and ifnet event sources.  Returns 0 or -ENOMEM.
 */
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	/* nb_inetaddr serves both notifier chains; see inetaddr_event() */
	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

	return 0;
}

/*
 * Module cleanup: unhook all event sources (mirroring init order), then
 * drain and destroy the workqueue.
 */
void __exit roce_gid_mgmt_cleanup(void)
{

	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}