/* ib_roce_gid_mgmt.c — FreeBSD sys/ofed InfiniBand core, revision 331769 */
/*
 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * RoCE GID management for the FreeBSD OFED stack.
 *
 * Keeps the per-port RoCE GID tables of registered ib_devices in sync
 * with the IPv4/IPv6 addresses configured on the backing network
 * interfaces (and their VLAN children).  Updates are triggered by
 * netdevice/inetaddr notifier events and executed asynchronously on a
 * private ordered workqueue so that notifier callbacks never block on
 * GID-table manipulation.
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

/* Ordered workqueue on which all GID add/del/rescan work items run. */
static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

/* Work item carrying a referenced netdev for scan/delete operations. */
struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;
};

/* Work item for a full GID rescan of a single ib_device. */
struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

/*
 * Table mapping a port-capability predicate to the GID type that the
 * capability implies.  Used to build the per-port GID-type bitmask.
 */
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

/*
 * Return a bitmask (indexed by enum ib_gid_type) of the GID types that
 * (ib_dev, port) supports.  Ports that do not speak RoCE always report
 * exactly the plain IB GID type.
 */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

/*
 * Add or delete @gid in the GID cache of (ib_dev, port), once for every
 * GID type the port supports.  @ndev is recorded in the GID attributes
 * so the entry can later be matched back to its interface.
 */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
		       u8 port, union ib_gid *gid, struct net_device *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, &gid_attr);
				break;
			}
		}
	}
}

/*
 * Enumeration filter: accept only the port whose bound netdev equals the
 * netdev passed in @cookie.  Used with ib_enum_all_roce_netdevs().
 */
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	struct net_device *ndev = (struct net_device *)cookie;
	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

/* Enumeration filter: accept every port that has a bound netdev. */
static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

/*
 * Install the default GIDs for @idev on (ib_dev, port) and return the
 * number of default GID slots in use — one per supported GID type.
 * That count is the first index of the non-default region of the table.
 */
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct net_device *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

#define ETH_IPOIB_DRV_NAME	"ib"

/*
 * True if @dev is an IPoIB-emulated Ethernet interface (driver name
 * "ib").  Such interfaces must not contribute RoCE GIDs.
 */
static inline int
is_eth_ipoib_intf(struct net_device *dev)
{
	if (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME))
		return 0;
	return 1;
}

/*
 * Synchronize the GID table of (device, port) with the addresses
 * currently configured on @ndev and its VLAN child interfaces:
 *
 *  1. (Re)install the default GIDs.
 *  2. Snapshot every IPv4/IPv6 address of @ndev and of VLANs whose
 *     real device is @ndev, under the ifnet/ifaddr read locks.
 *  3. Add GIDs for snapshot addresses missing from the cache.
 *  4. Delete cached non-default GIDs that no longer match any
 *     snapshot address.
 *
 * Runs from the management workqueue, as an ib_enum_*_roce_netdevs()
 * callback; @cookie is unused here.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry)	entry;
		union ipx_addr {
			/* sa[] provides a generic sockaddr view of v4/v6 */
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	CURVNET_SET(ndev->if_vnet);
	IFNET_RLOCK();
	TAILQ_FOREACH(idev, &V_ifnet, if_link) {
		/* besides @ndev itself, accept only its VLAN children */
		if (idev != ndev) {
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		IF_ADDR_RLOCK(idev);
#if defined(INET)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			/* GFP_ATOMIC: we are under IFNET/IF_ADDR rlocks */
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			/* GFP_ATOMIC: we are under IFNET/IF_ADDR rlocks */
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);

			/*
			 * Trash the IPv6 scope ID: recover the embedded
			 * scope, then clear it so addresses compare equal
			 * regardless of the interface they came from.
			 */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		IF_ADDR_RUNLOCK(idev);
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, ndev, &index_num) == 0)
				break;
		}
		/* only add if no supported GID type already has the entry */
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, ndev);
	}

	/* remove stale GIDs, if any; skip the default GID slots */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, NULL) == 0; i++) {
		union ipx_addr ipx;

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* free the address snapshot */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

/*
 * Workqueue handler: rescan addresses for every RoCE port bound to the
 * netdev referenced by the work item, then drop the reference.
 */
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

/*
 * Queue an address rescan for @ndev.  VLAN devices are first resolved
 * to their real (parent) device; IPoIB-emulated and non-Ethernet
 * interfaces are ignored.  A reference on @ndev is held until the work
 * item completes.
 */
static void
roce_gid_queue_scan_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	if (is_eth_ipoib_intf(ndev))
		return;

	if (ndev->if_type != IFT_ETHER) {
		if (ndev->if_type == IFT_L2VLAN) {
			ndev = rdma_vlan_dev_real_dev(ndev);
			if (ndev != NULL)
				goto retry;
		}
		return;
	}

	/* GFP_ATOMIC: may be called from a non-sleepable notifier context */
	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

/*
 * Workqueue handler: purge every cached GID that references the netdev
 * in the work item (used when the interface goes away).
 */
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

/*
 * Queue deletion of all GIDs referencing @ndev.  A reference on @ndev
 * is held until the work item completes.
 */
static void
roce_gid_delete_all_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

	/* GFP_ATOMIC: may be called from a non-sleepable notifier context */
	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);
}

/*
 * Shared notifier callback, registered on both the netdevice and the
 * inetaddr chains (see roce_gid_mgmt_init()): interface unregistration
 * purges its GIDs; registration and address changes trigger a rescan.
 */
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *ndev = ptr;

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

/*
 * Workqueue handler: rescan addresses on every RoCE port of a single
 * ib_device (match-all filter).
 */
static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

/*
 * Module init: create the ordered workqueue and register the shared
 * notifier block on both address-event chains.
 */
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	return 0;
}

/* Module teardown: unregister notifiers and drain pending GID work. */
void __exit roce_gid_mgmt_cleanup(void)
{
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}