/* ib_cache.c revision 331769 */

/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"

struct ib_pkey_cache {
	int	table_len;
	u16	table[0];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_device  *device;
	u8		   port_num;
};

union ib_gid zgid;
EXPORT_SYMBOL(zgid);

static const struct ib_gid_attr zattr;

enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};

enum gid_table_entry_props {
	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
};

enum gid_table_write_action {
	GID_TABLE_WRITE_ACTION_ADD,
	GID_TABLE_WRITE_ACTION_DEL,
	/* MODIFY only updates the GID table. Currently only used by
	 * ib_cache_update.
	 */
	GID_TABLE_WRITE_ACTION_MODIFY
};

struct ib_gid_table_entry {
	unsigned long	    props;
	union ib_gid	    gid;
	struct ib_gid_attr  attr;
	void		   *context;
};

struct ib_gid_table {
	int		     sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Finding whether this GID already exists.
	 * (b) Finding a free slot.
	 * (c) Writing the new GID.
	 *
	 * Delete requires a different set of operations:
	 * (a) Find the GID.
	 * (b) Delete it.
	 *
	 * Add/delete should be carried out atomically.
	 * This is done by taking this mutex in all writers. We
	 * don't need this lock for IB, as the MAD layer replaces
	 * all entries. All data_vec entries are protected by this
	 * lock.
	 */
	struct mutex	     lock;
	/* This lock protects the table entries from being
	 * read and written simultaneously.
	 */
	rwlock_t	     rwlock;
	struct ib_gid_table_entry *data_vec;
};

static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		struct ib_event event;

		event.device		= ib_dev;
		event.element.port_num	= port;
		event.event		= IB_EVENT_GID_CHANGE;

		ib_dispatch_event(&event);
	}
}

static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);

int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
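
/*
 * Illustrative only (not part of the original file): a minimal sketch of
 * how a sysfs-style store handler might use the two helpers above, with a
 * hypothetical attribute buffer "buf".  ib_cache_gid_parse_type_str()
 * tolerates a trailing newline, so a raw write of "RoCE v2\n" parses to
 * IB_GID_TYPE_ROCE_UDP_ENCAP:
 *
 *	int gid_type = ib_cache_gid_parse_type_str(buf);
 *
 *	if (gid_type < 0)
 *		return -EINVAL;	// neither "IB/RoCE v1" nor "RoCE v2"
 *	pr_info("selected GID type: %s\n", ib_cache_gid_type_str(gid_type));
 */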

/* This function expects that rwlock will be write locked in all
 * scenarios and that lock will be locked in sleep-able (RoCE)
 * scenarios.
 */
static int write_gid(struct ib_device *ib_dev, u8 port,
		     struct ib_gid_table *table, int ix,
		     const union ib_gid *gid,
		     const struct ib_gid_attr *attr,
		     enum gid_table_write_action action,
		     bool default_gid)
	__releases(&table->rwlock) __acquires(&table->rwlock)
{
	int ret = 0;
	struct net_device *old_net_dev;
	enum ib_gid_type old_gid_type;

	/* In the rdma_cap_roce_gid_table() case, this function must be
	 * protected by a sleep-able lock.
	 */

	if (rdma_cap_roce_gid_table(ib_dev, port)) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
		write_unlock_irq(&table->rwlock);
		/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
		 * RoCE providers and thus only updates the cache.
		 */
		if (action == GID_TABLE_WRITE_ACTION_ADD)
			ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
					      &table->data_vec[ix].context);
		else if (action == GID_TABLE_WRITE_ACTION_DEL)
			ret = ib_dev->del_gid(ib_dev, port, ix,
					      &table->data_vec[ix].context);
		write_lock_irq(&table->rwlock);
	}

	old_net_dev = table->data_vec[ix].attr.ndev;
	old_gid_type = table->data_vec[ix].attr.gid_type;
	if (old_net_dev && old_net_dev != attr->ndev)
		dev_put(old_net_dev);
	/* if modify_gid failed, just delete the old gid */
	if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
		gid = &zgid;
		attr = &zattr;
		table->data_vec[ix].context = NULL;
	}

	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
	if (default_gid) {
		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
		if (action == GID_TABLE_WRITE_ACTION_DEL)
			table->data_vec[ix].attr.gid_type = old_gid_type;
	}
	if (table->data_vec[ix].attr.ndev &&
	    table->data_vec[ix].attr.ndev != old_net_dev)
		dev_hold(table->data_vec[ix].attr.ndev);

	table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;

	return ret;
}
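
/*
 * Illustrative only: the locking protocol write_gid() expects from its
 * callers, as used by the add/del paths further below.  The mutex
 * serializes writers and may be held across the sleep-able provider
 * callbacks, while the rwlock is dropped and re-taken inside write_gid()
 * around those callbacks:
 *
 *	mutex_lock(&table->lock);
 *	write_lock_irq(&table->rwlock);
 *	ret = write_gid(ib_dev, port, table, ix, gid, attr, action, false);
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 */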

static int add_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   const union ib_gid *gid,
		   const struct ib_gid_attr *attr,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_ADD, default_gid);
}

static int modify_gid(struct ib_device *ib_dev, u8 port,
		      struct ib_gid_table *table, int ix,
		      const union ib_gid *gid,
		      const struct ib_gid_attr *attr,
		      bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, gid, attr,
			 GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}

static int del_gid(struct ib_device *ib_dev, u8 port,
		   struct ib_gid_table *table, int ix,
		   bool default_gid)
{
	return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
			 GID_TABLE_WRITE_ACTION_DEL, default_gid);
}

/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;

	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = &table->data_vec[i];
		struct ib_gid_attr *attr = &data->attr;
		int curr_index = i;

		i++;

		if (data->props & GID_TABLE_ENTRY_INVALID)
			continue;

		if (empty < 0)
			if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
			    !memcmp(attr, &zattr, sizeof(*attr)) &&
			    !data->props)
				empty = curr_index;

		if (found >= 0)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
		    default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}
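
/*
 * Illustrative only: find_gid() match criteria are opt-in via the mask
 * bits, so a RoCE v2 lookup that must also match the bound net_device
 * (mirroring what ib_cache_gid_add() does below, with "table", "gid" and
 * "ndev" as assumed caller variables) would look like:
 *
 *	struct ib_gid_attr val = {
 *		.ndev = ndev,
 *		.gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP,
 *	};
 *	int empty;
 *	int ix = find_gid(table, gid, &val, false,
 *			  GID_ATTR_FIND_MASK_GID |
 *			  GID_ATTR_FIND_MASK_GID_TYPE |
 *			  GID_ATTR_FIND_MASK_NETDEV, &empty);
 *
 * ix >= 0 means the GID already exists; otherwise "empty" (if >= 0) is
 * the first free slot that an add can use.
 */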

static void addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
	if (dev->if_addrlen != ETH_ALEN)
		return;
	memcpy(eui, IF_LLADDR(dev), 3);
	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);

	/* NOTE: The scope ID is added by the GID to IP conversion */

	eui[3] = 0xFF;
	eui[4] = 0xFE;
	eui[0] ^= 2;
}

static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}
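
/*
 * Worked example (illustrative): for a MAC of 00:11:22:33:44:55,
 * addrconf_ifid_eui48() produces the modified EUI-64 interface ID
 * 02:11:22:ff:fe:33:44:55 (0xff/0xfe inserted in the middle and the
 * universal/local bit flipped in the first byte), so the default GID
 * built by make_default_gid() is fe80::0211:22ff:fe33:4455.
 */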

int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	int ix;
	int ret = 0;
	struct net_device *idev;
	int empty;

	table = ports_table[port - rdma_start_port(ib_dev)];

	if (!memcmp(gid, &zgid, sizeof(*gid)))
		return -EINVAL;

	if (ib_dev->get_netdev) {
		idev = ib_dev->get_netdev(ib_dev, port);
		if (idev && attr->ndev != idev) {
			union ib_gid default_gid;

			/* Adding default GIDs is not permitted */
			make_default_gid(idev, &default_gid);
			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
				dev_put(idev);
				return -EPERM;
			}
		}
		if (idev)
			dev_put(idev);
	}

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return ret;
}

int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	int ix;

	table = ports_table[port - rdma_start_port(ib_dev)];

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	ix = find_gid(table, gid, attr, false,
		      GID_ATTR_FIND_MASK_GID	  |
		      GID_ATTR_FIND_MASK_GID_TYPE |
		      GID_ATTR_FIND_MASK_NETDEV	  |
		      GID_ATTR_FIND_MASK_DEFAULT,
		      NULL);
	if (ix < 0)
		goto out_unlock;

	if (!del_gid(ib_dev, port, table, ix, false))
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);
	return 0;
}

int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = ports_table[port - rdma_start_port(ib_dev)];

	mutex_lock(&table->lock);
	write_lock_irq(&table->rwlock);

	for (ix = 0; ix < table->sz; ix++)
		if (table->data_vec[ix].attr.ndev == ndev)
			if (!del_gid(ib_dev, port, table, ix,
				     !!(table->data_vec[ix].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;

	write_unlock_irq(&table->rwlock);
	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}

static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;

	table = ports_table[port - rdma_start_port(ib_dev)];

	if (index < 0 || index >= table->sz)
		return -EINVAL;

	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
		return -EAGAIN;

	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
	if (attr) {
		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
		/* make sure network device is valid and attached */
		if (attr->ndev != NULL &&
		    (attr->ndev->if_flags & IFF_DYING) == 0 &&
		    attr->ndev->if_addr != NULL)
			dev_hold(attr->ndev);
		else
			attr->ndev = NULL;
	}

	return 0;
}

static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
				    const union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	u8 p;
	int local_index;
	unsigned long flags;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		table = ports_table[p];
		read_lock_irqsave(&table->rwlock, flags);
		local_index = find_gid(table, gid, val, false, mask, NULL);
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + rdma_start_port(ib_dev);
			read_unlock_irqrestore(&table->rwlock, flags);
			return 0;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return -ENOENT;
}

static int ib_cache_gid_find(struct ib_device *ib_dev,
			     const union ib_gid *gid,
			     enum ib_gid_type gid_type,
			     struct net_device *ndev, u8 *port,
			     u16 *index)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
					mask, port, index);
}

int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
			       const union ib_gid *gid,
			       enum ib_gid_type gid_type,
			       u8 port, struct net_device *ndev,
			       u16 *index)
{
	int local_index;
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	unsigned long flags;

	if (port < rdma_start_port(ib_dev) ||
	    port > rdma_end_port(ib_dev))
		return -ENOENT;

	table = ports_table[port - rdma_start_port(ib_dev)];

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		if (index)
			*index = local_index;
		read_unlock_irqrestore(&table->rwlock, flags);
		return 0;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
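
/*
 * Illustrative only: a caller that already knows the port can resolve a
 * GID to its table index without scanning every port ("device", "gid"
 * and "port_num" are assumed caller variables).  ndev may be NULL when
 * the caller does not care which net_device the entry is bound to (the
 * NETDEV mask bit is then left out):
 *
 *	u16 index;
 *
 *	if (!ib_find_cached_gid_by_port(device, &gid, IB_GID_TYPE_IB,
 *					port_num, NULL, &index))
 *		pr_debug("GID found at index %u\n", index);
 */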

/**
 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
 * GID value occurs
 * @ib_dev: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value should be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 *   If the filter function returns true, the corresponding index is returned,
 *   otherwise, we continue searching the GID table. It's guaranteed that
 *   while filter is executed, ndev field is valid and the structure won't
 *   change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Context passed through to the filter function.
 * @index: The index into the cached GID table where the GID was found. This
 *   parameter may be NULL.
 *
 * ib_cache_gid_find_by_filter() searches for the specified GID value
 * for which the filter function returns true in the port's GID table.
 * This function is only supported on RoCE ports.
 */
static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
				       const union ib_gid *gid,
				       u8 port,
				       bool (*filter)(const union ib_gid *,
						      const struct ib_gid_attr *,
						      void *),
				       void *context,
				       u16 *index)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	struct ib_gid_table *table;
	unsigned int i;
	unsigned long flags;
	bool found = false;

	if (!ports_table)
		return -EOPNOTSUPP;

	if (port < rdma_start_port(ib_dev) ||
	    port > rdma_end_port(ib_dev) ||
	    !rdma_protocol_roce(ib_dev, port))
		return -EPROTONOSUPPORT;

	table = ports_table[port - rdma_start_port(ib_dev)];

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_attr attr;

		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
			goto next;

		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
			goto next;

		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));

		if (filter(gid, &attr, context))
			found = true;

next:
		if (found)
			break;
	}
	read_unlock_irqrestore(&table->rwlock, flags);

	if (!found)
		return -ENOENT;

	if (index)
		*index = i;
	return 0;
}

static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table =
		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);

	return table;

err_free_table:
	kfree(table);
	return NULL;
}

static void release_gid_table(struct ib_gid_table *table)
{
	if (table) {
		kfree(table->data_vec);
		kfree(table);
	}
}

static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;
	bool deleted = false;

	if (!table)
		return;

	write_lock_irq(&table->rwlock);
	for (i = 0; i < table->sz; ++i) {
		if (memcmp(&table->data_vec[i].gid, &zgid,
			   sizeof(table->data_vec[i].gid)))
			/* props stores GID_TABLE_ENTRY_* flags, so test the
			 * DEFAULT entry bit (not the unrelated find mask).
			 */
			if (!del_gid(ib_dev, port, table, i,
				     !!(table->data_vec[i].props &
					GID_TABLE_ENTRY_DEFAULT)))
				deleted = true;
	}
	write_unlock_irq(&table->rwlock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);
}

void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	struct ib_gid_attr zattr_type = zattr;
	struct ib_gid_table *table;
	unsigned int gid_type;

	table = ports_table[port - rdma_start_port(ib_dev)];

	make_default_gid(ndev, &gid);
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		int ix;
		union ib_gid current_gid;
		struct ib_gid_attr current_gid_attr = {};

		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		mutex_lock(&table->lock);
		write_lock_irq(&table->rwlock);
		ix = find_gid(table, NULL, &gid_attr, true,
			      GID_ATTR_FIND_MASK_GID_TYPE |
			      GID_ATTR_FIND_MASK_DEFAULT,
			      NULL);

		/* Couldn't find the default GID location */
		if (WARN_ON(ix < 0))
			goto release;

		zattr_type.gid_type = gid_type;

		if (!__ib_cache_gid_get(ib_dev, port, ix,
					&current_gid, &current_gid_attr) &&
		    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
		    !memcmp(&gid, &current_gid, sizeof(gid)) &&
		    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
			goto release;

		if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
		    memcmp(&current_gid_attr, &zattr_type,
			   sizeof(current_gid_attr))) {
			if (del_gid(ib_dev, port, table, ix, true)) {
				pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
					ix, gid.raw);
				goto release;
			} else {
				dispatch_gid_change_event(ib_dev, port);
			}
		}

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
				pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
					gid.raw);
			else
				dispatch_gid_change_event(ib_dev, port);
		}

release:
		if (current_gid_attr.ndev)
			dev_put(current_gid_attr.ndev);
		write_unlock_irq(&table->rwlock);
		mutex_unlock(&table->lock);
	}
}

static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				     struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;
	unsigned int current_gid = 0;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	for (i = 0; i < num_default_gids && i < table->sz; i++) {
		struct ib_gid_table_entry *entry =
			&table->data_vec[i];

		entry->props |= GID_TABLE_ENTRY_DEFAULT;
		current_gid = find_next_bit(&roce_gid_type_mask,
					    BITS_PER_LONG,
					    current_gid);
		entry->attr.gid_type = current_gid++;
	}

	return 0;
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 port;
	struct ib_gid_table **table;
	int err = 0;

	table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
	if (!table) {
		pr_warn("failed to allocate ib gid cache for %s\n",
			ib_dev->name);
		return -ENOMEM;
	}

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		u8 rdma_port = port + rdma_start_port(ib_dev);

		table[port] =
			alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table[port]) {
			err = -ENOMEM;
			goto rollback_table_setup;
		}

		err = gid_table_reserve_default(ib_dev, rdma_port,
						table[port]);
		if (err)
			goto rollback_table_setup;
	}

	ib_dev->cache.gid_cache = table;
	return 0;

rollback_table_setup:
	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table[port]);
		release_gid_table(table[port]);
	}

	kfree(table);
	return err;
}

static void gid_table_release_one(struct ib_device *ib_dev)
{
	struct ib_gid_table **table = ib_dev->cache.gid_cache;
	u8 port;

	if (!table)
		return;

	for (port = 0; port < ib_dev->phys_port_cnt; port++)
		release_gid_table(table[port]);

	kfree(table);
	ib_dev->cache.gid_cache = NULL;
}

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table **table = ib_dev->cache.gid_cache;
	u8 port;

	if (!table)
		return;

	for (port = 0; port < ib_dev->phys_port_cnt; port++)
		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
				       table[port]);
}

static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);

	if (err)
		return err;

	err = roce_rescan_device(ib_dev);

	if (err) {
		gid_table_cleanup_one(ib_dev);
		gid_table_release_one(ib_dev);
	}

	return err;
}

int ib_get_cached_gid(struct ib_device *device,
		      u8 port_num,
		      int index,
		      union ib_gid *gid,
		      struct ib_gid_attr *gid_attr)
{
	int res;
	unsigned long flags;
	struct ib_gid_table **ports_table = device->cache.gid_cache;
	struct ib_gid_table *table;

	/* validate the port before indexing into the per-port table */
	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	table = ports_table[port_num - rdma_start_port(device)];

	read_lock_irqsave(&table->rwlock, flags);
	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
	read_unlock_irqrestore(&table->rwlock, flags);

	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);

int ib_find_cached_gid(struct ib_device *device,
		       const union ib_gid *gid,
		       enum ib_gid_type gid_type,
		       struct net_device *ndev,
		       u8 *port_num,
		       u16 *index)
{
	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);

int ib_find_gid_by_filter(struct ib_device *device,
			  const union ib_gid *gid,
			  u8 port_num,
			  bool (*filter)(const union ib_gid *gid,
					 const struct ib_gid_attr *,
					 void *),
			  void *context, u16 *index)
{
	/* Only RoCE GID table supports filter function */
	if (!rdma_cap_roce_gid_table(device, port_num) && filter)
		return -EPROTONOSUPPORT;

	return ib_cache_gid_find_by_filter(device, gid,
					   port_num, filter,
					   context, index);
}
EXPORT_SYMBOL(ib_find_gid_by_filter);
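
/*
 * Illustrative only: a sketch of a filter callback for
 * ib_find_gid_by_filter().  Per the contract documented above, the
 * callback runs in atomic context with the entry's ndev still valid, so
 * it must not sleep.  This hypothetical filter accepts only RoCE v2
 * entries bound to the net_device passed through "context":
 *
 *	static bool gid_type_and_ndev_filter(const union ib_gid *gid,
 *					     const struct ib_gid_attr *attr,
 *					     void *context)
 *	{
 *		return attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
 *		       attr->ndev == context;
 *	}
 *
 *	u16 index;
 *	int ret = ib_find_gid_by_filter(device, &gid, port_num,
 *					gid_type_and_ndev_filter, ndev,
 *					&index);
 */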

int ib_get_cached_pkey(struct ib_device *device,
		       u8 port_num,
		       int index,
		       u16 *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

	if (index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);

int ib_find_cached_pkey(struct ib_device *device,
			u8 port_num,
			u16 pkey,
			u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else
				partial_ix = i;
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);

int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8 port_num,
			      u16 pkey,
			      u16 *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);

	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);

int ib_get_cached_lmc(struct ib_device *device,
		      u8 port_num,
		      u8 *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	read_lock_irqsave(&device->cache.lock, flags);
	*lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
	read_unlock_irqrestore(&device->cache.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
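
/*
 * Illustrative note on the pkey lookups above: a P_Key is 15 bits of key
 * plus a membership bit (0x8000, full member when set).
 * ib_find_cached_pkey() compares only the low 15 bits and prefers a
 * full-member match over a partial one, while
 * ib_find_exact_cached_pkey() requires all 16 bits to match:
 *
 *	u16 index;
 *
 *	// 0x7fff and 0xffff differ only in the membership bit, so this
 *	// succeeds if either is in the table...
 *	ib_find_cached_pkey(device, port_num, 0xffff, &index);
 *	// ...whereas this succeeds only for the exact 0xffff entry.
 *	ib_find_exact_cached_pkey(device, port_num, 0xffff, &index);
 */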

static void ib_cache_update(struct ib_device *device,
			    u8 port)
{
	struct ib_port_attr *tprops = NULL;
	struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
	struct ib_gid_cache {
		int table_len;
		union ib_gid table[0];
	} *gid_cache = NULL;
	int i;
	int ret;
	struct ib_gid_table *table;
	struct ib_gid_table **ports_table = device->cache.gid_cache;
	bool use_roce_gid_table =
		rdma_cap_roce_gid_table(device, port);

	if (port < rdma_start_port(device) || port > rdma_end_port(device))
		return;

	table = ports_table[port - rdma_start_port(device)];

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		pr_warn("ib_query_port failed (%d) for %s\n",
			ret, device->name);
		goto err;
	}

	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
			     sizeof *pkey_cache->table, GFP_KERNEL);
	if (!pkey_cache)
		goto err;

	pkey_cache->table_len = tprops->pkey_tbl_len;

	if (!use_roce_gid_table) {
		gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
				    sizeof(*gid_cache->table), GFP_KERNEL);
		if (!gid_cache)
			goto err;

		gid_cache->table_len = tprops->gid_tbl_len;
	}

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
				ret, device->name, i);
			goto err;
		}
	}

	if (!use_roce_gid_table) {
		for (i = 0; i < gid_cache->table_len; ++i) {
			ret = ib_query_gid(device, port, i,
					   gid_cache->table + i, NULL);
			if (ret) {
				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
					ret, device->name, i);
				goto err;
			}
		}
	}

	write_lock_irq(&device->cache.lock);

	old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];

	device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
	if (!use_roce_gid_table) {
		write_lock(&table->rwlock);
		for (i = 0; i < gid_cache->table_len; i++) {
			modify_gid(device, port, table, i, gid_cache->table + i,
				   &zattr, false);
		}
		write_unlock(&table->rwlock);
	}

	device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;

	write_unlock_irq(&device->cache.lock);

	kfree(gid_cache);
	kfree(old_pkey_cache);
	kfree(tprops);
	return;

err:
	kfree(pkey_cache);
	kfree(gid_cache);
	kfree(tprops);
}

static void ib_cache_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_cache_update(work->device, work->port_num);
	kfree(work);
}

static void ib_cache_event(struct ib_event_handler *handler,
			   struct ib_event *event)
{
	struct ib_update_work *work;

	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
		work = kmalloc(sizeof *work, GFP_ATOMIC);
		if (work) {
			INIT_WORK(&work->work, ib_cache_task);
			work->device   = event->device;
			work->port_num = event->element.port_num;
			queue_work(ib_wq, &work->work);
		}
	}
}

int ib_cache_setup_one(struct ib_device *device)
{
	int p;
	int err;

	rwlock_init(&device->cache.lock);

	device->cache.pkey_cache =
		kzalloc(sizeof *device->cache.pkey_cache *
			(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
					  (rdma_end_port(device) -
					   rdma_start_port(device) + 1),
					  GFP_KERNEL);
	if (!device->cache.pkey_cache ||
	    !device->cache.lmc_cache) {
		pr_warn("Couldn't allocate cache for %s\n", device->name);
		return -ENOMEM;
	}

	err = gid_table_setup_one(device);
	if (err)
		/* Allocated memory will be cleaned in the release function */
		return err;

	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
		ib_cache_update(device, p + rdma_start_port(device));

	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
			      device, ib_cache_event);
	err = ib_register_event_handler(&device->cache.event_handler);
	if (err)
		goto err;

	return 0;

err:
	gid_table_cleanup_one(device);
	return err;
}

void ib_cache_release_one(struct ib_device *device)
{
	int p;

	/*
	 * The release function frees all the cache elements.
	 * This function should be called as part of freeing
	 * all the device's resources, when the cache can no
	 * longer be accessed.
	 */
	if (device->cache.pkey_cache)
		for (p = 0;
		     p <= rdma_end_port(device) - rdma_start_port(device); ++p)
			kfree(device->cache.pkey_cache[p]);

	gid_table_release_one(device);
	kfree(device->cache.pkey_cache);
	kfree(device->cache.lmc_cache);
}

void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function unregisters the event handler,
	 * waits for all in-progress workqueue elements and cleans
	 * up the GID cache. This function should be called after
	 * the device was removed from the devices list and all
	 * clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	ib_unregister_event_handler(&device->cache.event_handler);
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);
}

void __init ib_cache_setup(void)
{
	roce_gid_mgmt_init();
}

void __exit ib_cache_cleanup(void)
{
	roce_gid_mgmt_cleanup();
}