/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}
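
/* Example: for a two-port bond where only port index 0 has both tx_enabled
 * and link_up, mlx5_infer_tx_enabled() reports ports = {0}, num_enabled = 1.
 * If no port qualifies, it falls back to the "disabled" set so callers always
 * get a non-empty list of ports to map traffic onto.
 */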

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * Since we have ldev->buckets slots per port, first assume the native
 * mapping should be used.
 * If there are ports that are disabled, fill the relevant slots with a
 * mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}
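
/* Example of the mapping above: with num_ports = 2, buckets = 2 and port
 * index 1 down, the native map {1, 1, 2, 2} becomes {1, 1, 1, 1}, i.e. all of
 * port 2's buckets are redirected to the remaining active port (port 1).
 */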

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) {
		if (ldev->ports > 2)
			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
	}
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
	int err;
	int i;

	for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
		struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;

		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
							       slave_esw, ldev->ports);
		if (err)
			goto err;
	}
	return 0;
err:
	for (; i > MLX5_LAG_P1; i--)
		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
							 ldev->pf[i].dev->priv.eswitch);
	return err;
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_lag_create_single_fdb(ldev);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}
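
/* Usage sketch (illustrative; based on the callers later in this file, e.g.
 * mlx5_do_bond() and mlx5_disable_lag()): activation and teardown are paired
 * and both run under ldev->lock:
 *
 *	err = mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_SRIOV, shared_fdb);
 *	...
 *	err = mlx5_deactivate_lag(ldev);
 */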

int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;
	int i;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++)
			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
								 ldev->pf[i].dev->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_lag_port_sel_destroy(ldev);
		ldev->buckets = 1;
	}
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
			return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb)
		for (i = 0; i < ldev->ports; i++)
			if (!(ldev->pf[i].dev->priv.flags &
			      MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
				mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
}

static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev;
	int i;

	for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (is_mdev_switchdev_mode(dev) &&
		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
			continue;
		return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	if (is_mdev_switchdev_mode(dev) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			int i;

			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			for (i = 0; i < ldev->ports; i++) {
				err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
				if (err)
					break;
			}

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				for (i = 0; i < ldev->ports; i++)
					mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

/* The last mdev to unregister will destroy the workqueue before removing the
 * devcom component, and as all the mdevs use the same devcom component we are
 * guaranteed that the devcom is valid while the calling work is running.
 */
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
{
	struct mlx5_devcom_comp_dev *devcom = NULL;
	int i;

	mutex_lock(&ldev->lock);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].dev) {
			devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
			break;
		}
	}
	mutex_unlock(&ldev->lock);
	return devcom;
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_devcom_comp_dev *devcom;
	int status;

	devcom = mlx5_lag_get_devcom_comp(ldev);
	if (!devcom)
		return;

	status = mlx5_devcom_comp_trylock(devcom);
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_devcom_comp_unlock(devcom);
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_devcom_comp_unlock(devcom);
}

static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}
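
/* Worked example: on a two-port device where both of our netdevs (and no
 * others) are enslaved to the same bond, num_slaves == 2 and
 * bond_status == 0x3 == GENMASK(ldev->ports - 1, 0), so is_in_lag is true;
 * is_bonded additionally requires an active-backup or hash TX type.
 */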

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with HCA devcom component lock held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_comp_dev *pos = NULL;
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
	if (tmp_dev)
		ldev = mlx5_lag_dev(tmp_dev);

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!mlx5_lag_is_supported(dev))
		return;

	if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
		return;

recheck:
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
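
/* Note: mlx5_lag_get_roce_netdev() returns the netdev with its reference
 * count raised via dev_hold(), so the caller is expected to drop that
 * reference with dev_put() once it is done with the device.
 */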

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int idx;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	if (*i == ldev->ports)
		goto unlock;
	for (idx = *i; idx < ldev->ports; idx++)
		if (ldev->pf[idx].dev != dev)
			break;

	if (idx == ldev->ports) {
		*i = idx;
		goto unlock;
	}
	*i = idx + 1;

	peer_dev = ldev->pf[idx].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);