// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/events.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
        return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
        if (!mlx5_lag_is_ready(ldev))
                return false;

        if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
                return false;

        if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
                return false;

        return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                         ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev = mlx5_lag_dev(dev);

        return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set the port affinity of a multipath LAG
 *
 * @ldev: lag device
 * @port:
 *        0 - set normal affinity.
 *        1 - set affinity to port 1.
 *        2 - set affinity to port 2.
 **/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
                                       enum mlx5_lag_port_affinity port)
{
        struct lag_tracker tracker = {};

        if (!__mlx5_lag_is_multipath(ldev))
                return;

        switch (port) {
        case MLX5_LAG_NORMAL_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        case MLX5_LAG_P1_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P2].link_up = false;
                break;
        case MLX5_LAG_P2_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P1].link_up = false;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        default:
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Invalid affinity port %d", port);
                return;
        }

        if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

        flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
        mp->fib.mfi = fi;
        mp->fib.priority = fi->fib_priority;
        mp->fib.dst = dst;
        mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
        struct work_struct work;
        struct mlx5_lag *ldev;
        unsigned long event;
        union {
                struct fib_entry_notifier_info fen_info;
                struct fib_nh_notifier_info fnh_info;
        };
};

/* Walk the nexthops of @fi, starting after @current_dev (or from the
 * beginning when @current_dev is NULL), and return the first nexthop
 * device that belongs to this lag; NULL if none is found.
 */
static struct net_device *
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
                          struct fib_info *fi,
                          struct net_device *current_dev)
{
        struct net_device *fib_dev;
        int i, ldev_idx, nhs;

        nhs = fib_info_num_path(fi);
        i = 0;
        if (current_dev) {
                /* Skip nexthops up to and including @current_dev */
                for (; i < nhs; i++) {
                        fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
                        if (fib_dev == current_dev) {
                                i++;
                                break;
                        }
                }
        }
        for (; i < nhs; i++) {
                fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
                ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
                if (ldev_idx >= 0)
                        return ldev->pf[ldev_idx].netdev;
        }

        return NULL;
}

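/* Route events are handled here after being deferred from the FIB
 * notifier. In short:
 * - ENTRY_DEL stops tracking the route if it is the offloaded one.
 * - ENTRY_REPLACE with a single nexthop on a lag port pins port
 *   affinity to that port.
 * - ENTRY_REPLACE with one nexthop on each lag port activates
 *   multipath LAG (if not already active) and restores normal
 *   affinity. For example (device names illustrative):
 *     ip route replace 10.0.0.0/24 \
 *             nexthop via 10.1.1.1 dev enp8s0f0 \
 *             nexthop via 10.2.2.1 dev enp8s0f1
 */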
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
                                     struct fib_entry_notifier_info *fen_info)
{
        struct net_device *nh_dev0, *nh_dev1;
        struct fib_info *fi = fen_info->fi;
        struct lag_mp *mp = &ldev->lag_mp;

        /* Handle delete event */
        if (event == FIB_EVENT_ENTRY_DEL) {
                /* Stop tracking this route */
                if (mp->fib.mfi == fi)
                        mp->fib.mfi = NULL;
                return;
        }

        /* Ignore a route for a different prefix when the tracked route
         * has a better (lower) or equal priority value.
         */
        if (mp->fib.mfi && mp->fib.mfi != fi &&
            (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
            fi->fib_priority >= mp->fib.priority)
                return;

        nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
        nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

        /* Handle add/replace event */
        if (!nh_dev0) {
                if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
                        mp->fib.mfi = NULL;
                return;
        }

        if (nh_dev0 == nh_dev1) {
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Multipath offload doesn't support routes with multiple nexthops of the same device");
                return;
        }

        if (!nh_dev1) {
                if (__mlx5_lag_is_active(ldev)) {
                        int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

                        i++;
                        mlx5_lag_set_port_affinity(ldev, i);
                        mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
                }

                return;
        }

        /* First time we see multipath route */
        if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
                struct lag_tracker tracker;

                tracker = ldev->tracker;
                mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
        }

        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
                                       unsigned long event,
                                       struct fib_nh *fib_nh,
                                       struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;

        /* Check the nh event is related to the tracked route */
        if (!mp->fib.mfi || mp->fib.mfi != fi)
                return;

        /* nh added/removed */
        if (event == FIB_EVENT_NH_DEL) {
                int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

                if (i >= 0) {
                        /* Flip to the peer port: idx 0 -> port 2, idx 1 -> port 1 */
                        i = (i + 1) % 2 + 1;
                        mlx5_lag_set_port_affinity(ldev, i);
                }
        } else if (event == FIB_EVENT_NH_ADD &&
                   fib_info_num_path(fi) == 2) {
                mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        }
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
        struct mlx5_fib_event_work *fib_work =
                container_of(work, struct mlx5_fib_event_work, work);
        struct mlx5_lag *ldev = fib_work->ldev;
        struct fib_nh *fib_nh;

        /* Protect internal structures from changes */
        rtnl_lock();
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                mlx5_lag_fib_route_event(ldev, fib_work->event,
                                         &fib_work->fen_info);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fib_nh = fib_work->fnh_info.fib_nh;
                mlx5_lag_fib_nexthop_event(ldev, fib_work->event, fib_nh,
                                           fib_nh->nh_parent);
                fib_info_put(fib_nh->nh_parent);
                break;
        }

        rtnl_unlock();
        kfree(fib_work);
}

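/* Work items are allocated from the FIB notifier call chain, which may
 * run in atomic context, hence GFP_ATOMIC. The work itself executes
 * later from mp->wq and takes the RTNL lock (see mlx5_lag_fib_update()
 * above).
 */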
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
        struct mlx5_fib_event_work *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
        fib_work->ldev = ldev;
        fib_work->event = event;

        return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
                              unsigned long event,
                              void *ptr)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
        struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
        struct fib_notifier_info *info = ptr;
        struct mlx5_fib_event_work *fib_work;
        struct fib_entry_notifier_info *fen_info;
        struct fib_nh_notifier_info *fnh_info;
        struct fib_info *fi;

        if (info->family != AF_INET)
                return NOTIFY_DONE;

        if (!mlx5_lag_multipath_check_prereq(ldev))
                return NOTIFY_DONE;

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                fen_info = container_of(info, struct fib_entry_notifier_info,
                                        info);
                fi = fen_info->fi;
                if (fi->nh)
                        return NOTIFY_DONE;

                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fen_info = *fen_info;
                /* Take reference on fib_info to prevent it from being
                 * freed while work is queued. Release it afterwards.
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fnh_info = container_of(info, struct fib_nh_notifier_info,
                                        info);
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fnh_info = *fnh_info;
                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        default:
                return NOTIFY_DONE;
        }

        queue_work(mp->wq, &fib_work->work);

        return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
        /* Clear mfi, as it might become stale when a route delete event
         * has been missed, see mlx5_lag_fib_route_event().
         */
        ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;
        int err;

        /* Always clear mfi, as it might become stale when a route delete
         * event has been missed.
         */
        mp->fib.mfi = NULL;

        if (mp->fib_nb.notifier_call)
                return 0;

        mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
        if (!mp->wq)
                return -ENOMEM;

        mp->fib_nb.notifier_call = mlx5_lag_fib_event;
        err = register_fib_notifier(&init_net, &mp->fib_nb,
                                    mlx5_lag_fib_event_flush, NULL);
        if (err) {
                destroy_workqueue(mp->wq);
                mp->fib_nb.notifier_call = NULL;
        }

        return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;

        if (!mp->fib_nb.notifier_call)
                return;

        unregister_fib_notifier(&init_net, &mp->fib_nb);
        destroy_workqueue(mp->wq);
        mp->fib_nb.notifier_call = NULL;
        mp->fib.mfi = NULL;
}