1/* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33#include <linux/kernel.h> 34#include <linux/random.h> 35#include <linux/vmalloc.h> 36#include <linux/hardirq.h> 37#include <linux/mlx5/driver.h> 38#include <linux/kern_levels.h> 39#include "mlx5_core.h" 40#include "lib/eq.h" 41#include "lib/mlx5.h" 42#include "lib/events.h" 43#include "lib/pci_vsc.h" 44#include "lib/tout.h" 45#include "diag/fw_tracer.h" 46#include "diag/reporter_vnic.h" 47 48enum { 49 MAX_MISSES = 3, 50}; 51 52enum { 53 MLX5_DROP_HEALTH_WORK, 54}; 55 56enum { 57 MLX5_SENSOR_NO_ERR = 0, 58 MLX5_SENSOR_PCI_COMM_ERR = 1, 59 MLX5_SENSOR_PCI_ERR = 2, 60 MLX5_SENSOR_NIC_DISABLED = 3, 61 MLX5_SENSOR_NIC_SW_RESET = 4, 62 MLX5_SENSOR_FW_SYND_RFR = 5, 63}; 64 65enum { 66 MLX5_SEVERITY_MASK = 0x7, 67 MLX5_SEVERITY_VALID_MASK = 0x8, 68}; 69 70u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) 71{ 72 return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; 73} 74 75void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) 76{ 77 u32 cur_cmdq_addr_l_sz; 78 79 cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); 80 iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | 81 state << MLX5_NIC_IFC_OFFSET, 82 &dev->iseg->cmdq_addr_l_sz); 83} 84 85static bool sensor_pci_not_working(struct mlx5_core_dev *dev) 86{ 87 struct mlx5_core_health *health = &dev->priv.health; 88 struct health_buffer __iomem *h = health->health; 89 90 /* Offline PCI reads return 0xffffffff */ 91 return (ioread32be(&h->fw_ver) == 0xffffffff); 92} 93 94static int mlx5_health_get_rfr(u8 rfr_severity) 95{ 96 return rfr_severity >> MLX5_RFR_BIT_OFFSET; 97} 98 99static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) 100{ 101 struct mlx5_core_health *health = &dev->priv.health; 102 struct health_buffer __iomem *h = health->health; 103 u8 synd = ioread8(&h->synd); 104 u8 rfr; 105 106 rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity)); 107 108 if (rfr && synd) 109 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); 110 return rfr && synd; 111} 112 113u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) 114{ 115 if (sensor_pci_not_working(dev)) 116 return MLX5_SENSOR_PCI_COMM_ERR; 117 if (pci_channel_offline(dev->pdev)) 118 return MLX5_SENSOR_PCI_ERR; 119 if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) 120 return MLX5_SENSOR_NIC_DISABLED; 121 if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET) 122 return MLX5_SENSOR_NIC_SW_RESET; 123 if (sensor_fw_synd_rfr(dev)) 124 return MLX5_SENSOR_FW_SYND_RFR; 125 126 return MLX5_SENSOR_NO_ERR; 127} 128 129static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) 130{ 131 enum mlx5_vsc_state state; 132 int ret; 133 134 if (!mlx5_core_is_pf(dev)) 135 return -EBUSY; 136 137 /* Try to lock GW access, this stage doesn't return 138 * EBUSY because locked GW does not mean that other PF 139 * already started the reset. 140 */ 141 ret = mlx5_vsc_gw_lock(dev); 142 if (ret == -EBUSY) 143 return -EINVAL; 144 if (ret) 145 return ret; 146 147 state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; 148 /* At this stage, if the return status == EBUSY, then we know 149 * for sure that another PF started the reset, so don't allow 150 * another reset. 151 */ 152 ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); 153 if (ret) 154 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); 155 156 /* Unlock GW access */ 157 mlx5_vsc_gw_unlock(dev); 158 159 return ret; 160} 161 162static bool reset_fw_if_needed(struct mlx5_core_dev *dev) 163{ 164 bool supported = (ioread32be(&dev->iseg->initializing) >> 165 MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; 166 u32 fatal_error; 167 168 if (!supported) 169 return false; 170 171 /* The reset only needs to be issued by one PF. The health buffer is 172 * shared between all functions, and will be cleared during a reset. 173 * Check again to avoid a redundant 2nd reset. If the fatal errors was 174 * PCI related a reset won't help. 175 */ 176 fatal_error = mlx5_health_check_fatal_sensors(dev); 177 if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || 178 fatal_error == MLX5_SENSOR_NIC_DISABLED || 179 fatal_error == MLX5_SENSOR_NIC_SW_RESET) { 180 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); 181 return false; 182 } 183 184 mlx5_core_warn(dev, "Issuing FW Reset\n"); 185 /* Write the NIC interface field to initiate the reset, the command 186 * interface address also resides here, don't overwrite it. 187 */ 188 mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET); 189 190 return true; 191} 192 193static void enter_error_state(struct mlx5_core_dev *dev, bool force) 194{ 195 if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ 196 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 197 mlx5_cmd_flush(dev); 198 } 199 200 mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); 201} 202 203void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) 204{ 205 bool err_detected = false; 206 207 /* Mark the device as fatal in order to abort FW commands */ 208 if ((mlx5_health_check_fatal_sensors(dev) || force) && 209 dev->state == MLX5_DEVICE_STATE_UP) { 210 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 211 err_detected = true; 212 } 213 mutex_lock(&dev->intf_state_mutex); 214 if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 215 goto unlock;/* a previous error is still being handled */ 216 217 enter_error_state(dev, force); 218unlock: 219 mutex_unlock(&dev->intf_state_mutex); 220} 221 222void mlx5_error_sw_reset(struct mlx5_core_dev *dev) 223{ 224 unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE); 225 int lock = -EBUSY; 226 227 mutex_lock(&dev->intf_state_mutex); 228 if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) 229 goto unlock; 230 231 mlx5_core_err(dev, "start\n"); 232 233 if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { 234 /* Get cr-dump and reset FW semaphore */ 235 lock = lock_sem_sw_reset(dev, true); 236 237 if (lock == -EBUSY) { 238 delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP); 239 goto recover_from_sw_reset; 240 } 241 /* Execute SW reset */ 242 reset_fw_if_needed(dev); 243 } 244 245recover_from_sw_reset: 246 /* Recover from SW reset */ 247 end = jiffies + msecs_to_jiffies(delay_ms); 248 do { 249 if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) 250 break; 251 252 msleep(20); 253 } while (!time_after(jiffies, end)); 254 255 if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) { 256 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", 257 mlx5_get_nic_state(dev), delay_ms); 258 } 259 260 /* Release FW semaphore if you are the lock owner */ 261 if (!lock) 262 lock_sem_sw_reset(dev, false); 263 264 mlx5_core_err(dev, "end\n"); 265 266unlock: 267 mutex_unlock(&dev->intf_state_mutex); 268} 269 270static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) 271{ 272 u8 nic_interface = mlx5_get_nic_state(dev); 273 274 switch (nic_interface) { 275 case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER: 276 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); 277 break; 278 279 case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED: 280 mlx5_core_warn(dev, "starting teardown\n"); 281 break; 282 283 case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC: 284 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); 285 break; 286 287 case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET: 288 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: 289 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded 290 * and this is a VF), this is not recoverable by SW reset. 291 * Logging of this is handled elsewhere. 292 * 2. FW reset has been issued by another function, driver can 293 * be reloaded to recover after the mode switches to 294 * MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED. 295 */ 296 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) 297 mlx5_core_warn(dev, "NIC SW reset in progress\n"); 298 break; 299 300 default: 301 mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", 302 nic_interface); 303 } 304 305 mlx5_disable_device(dev); 306} 307 308int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) 309{ 310 unsigned long end; 311 312 end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET)); 313 while (sensor_pci_not_working(dev)) { 314 if (time_after(jiffies, end)) 315 return -ETIMEDOUT; 316 if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { 317 mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); 318 return -ENODEV; 319 } 320 msleep(100); 321 } 322 return 0; 323} 324 325static int mlx5_health_try_recover(struct mlx5_core_dev *dev) 326{ 327 mlx5_core_warn(dev, "handling bad device here\n"); 328 mlx5_handle_bad_state(dev); 329 if (mlx5_health_wait_pci_up(dev)) { 330 mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); 331 return -EIO; 332 } 333 mlx5_core_err(dev, "starting health recovery flow\n"); 334 if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) { 335 mlx5_core_err(dev, "health recovery failed\n"); 336 return -EIO; 337 } 338 339 mlx5_core_info(dev, "health recovery succeeded\n"); 340 return 0; 341} 342 343static const char *hsynd_str(u8 synd) 344{ 345 switch (synd) { 346 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR: 347 return "firmware internal error"; 348 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC: 349 return "irisc not responding"; 350 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR: 351 return "unrecoverable hardware error"; 352 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR: 353 return "firmware CRC error"; 354 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR: 355 return "ICM fetch PCI error"; 356 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR: 357 return "HW fatal error\n"; 358 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN: 359 return "async EQ buffer overrun"; 360 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR: 361 return "EQ error"; 362 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV: 363 return "Invalid EQ referenced"; 364 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR: 365 return "FFSER error"; 366 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR: 367 return "High temperature"; 368 case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR: 369 return "ICM fetch PCI data poisoned error"; 370 default: 371 return "unrecognized error"; 372 } 373} 374 375static const char *mlx5_loglevel_str(int level) 376{ 377 switch (level) { 378 case LOGLEVEL_EMERG: 379 return "EMERGENCY"; 380 case LOGLEVEL_ALERT: 381 return "ALERT"; 382 case LOGLEVEL_CRIT: 383 return "CRITICAL"; 384 case LOGLEVEL_ERR: 385 return "ERROR"; 386 case LOGLEVEL_WARNING: 387 return "WARNING"; 388 case LOGLEVEL_NOTICE: 389 return "NOTICE"; 390 case LOGLEVEL_INFO: 391 return "INFO"; 392 case LOGLEVEL_DEBUG: 393 return "DEBUG"; 394 } 395 return "Unknown log level"; 396} 397 398static int mlx5_health_get_severity(u8 rfr_severity) 399{ 400 return rfr_severity & MLX5_SEVERITY_VALID_MASK ? 401 rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR; 402} 403 404static void print_health_info(struct mlx5_core_dev *dev) 405{ 406 struct mlx5_core_health *health = &dev->priv.health; 407 struct health_buffer __iomem *h = health->health; 408 u8 rfr_severity; 409 int severity; 410 int i; 411 412 /* If the syndrome is 0, the device is OK and no need to print buffer */ 413 if (!ioread8(&h->synd)) 414 return; 415 416 if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { 417 mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n"); 418 return; 419 } 420 421 rfr_severity = ioread8(&h->rfr_severity); 422 severity = mlx5_health_get_severity(rfr_severity); 423 mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n", 424 hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity)); 425 426 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) 427 mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i, 428 ioread32be(h->assert_var + i)); 429 430 mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); 431 mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); 432 mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), 433 fw_rev_sub(dev)); 434 mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time)); 435 mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); 436 mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity)); 437 mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity)); 438 mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index)); 439 mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd), 440 hsynd_str(ioread8(&h->synd))); 441 mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); 442 mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver)); 443} 444 445static int 446mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, 447 struct devlink_fmsg *fmsg, 448 struct netlink_ext_ack *extack) 449{ 450 struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); 451 struct mlx5_core_health *health = &dev->priv.health; 452 struct health_buffer __iomem *h = health->health; 453 u8 synd = ioread8(&h->synd); 454 455 devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); 456 if (!synd) 457 return 0; 458 459 devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); 460 461 return 0; 462} 463 464struct mlx5_fw_reporter_ctx { 465 u8 err_synd; 466 int miss_counter; 467}; 468 469static void 470mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, 471 struct mlx5_fw_reporter_ctx *fw_reporter_ctx) 472{ 473 devlink_fmsg_u8_pair_put(fmsg, "syndrome", fw_reporter_ctx->err_synd); 474 devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", fw_reporter_ctx->miss_counter); 475} 476 477static void 478mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, 479 struct devlink_fmsg *fmsg) 480{ 481 struct mlx5_core_health *health = &dev->priv.health; 482 struct health_buffer __iomem *h = health->health; 483 u8 rfr_severity; 484 int i; 485 486 if (!ioread8(&h->synd)) 487 return; 488 489 devlink_fmsg_pair_nest_start(fmsg, "health buffer"); 490 devlink_fmsg_obj_nest_start(fmsg); 491 devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); 492 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) 493 devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); 494 devlink_fmsg_arr_pair_nest_end(fmsg); 495 devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", 496 ioread32be(&h->assert_exit_ptr)); 497 devlink_fmsg_u32_pair_put(fmsg, "assert_callra", 498 ioread32be(&h->assert_callra)); 499 devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time)); 500 devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); 501 rfr_severity = ioread8(&h->rfr_severity); 502 devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity)); 503 devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity)); 504 devlink_fmsg_u8_pair_put(fmsg, "irisc_index", ioread8(&h->irisc_index)); 505 devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); 506 devlink_fmsg_u32_pair_put(fmsg, "ext_synd", ioread16be(&h->ext_synd)); 507 devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", ioread32be(&h->fw_ver)); 508 devlink_fmsg_obj_nest_end(fmsg); 509 devlink_fmsg_pair_nest_end(fmsg); 510} 511 512static int 513mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, 514 struct devlink_fmsg *fmsg, void *priv_ctx, 515 struct netlink_ext_ack *extack) 516{ 517 struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); 518 int err; 519 520 err = mlx5_fw_tracer_trigger_core_dump_general(dev); 521 if (err) 522 return err; 523 524 if (priv_ctx) { 525 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; 526 527 mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); 528 } 529 530 mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); 531 532 return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); 533} 534 535static void mlx5_fw_reporter_err_work(struct work_struct *work) 536{ 537 struct mlx5_fw_reporter_ctx fw_reporter_ctx; 538 struct mlx5_core_health *health; 539 540 health = container_of(work, struct mlx5_core_health, report_work); 541 542 if (IS_ERR_OR_NULL(health->fw_reporter)) 543 return; 544 545 fw_reporter_ctx.err_synd = health->synd; 546 fw_reporter_ctx.miss_counter = health->miss_counter; 547 if (fw_reporter_ctx.err_synd) { 548 devlink_health_report(health->fw_reporter, 549 "FW syndrome reported", &fw_reporter_ctx); 550 return; 551 } 552 if (fw_reporter_ctx.miss_counter) 553 devlink_health_report(health->fw_reporter, 554 "FW miss counter reported", 555 &fw_reporter_ctx); 556} 557 558static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = { 559 .name = "fw", 560 .diagnose = mlx5_fw_reporter_diagnose, 561 .dump = mlx5_fw_reporter_dump, 562}; 563 564static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { 565 .name = "fw", 566 .diagnose = mlx5_fw_reporter_diagnose, 567}; 568 569static int 570mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, 571 void *priv_ctx, 572 struct netlink_ext_ack *extack) 573{ 574 struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); 575 576 return mlx5_health_try_recover(dev); 577} 578 579static int 580mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, 581 struct devlink_fmsg *fmsg, void *priv_ctx, 582 struct netlink_ext_ack *extack) 583{ 584 struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); 585 u32 crdump_size = dev->priv.health.crdump_size; 586 u32 *cr_data; 587 int err; 588 589 if (!mlx5_core_is_pf(dev)) 590 return -EPERM; 591 592 cr_data = kvmalloc(crdump_size, GFP_KERNEL); 593 if (!cr_data) 594 return -ENOMEM; 595 err = mlx5_crdump_collect(dev, cr_data); 596 if (err) 597 goto free_data; 598 599 if (priv_ctx) { 600 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; 601 602 mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); 603 } 604 605 devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size); 606 607free_data: 608 kvfree(cr_data); 609 return err; 610} 611 612static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) 613{ 614 struct mlx5_fw_reporter_ctx fw_reporter_ctx; 615 struct mlx5_core_health *health; 616 struct mlx5_core_dev *dev; 617 struct devlink *devlink; 618 struct mlx5_priv *priv; 619 620 health = container_of(work, struct mlx5_core_health, fatal_report_work); 621 priv = container_of(health, struct mlx5_priv, health); 622 dev = container_of(priv, struct mlx5_core_dev, priv); 623 devlink = priv_to_devlink(dev); 624 625 mutex_lock(&dev->intf_state_mutex); 626 if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) { 627 mlx5_core_err(dev, "health works are not permitted at this stage\n"); 628 mutex_unlock(&dev->intf_state_mutex); 629 return; 630 } 631 mutex_unlock(&dev->intf_state_mutex); 632 enter_error_state(dev, false); 633 if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { 634 devl_lock(devlink); 635 if (mlx5_health_try_recover(dev)) 636 mlx5_core_err(dev, "health recovery failed\n"); 637 devl_unlock(devlink); 638 return; 639 } 640 fw_reporter_ctx.err_synd = health->synd; 641 fw_reporter_ctx.miss_counter = health->miss_counter; 642 if (devlink_health_report(health->fw_fatal_reporter, 643 "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { 644 /* If recovery wasn't performed, due to grace period, 645 * unload the driver. This ensures that the driver 646 * closes all its resources and it is not subjected to 647 * requests from the kernel. 648 */ 649 mlx5_core_err(dev, "Driver is in error state. Unloading\n"); 650 mlx5_unload_one(dev, false); 651 } 652} 653 654static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = { 655 .name = "fw_fatal", 656 .recover = mlx5_fw_fatal_reporter_recover, 657 .dump = mlx5_fw_fatal_reporter_dump, 658}; 659 660static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { 661 .name = "fw_fatal", 662 .recover = mlx5_fw_fatal_reporter_recover, 663}; 664 665#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 666#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 667#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 668#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 669 670void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) 671{ 672 const struct devlink_health_reporter_ops *fw_fatal_ops; 673 struct mlx5_core_health *health = &dev->priv.health; 674 const struct devlink_health_reporter_ops *fw_ops; 675 struct devlink *devlink = priv_to_devlink(dev); 676 u64 grace_period; 677 678 fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops; 679 fw_ops = &mlx5_fw_reporter_pf_ops; 680 if (mlx5_core_is_ecpf(dev)) { 681 grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; 682 } else if (mlx5_core_is_pf(dev)) { 683 grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; 684 } else { 685 /* VF or SF */ 686 grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; 687 fw_fatal_ops = &mlx5_fw_fatal_reporter_ops; 688 fw_ops = &mlx5_fw_reporter_ops; 689 } 690 691 health->fw_reporter = 692 devl_health_reporter_create(devlink, fw_ops, 0, dev); 693 if (IS_ERR(health->fw_reporter)) 694 mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", 695 PTR_ERR(health->fw_reporter)); 696 697 health->fw_fatal_reporter = 698 devl_health_reporter_create(devlink, 699 fw_fatal_ops, 700 grace_period, 701 dev); 702 if (IS_ERR(health->fw_fatal_reporter)) 703 mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", 704 PTR_ERR(health->fw_fatal_reporter)); 705} 706 707static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) 708{ 709 struct mlx5_core_health *health = &dev->priv.health; 710 711 if (!IS_ERR_OR_NULL(health->fw_reporter)) 712 devlink_health_reporter_destroy(health->fw_reporter); 713 714 if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) 715 devlink_health_reporter_destroy(health->fw_fatal_reporter); 716} 717 718static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) 719{ 720 unsigned long next; 721 722 get_random_bytes(&next, sizeof(next)); 723 next %= HZ; 724 next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL)); 725 726 return next; 727} 728 729void mlx5_trigger_health_work(struct mlx5_core_dev *dev) 730{ 731 struct mlx5_core_health *health = &dev->priv.health; 732 733 if (!mlx5_dev_is_lightweight(dev)) 734 queue_work(health->wq, &health->fatal_report_work); 735} 736 737#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) 738static void mlx5_health_log_ts_update(struct work_struct *work) 739{ 740 struct delayed_work *dwork = to_delayed_work(work); 741 u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {}; 742 u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {}; 743 struct mlx5_core_health *health; 744 struct mlx5_core_dev *dev; 745 struct mlx5_priv *priv; 746 u64 now_us; 747 748 health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work); 749 priv = container_of(health, struct mlx5_priv, health); 750 dev = container_of(priv, struct mlx5_core_dev, priv); 751 752 now_us = ktime_to_us(ktime_get_real()); 753 754 MLX5_SET(mrtc_reg, in, time_h, now_us >> 32); 755 MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF); 756 mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1); 757 758 queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 759 msecs_to_jiffies(MLX5_MSEC_PER_HOUR)); 760} 761 762static void poll_health(struct timer_list *t) 763{ 764 struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); 765 struct mlx5_core_health *health = &dev->priv.health; 766 struct health_buffer __iomem *h = health->health; 767 u32 fatal_error; 768 u8 prev_synd; 769 u32 count; 770 771 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 772 goto out; 773 774 fatal_error = mlx5_health_check_fatal_sensors(dev); 775 776 if (fatal_error && !health->fatal_error) { 777 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); 778 dev->priv.health.fatal_error = fatal_error; 779 print_health_info(dev); 780 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 781 mlx5_trigger_health_work(dev); 782 return; 783 } 784 785 count = ioread32be(health->health_counter); 786 if (count == health->prev) 787 ++health->miss_counter; 788 else 789 health->miss_counter = 0; 790 791 health->prev = count; 792 if (health->miss_counter == MAX_MISSES) { 793 mlx5_core_err(dev, "device's health compromised - reached miss count\n"); 794 print_health_info(dev); 795 queue_work(health->wq, &health->report_work); 796 } 797 798 prev_synd = health->synd; 799 health->synd = ioread8(&h->synd); 800 if (health->synd && health->synd != prev_synd) 801 queue_work(health->wq, &health->report_work); 802 803out: 804 mod_timer(&health->timer, get_next_poll_jiffies(dev)); 805} 806 807void mlx5_start_health_poll(struct mlx5_core_dev *dev) 808{ 809 u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL); 810 struct mlx5_core_health *health = &dev->priv.health; 811 812 timer_setup(&health->timer, poll_health, 0); 813 health->fatal_error = MLX5_SENSOR_NO_ERR; 814 clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags); 815 health->health = &dev->iseg->health; 816 health->health_counter = &dev->iseg->health_counter; 817 818 health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); 819 add_timer(&health->timer); 820} 821 822void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) 823{ 824 struct mlx5_core_health *health = &dev->priv.health; 825 826 if (disable_health) 827 set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); 828 829 del_timer_sync(&health->timer); 830} 831 832void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) 833{ 834 struct mlx5_core_health *health = &dev->priv.health; 835 836 if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) 837 queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); 838} 839 840void mlx5_drain_health_wq(struct mlx5_core_dev *dev) 841{ 842 struct mlx5_core_health *health = &dev->priv.health; 843 844 set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); 845 cancel_delayed_work_sync(&health->update_fw_log_ts_work); 846 cancel_work_sync(&health->report_work); 847 cancel_work_sync(&health->fatal_report_work); 848} 849 850void mlx5_health_cleanup(struct mlx5_core_dev *dev) 851{ 852 struct mlx5_core_health *health = &dev->priv.health; 853 854 cancel_delayed_work_sync(&health->update_fw_log_ts_work); 855 destroy_workqueue(health->wq); 856 mlx5_reporter_vnic_destroy(dev); 857 mlx5_fw_reporters_destroy(dev); 858} 859 860int mlx5_health_init(struct mlx5_core_dev *dev) 861{ 862 struct devlink *devlink = priv_to_devlink(dev); 863 struct mlx5_core_health *health; 864 char *name; 865 866 if (!mlx5_dev_is_lightweight(dev)) { 867 devl_lock(devlink); 868 mlx5_fw_reporters_create(dev); 869 devl_unlock(devlink); 870 } 871 mlx5_reporter_vnic_create(dev); 872 873 health = &dev->priv.health; 874 name = kmalloc(64, GFP_KERNEL); 875 if (!name) 876 goto out_err; 877 878 strcpy(name, "mlx5_health"); 879 strcat(name, dev_name(dev->device)); 880 health->wq = create_singlethread_workqueue(name); 881 kfree(name); 882 if (!health->wq) 883 goto out_err; 884 INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); 885 INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); 886 INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); 887 888 return 0; 889 890out_err: 891 mlx5_reporter_vnic_destroy(dev); 892 mlx5_fw_reporters_destroy(dev); 893 return -ENOMEM; 894} 895