Lines Matching refs:ras

49 static const char *RAS_FS_NAME = "ras";
89 /* ras block link */
201 dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
431 * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
432 * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
433 * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
457 * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
458 * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
459 * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
463 * To check disable/enable, see "ras" features at,
464 * /sys/class/drm/card[0/1/2...]/device/ras/features
467 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
471 * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
558 * echo 1 > ../ras/ras_eeprom_reset
602 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
761 /* If hardware does not support ras, then do not create obj.
762 * But if hardware supports ras, we can create the obj.
765 * IP checks con->support to see if it needs to disable ras.
801 /* For non-gfx ip, do not enable ras feature if it is not allowed */
803 /* Force issue enable or disable ras feature commands */
808 /* Only enable gfx ras feature from host side */
830 dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
859 /* There is no harm to issue a ras TA cmd regardless of
860 * the current ras state.
866 /* With old ras TA, we might fail to enable ras.
878 /* setup the object then issue a ras TA disable cmd.*/
883 /* gfx block ras disable cmd must be sent to ras-ta */
889 /* clean gfx block ras features flag */
940 * bypass psp. vbios enable ras for us.
960 * bypass psp. vbios enable ras for us.
998 dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
1017 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1024 ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
1026 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1027 adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
1028 adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
1033 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1034 adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
1035 adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
1037 if (adev->umc.ras &&
1038 adev->umc.ras->ecc_info_query_ras_error_count)
1039 adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
1041 if (adev->umc.ras &&
1042 adev->umc.ras->ecc_info_query_ras_error_address)
1043 adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
1412 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1434 /* skip ras error reset in gpu reset */
1435 if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
1514 dev_err(adev->dev, "ras inject %s failed %d\n",
1568 * all the ip blocks that support query ras error counters/status
1595 /* query all the ip blocks that support ras query interface */
1644 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
1836 * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1842 * echo true > .../ras/auto_reboot
1882 * of RAS IPs during ras recovery.
1942 /* ras fs */
2015 /* ras fs end */
2030 if (adev->nbio.ras &&
2031 adev->nbio.ras->handle_ras_controller_intr_no_bifring)
2032 adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
2034 if (adev->nbio.ras &&
2035 adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
2036 adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
2227 /* in case we register the IH before enabling ras feature */
2291 * PCIE_BIF IP has one different isr by ras controller
2292 * interrupt, the specific ras counter query will be
2434 struct amdgpu_ras *ras =
2437 struct amdgpu_device *adev = ras->adev;
2456 if (!ras->disable_ras_err_cnt_harvest) {
2475 if (amdgpu_device_should_recover_gpu(ras->adev)) {
2483 if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
2488 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
2489 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
2496 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
2497 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
2504 amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
2506 atomic_set(&ras->in_recovery, 0);
2628 &adev->psp.ras_context.ras->eeprom_control;
2690 * Justification of value bad_page_cnt_threshold in ras structure
2814 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
2819 ecc_log = &ras->umc_ecc_log;
2824 dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret);
2899 dev_info(adev->dev, "Start processing ras block %s(%d)\n",
3014 dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
3092 * force enable gfx ras, ignore vbios gfx ras flag
3109 /* Query ras capability via atomfirmware interface */
3173 adev->umc.ras &&
3174 adev->umc.ras->query_ras_poison_mode) {
3178 adev->umc.ras->query_ras_poison_mode(adev);
3191 * check hardware's ras ability which will be saved in hw_supported.
3192 * if hardware does not support ras, we can skip some ras initialization and
3193 * forbid some ras operations from IP.
3194 * if software itself, say boot parameter, limit the ras ability. We still
3196 * we have to initialize ras as normal. but need check if operation is
3206 /* query ras capability from psp */
3210 /* query ras capability from bios */
3281 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3284 if (!ras)
3288 ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
3293 ras_event_mgr_init(ras->event_mgr);
3327 /* set gfx block ras context feature for VEGA20 Gaming
3328 * send ras disable cmd to ras ta during ras late init.
3347 /* initialize nbio ras function ahead of any other
3348 * ras functions so hardware fatal error interrupt
3355 adev->nbio.ras = &nbio_v7_4_ras;
3359 /* unlike other generation of nbio ras,
3363 * enable nbio ras in such case. Instead,
3365 adev->nbio.ras = &nbio_v4_3_ras;
3369 adev->nbio.ras = &nbio_v7_9_ras;
3372 /* nbio ras is not available */
3376 /* nbio ras block needs to be enabled ahead of other ras blocks
3382 if (adev->nbio.ras &&
3383 adev->nbio.ras->init_ras_controller_interrupt) {
3384 r = adev->nbio.ras->init_ras_controller_interrupt(adev);
3389 if (adev->nbio.ras &&
3390 adev->nbio.ras->init_ras_err_event_athub_interrupt) {
3391 r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
3396 /* Packed socket_id to ras feature mask bits[31:29] */
3410 dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
3478 /* in resume phase, if fail to enable ras,
3479 * clean up all ras fs nodes, and disable ras */
3488 /* in resume phase, no need to create ras fs node */
3539 /* helper function to remove ras fs node and interrupt handler */
3569 /* clean ras context for VEGA20 Gaming after send ras disable cmd */
3577 * tricky thing that IP's actual ras error type should be
3583 /* We enable ras on all hw_supported block, but as boot
3605 /* Make sure all ras objects are disabled. */
3616 /* Guest side doesn't need init ras feature */
3638 dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
3668 /* Need disable ras on all IPs here before ip [hw/sw]fini */
3694 /* Clear ras blocks from ras_list and free ras block list node */
3720 struct amdgpu_ras *ras;
3722 ras = amdgpu_ras_get_context(adev);
3723 if (!ras)
3726 return atomic_read(&ras->fed);
3731 struct amdgpu_ras *ras;
3733 ras = amdgpu_ras_get_context(adev);
3734 if (ras)
3735 atomic_set(&ras->fed, !!status);
3745 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3750 id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]);
3764 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3765 u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]);
3771 ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
3908 return adev->psp.ras_context.ras;
3916 adev->psp.ras_context.ras = ras_con;
3920 /* check if ras is supported on block, say, sdma, gfx */
3925 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3930 ret = ras && (adev->ras_enabled & (1 << block));
3933 * not enabled, even if the ras block is not supported on
3935 * ras block has ras configuration, it can be considered
3936 * that the ras block supports ras function.
3953 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3955 if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
3956 amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
4028 /* Register each ip ras block into amdgpu ras */