/* $NetBSD: amdgpu_amdkfd_gfx_v9.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $ */

/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_amdkfd_gfx_v9.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"


enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};
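/*
 * These values are written verbatim to mmCP_HQD_DEQUEUE_REQUEST in
 * kgd_gfx_v9_hqd_destroy() below; DRAIN_PIPE asks the CP to let queued
 * work complete before dequeueing, while RESET_WAVES kills in-flight
 * waves outright.
 */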
/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}
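/*
 * Illustrative example for the dual-hub mapping below: with vmid == 3,
 * the GFX ATC entry is written at mmATC_VMID0_PASID_MAPPING + 3 and the
 * MMHUB entry at mmATC_VMID16_PASID_MAPPING + 3, and the UPDATE_STATUS
 * bits polled and acked for the two writes are bit 3 and bit 19.
 */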
int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
	};
	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, retval);

	return retval;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
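/*
 * Worked trace (made-up numbers) of the WPTR guess in
 * kgd_gfx_v9_hqd_load() below: for a ring of 0x1000 dwords (QUEUE_SIZE
 * field == 11, 2 << 11 == 0x1000), a saved RPTR of 0xff0 and a saved
 * 64-bit WPTR of 0x2340, the WPTR's low bits (0x340) are below the
 * RPTR's (0xff0), so one queue size is added for the wrap:
 * 0xff0 + 0x1000 + 0x2000 == 0x3ff0. The guess always keeps the RPTR's
 * offset within the ring (0x3ff0 & 0xfff == 0xff0) and never moves the
 * free-running WPTR backwards (0x3ff0 >= 0x2340).
 */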
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}
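/*
 * Unlike the direct MMIO programming in kgd_gfx_v9_hqd_load() above, the
 * HIQ MQD below is loaded indirectly by posting a MAP_QUEUES packet on
 * the KIQ ring: seven dwords total (the PACKET3 header plus six payload
 * dwords), matching the amdgpu_ring_alloc(kiq_ring, 7) reservation.
 */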
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
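/*
 * The SDMA load below mirrors the CP path: disable the ring buffer
 * (clear RB_ENABLE), poll CONTEXT_STATUS until the engine reports idle
 * (capped at 2000 ms), program the saved MQD state and the user write
 * pointer, and only then set RB_ENABLE again.
 */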
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
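/*
 * Destroying a CP queue below is a request/poll handshake: the selected
 * hqd_dequeue_request_type is written to mmCP_HQD_DEQUEUE_REQUEST, then
 * mmCP_HQD_ACTIVE is polled until its ACTIVE bit clears or the caller's
 * timeout (utimeout, in milliseconds) expires.
 */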
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}
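/*
 * On gfx v9 a KFD VMID is used by both the GFX and MM hubs, so the page
 * table base below is programmed into gfxhub and mmhub alike; the
 * amdgpu_amdkfd_is_kfd_vmid() check prevents clobbering a VMID owned by
 * graphics rather than KFD.
 */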
static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}

const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};