/*	$NetBSD: amdgpu_amdkfd_gfx_v9.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_amdkfd_gfx_v9.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "oss/osssys_4_0_offset.h"
#include "oss/osssys_4_0_sh_mask.h"
#include "soc15_common.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"


enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}

int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
	       pasid_mapping);

	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << (vmid + 16))))
		cpu_relax();

	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << (vmid + 16));

	/* Mapping vmid to pasid also for IH block */
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
	       pasid_mapping);
	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

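/* Compute the register base of an SDMA engine's per-queue (RLC) register
 * block.  SDMA0 and SDMA1 live in separate apertures, and queues within an
 * engine are spaced (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL) registers
 * apart.
 */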
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
	};
	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, retval);

	return retval;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

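/* Load an updated MQD into the HQD registers of the given pipe/queue and
 * activate it.  If a user-space write pointer is supplied, a one-shot WPTR
 * poll is armed so the CP fetches the current value from memory itself.
 */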
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uintptr_t)wptr));
		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uintptr_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

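/* Map the HIQ by submitting a MAP_QUEUES packet on the KIQ ring rather than
 * programming the HQD registers directly; the CP then loads the MQD itself.
 */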
int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v9_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

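/* Load an SDMA RLC queue: disable the ring buffer, wait for the engine to go
 * idle, program doorbell/rptr/wptr/base from the MQD, then re-enable it.
 */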
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

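/* Dump the SDMA RLC registers for debugging; HQD_N_REGS is redefined here to
 * cover the four register ranges read below.
 */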
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

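/* Preempt or reset the HQD by issuing a dequeue request, then poll
 * CP_HQD_ACTIVE until the queue goes inactive or the timeout expires.
 */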
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

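/* Broadcast an SQ_CMD to the waves selected by gfx_index_val, then restore
 * GRBM_GFX_INDEX to broadcast writes across all SEs, SHs and instances.
 */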
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
		uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}

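/* Interface functions exported to KFD for GFX9 ASICs. */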
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};