Lines Matching refs:kfd

42  * kfd_locked is used to lock the kfd driver during suspend or reset
43 * once locked, the kfd driver will stop any further GPU execution.
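
The comment on lines 42-43 describes a driver-wide gate taken around suspend and reset. Below is a minimal userspace sketch of that counting-lock pattern; the helper names (kfd_lock, kfd_unlock, kfd_start_new_work), the mutex, and the -EAGAIN return are assumptions for illustration, not the driver's exact interface.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

/* Illustrative counting lock: suspend/reset paths take it, resume paths
 * release it, and new GPU work is refused while it is held. This mirrors
 * the pattern the comment describes, not the driver's exact code.
 */
static int kfd_locked;
static pthread_mutex_t kfd_lock_mutex = PTHREAD_MUTEX_INITIALIZER;

static void kfd_lock(void)		/* e.g. entering suspend or reset */
{
	pthread_mutex_lock(&kfd_lock_mutex);
	kfd_locked++;
	pthread_mutex_unlock(&kfd_lock_mutex);
}

static void kfd_unlock(void)		/* e.g. resume completed */
{
	pthread_mutex_lock(&kfd_lock_mutex);
	kfd_locked--;
	pthread_mutex_unlock(&kfd_lock_mutex);
}

static int kfd_start_new_work(void)
{
	int ret = 0;

	pthread_mutex_lock(&kfd_lock_mutex);
	if (kfd_locked > 0)
		ret = -EAGAIN;		/* driver is suspended/resetting: refuse */
	pthread_mutex_unlock(&kfd_lock_mutex);
	return ret;
}

int main(void)
{
	kfd_lock();
	printf("while locked: %d\n", kfd_start_new_work());	/* -EAGAIN */
	kfd_unlock();
	printf("after unlock: %d\n", kfd_start_new_work());	/* 0 */
	return 0;
}
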
60 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
62 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
64 static int kfd_resume(struct kfd_node *kfd);
66 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
68 uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
80 kfd->device_info.num_sdma_queues_per_engine = 2;
98 kfd->device_info.num_sdma_queues_per_engine = 8;
104 kfd->device_info.num_sdma_queues_per_engine = 8;
107 bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
115 kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
117 bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
118 kfd->adev->sdma.num_instances *
119 kfd->device_info.num_reserved_sdma_queues_per_engine);
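
The bitmap_zero()/bitmap_set() fragments just above (lines 107-119) reserve the first num_instances * num_reserved_sdma_queues_per_engine queue slots for the driver. A compilable sketch of that arithmetic, with example counts that are not tied to any particular ASIC:

#include <stdio.h>

/* Sketch of bitmap_set(reserved_sdma_queues_bitmap, 0,
 *	num_instances * num_reserved_sdma_queues_per_engine):
 * one bit per SDMA queue, low bits reserved. The counts are examples only.
 */
int main(void)
{
	unsigned int num_sdma_instances = 4;		/* example engine count */
	unsigned int num_reserved_per_engine = 2;	/* value used for some GC versions above */
	unsigned int nbits = num_sdma_instances * num_reserved_per_engine;
	unsigned long long reserved_bitmap = 0;

	for (unsigned int i = 0; i < nbits; i++)
		reserved_bitmap |= 1ULL << i;		/* reserve queue slot i */

	printf("reserved %u queues, bitmap = 0x%llx\n", nbits, reserved_bitmap);
	return 0;
}
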
126 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
128 uint32_t gc_version = KFD_GC_VERSION(kfd);
139 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
142 kfd->device_info.event_interrupt_class =
158 kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
165 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
170 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
174 static void kfd_device_info_init(struct kfd_dev *kfd,
177 uint32_t gc_version = KFD_GC_VERSION(kfd);
178 uint32_t asic_type = kfd->adev->asic_type;
180 kfd->device_info.max_pasid_bits = 16;
181 kfd->device_info.max_no_of_hqd = 24;
182 kfd->device_info.num_of_watch_points = 4;
183 kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
184 kfd->device_info.gfx_target_version = gfx_target_version;
186 if (KFD_IS_SOC15(kfd)) {
187 kfd->device_info.doorbell_size = 8;
188 kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
189 kfd->device_info.supports_cwsr = true;
191 kfd_device_info_set_sdma_info(kfd);
193 kfd_device_info_set_event_interrupt_class(kfd);
198 kfd->device_info.no_atomic_fw_version = 14;
200 kfd->device_info.no_atomic_fw_version = 3;
202 kfd->device_info.no_atomic_fw_version = 92;
204 kfd->device_info.no_atomic_fw_version = 145;
208 kfd->device_info.needs_pci_atomics = true;
215 kfd->device_info.needs_pci_atomics = true;
216 kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
219 kfd->device_info.doorbell_size = 4;
220 kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
221 kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
222 kfd->device_info.num_sdma_queues_per_engine = 2;
227 kfd->device_info.supports_cwsr = true;
230 kfd->device_info.needs_pci_atomics = true;
236 struct kfd_dev *kfd = NULL;
417 dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
420 dev_err(kfd_device, "%s %s not supported in kfd\n",
425 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
426 if (!kfd)
429 kfd->adev = adev;
430 kfd_device_info_init(kfd, vf, gfx_target_version);
431 kfd->init_complete = false;
432 kfd->kfd2kgd = f2g;
433 atomic_set(&kfd->compute_profile, 0);
435 mutex_init(&kfd->doorbell_mutex);
437 ida_init(&kfd->doorbell_ida);
439 return kfd;
442 static void kfd_cwsr_init(struct kfd_dev *kfd)
444 if (cwsr_enable && kfd->device_info.supports_cwsr) {
445 if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
447 kfd->cwsr_isa = cwsr_trap_gfx8_hex;
448 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
449 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
451 kfd->cwsr_isa = cwsr_trap_arcturus_hex;
452 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
453 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
455 kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
456 kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
457 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
459 kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
460 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
461 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
463 kfd->cwsr_isa = cwsr_trap_gfx9_hex;
464 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
465 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
467 kfd->cwsr_isa = cwsr_trap_nv1x_hex;
468 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
469 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
471 kfd->cwsr_isa = cwsr_trap_gfx10_hex;
472 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
475 kfd->cwsr_isa = cwsr_trap_gfx11_hex;
476 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
479 kfd->cwsr_enabled = true;
486 struct kfd_dev *kfd = node->kfd;
494 && kfd->mec2_fw_version >= 0x81b3) ||
496 && kfd->mec2_fw_version >= 0x1b3) ||
498 && kfd->mec2_fw_version >= 0x30) ||
500 && kfd->mec2_fw_version >= 0x28) ||
504 && kfd->mec2_fw_version >= 0x6b) ||
568 static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
574 knode = kfd->nodes[i];
581 kfd->nodes[i] = NULL;
620 bool kgd2kfd_device_init(struct kfd_dev *kfd,
630 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
632 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
634 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
636 kfd->shared_resources = *gpu_resources;
638 kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
640 if (kfd->num_nodes == 0) {
643 kfd->adev->gfx.num_xcc_per_xcp);
651 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
652 if (!kfd->pci_atomic_requested &&
653 kfd->device_info.needs_pci_atomics &&
654 (!kfd->device_info.no_atomic_fw_version ||
655 kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
658 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
659 kfd->mec_fw_version,
660 kfd->device_info.no_atomic_fw_version);
677 if (kfd->adev->xcp_mgr) {
678 partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
681 kfd->num_nodes != 1) {
695 kfd->device_info.mqd_size_aligned;
701 map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
715 kfd->adev, size, &kfd->gtt_mem,
716 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
725 if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
730 if (kfd_doorbell_init(kfd)) {
737 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
744 if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
745 kfd->hive_id = pci_dev_id(kfd->adev->pdev);
747 kfd->noretry = kfd->adev->gmc.noretry;
749 kfd_cwsr_init(kfd);
752 kfd->num_nodes);
755 for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
761 node->adev = kfd->adev;
762 node->kfd = kfd;
763 node->kfd2kgd = kfd->kfd2kgd;
765 node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
773 (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
782 if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
784 kfd->num_nodes != 1) {
807 amdgpu_amdkfd_get_local_mem_info(kfd->adev,
810 if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
818 kfd->nodes[i] = node;
821 svm_range_set_max_pages(kfd->adev);
823 spin_lock_init(&kfd->watch_points_lock);
825 kfd->init_complete = true;
826 dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
827 kfd->adev->pdev->device);
829 pr_debug("Starting kfd with the following scheduling policy %d\n",
836 kfd_cleanup_nodes(kfd, i);
837 kfd_doorbell_fini(kfd);
839 kfd_gtt_sa_fini(kfd);
841 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
845 kfd->adev->pdev->vendor, kfd->adev->pdev->device);
847 return kfd->init_complete;
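
One non-obvious step in kgd2kfd_device_init() above is the PCIe-atomics gate around lines 651-660: a device that needs atomics but cannot get them from the platform is accepted only if its MEC firmware is at least the no_atomic_fw_version fallback. A standalone sketch of that condition as I read the fragments (the helper name and the example firmware versions are assumptions):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the atomics gate: the device is rejected only when the platform
 * lacks PCIe atomics, the ASIC needs them, and the MEC firmware is older than
 * the first "no-atomics" firmware version (or no such version exists).
 */
static bool kfd_atomics_ok(bool pci_atomic_requested, bool needs_pci_atomics,
			   unsigned int mec_fw_version,
			   unsigned int no_atomic_fw_version)
{
	if (pci_atomic_requested)
		return true;		/* platform provides PCIe atomics */
	if (!needs_pci_atomics)
		return true;		/* ASIC does not rely on them */
	/* fall back to firmware that tolerates missing atomics, if new enough */
	return no_atomic_fw_version && mec_fw_version >= no_atomic_fw_version;
}

int main(void)
{
	printf("%d\n", kfd_atomics_ok(false, true, 500, 509));	/* rejected: 0 */
	printf("%d\n", kfd_atomics_ok(false, true, 520, 509));	/* accepted: 1 */
	return 0;
}
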
850 void kgd2kfd_device_exit(struct kfd_dev *kfd)
852 if (kfd->init_complete) {
854 kfd_cleanup_nodes(kfd, kfd->num_nodes);
856 kfd_doorbell_fini(kfd);
857 ida_destroy(&kfd->doorbell_ida);
858 kfd_gtt_sa_fini(kfd);
859 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
862 kfree(kfd);
865 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
870 if (!kfd->init_complete)
873 for (i = 0; i < kfd->num_nodes; i++) {
874 node = kfd->nodes[i];
879 kgd2kfd_suspend(kfd, false);
881 for (i = 0; i < kfd->num_nodes; i++)
882 kfd_signal_reset_event(kfd->nodes[i]);
893 int kgd2kfd_post_reset(struct kfd_dev *kfd)
899 if (!kfd->init_complete)
902 for (i = 0; i < kfd->num_nodes; i++) {
903 ret = kfd_resume(kfd->nodes[i]);
912 for (i = 0; i < kfd->num_nodes; i++) {
913 node = kfd->nodes[i];
927 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
932 if (!kfd->init_complete)
935 /* for runtime suspend, skip locking kfd */
944 for (i = 0; i < kfd->num_nodes; i++) {
945 node = kfd->nodes[i];
950 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
954 if (!kfd->init_complete)
957 for (i = 0; i < kfd->num_nodes; i++) {
958 ret = kfd_resume(kfd->nodes[i]);
963 /* for runtime resume, skip unlocking kfd */
1004 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
1011 if (!kfd->init_complete)
1014 if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
1019 for (i = 0; i < kfd->num_nodes; i++) {
1020 node = kfd->nodes[i];
1126 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
1136 kfd->gtt_sa_chunk_size = chunk_size;
1137 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
1139 kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
1141 if (!kfd->gtt_sa_bitmap)
1145 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
1147 mutex_init(&kfd->gtt_sa_lock);
1152 static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
1154 mutex_destroy(&kfd->gtt_sa_lock);
1155 bitmap_free(kfd->gtt_sa_bitmap);
1176 struct kfd_dev *kfd = node->kfd;
1181 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
1192 mutex_lock(&kfd->gtt_sa_lock);
1196 found = find_next_zero_bit(kfd->gtt_sa_bitmap,
1197 kfd->gtt_sa_num_of_chunks,
1203 if (found == kfd->gtt_sa_num_of_chunks)
1210 kfd->gtt_start_gpu_addr,
1212 kfd->gtt_sa_chunk_size);
1214 kfd->gtt_start_cpu_ptr,
1216 kfd->gtt_sa_chunk_size);
1222 if (size <= kfd->gtt_sa_chunk_size) {
1224 __set_bit(found, kfd->gtt_sa_bitmap);
1229 cur_size = size - kfd->gtt_sa_chunk_size;
1232 find_next_zero_bit(kfd->gtt_sa_bitmap,
1233 kfd->gtt_sa_num_of_chunks, ++found);
1247 if (found == kfd->gtt_sa_num_of_chunks)
1251 if (cur_size <= kfd->gtt_sa_chunk_size)
1254 cur_size -= kfd->gtt_sa_chunk_size;
1262 bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
1266 mutex_unlock(&kfd->gtt_sa_lock);
1271 mutex_unlock(&kfd->gtt_sa_lock);
1278 struct kfd_dev *kfd = node->kfd;
1287 mutex_lock(&kfd->gtt_sa_lock);
1290 bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
1293 mutex_unlock(&kfd->gtt_sa_lock);
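
The GTT sub-allocator fragments above (lines 1126-1293) treat the shared GTT buffer as an array of fixed-size chunks tracked by a bitmap; a chunk index found in the bitmap maps to GPU and CPU addresses by offsetting gtt_start_gpu_addr and gtt_start_cpu_ptr. A compilable sketch of that mapping (only the 512-byte chunk size comes from the listing, via kfd_gtt_sa_init(kfd, size, 512); the other sizes are examples):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch of the chunk math: the buffer holds buf_size / chunk_size chunks,
 * free chunks are tracked in a bitmap, and a chunk index "found" maps to
 *	gpu_addr = gtt_start_gpu_addr + found * chunk_size
 *	cpu_ptr  = gtt_start_cpu_ptr  + found * chunk_size
 */
int main(void)
{
	uint64_t gtt_start_gpu_addr = 0x100000;	/* example base GPU address */
	unsigned int chunk_size = 512;		/* matches kfd_gtt_sa_init(kfd, size, 512) */
	unsigned int num_chunks = 64;		/* example: 32 KiB buffer */
	unsigned int found = 7;			/* example: first free bit found */
	uint8_t *gtt_start_cpu_ptr = calloc(num_chunks, chunk_size);

	if (!gtt_start_cpu_ptr)
		return 1;

	printf("chunk %u -> gpu 0x%llx, cpu %p\n", found,
	       (unsigned long long)(gtt_start_gpu_addr + (uint64_t)found * chunk_size),
	       (void *)(gtt_start_cpu_ptr + (size_t)found * chunk_size));

	free(gtt_start_cpu_ptr);
	return 0;
}
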
1299 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1306 if (kfd)
1307 atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
1312 if (atomic_inc_return(&node->kfd->compute_profile) == 1)
1318 int count = atomic_dec_return(&node->kfd->compute_profile);
1325 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
1332 if (kfd && kfd->init_complete)
1333 kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
1346 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes;
1348 return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2);
1354 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes -
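
The last three fragments (lines 1346-1354) split the ASIC's SDMA instances evenly across the device's KFD nodes, capping one variant at 2 engines; the third variant subtracts a further count that is truncated in the listing, so it is not reproduced here. A small sketch of the division and cap, using example inputs:

#include <stdio.h>

/* Sketch of the per-node SDMA split shown above:
 *	engines per node = num_instances / num_nodes
 *	capped variant   = min(engines per node, 2)
 * Example inputs only; the real values come from the ASIC and partition mode.
 */
static int min_int(int a, int b)
{
	return a < b ? a : b;
}

int main(void)
{
	int num_sdma_instances = 8;	/* example: SDMA engines on the ASIC */
	int num_nodes = 4;		/* example: KFD partition nodes */
	int per_node = num_sdma_instances / num_nodes;

	printf("SDMA engines per node: %d\n", per_node);
	printf("capped variant:        %d\n", min_int(per_node, 2));
	return 0;
}
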