Lines Matching refs:kfd

42  * kfd_locked is used to lock the kfd driver during suspend or reset
43  * once locked, the kfd driver will stop any further GPU execution.
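
The matches above reference kfd_locked, the driver-wide counter that blocks new KFD work while a suspend or reset is in flight. Below is a minimal sketch of that idea, assuming a counting lock protected by a mutex and an open() path that refuses new processes with -EAGAIN while the count is non-zero; every name other than kfd_locked is a hypothetical stand-in, not the driver's actual helper.

#include <linux/mutex.h>
#include <linux/errno.h>

/* Sketch only: kfd_locked counts outstanding suspend/reset holders.
 * example_lock and the example_* helpers are hypothetical stand-ins.
 */
static int kfd_locked;
static DEFINE_MUTEX(example_lock);

static void example_lock_kfd(void)
{
	mutex_lock(&example_lock);
	kfd_locked++;			/* nested suspend/reset simply nests */
	mutex_unlock(&example_lock);
}

static int example_open_checked(void)
{
	int busy;

	mutex_lock(&example_lock);
	busy = kfd_locked;
	mutex_unlock(&example_lock);

	/* New GPU work (process creation) is refused while locked. */
	return busy ? -EAGAIN : 0;
}
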
60 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
62 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
64 static int kfd_resume(struct kfd_node *kfd);
66 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
68 uint32_t sdma_version = amdgpu_ip_version(kfd->adev, SDMA0_HWIP, 0);
80 kfd->device_info.num_sdma_queues_per_engine = 2;
100 kfd->device_info.num_sdma_queues_per_engine = 8;
106 kfd->device_info.num_sdma_queues_per_engine = 8;
109 bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
119 kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
121 bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
122 kfd->adev->sdma.num_instances *
123 kfd->device_info.num_reserved_sdma_queues_per_engine);
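
For the reserved-queue bitmap just above, the number of bits set is simply num_instances × num_reserved_sdma_queues_per_engine, starting at bit 0. A small hedged illustration of that arithmetic in plain userspace C (the kernel itself uses DECLARE_BITMAP/bitmap_set; the sizes below are example values, not taken from any specific ASIC):

#include <stdio.h>

int main(void)
{
	unsigned int num_instances = 4;		/* example value */
	unsigned int reserved_per_engine = 2;	/* example value */
	unsigned char reserved[128] = { 0 };
	unsigned int n = num_instances * reserved_per_engine;

	for (unsigned int q = 0; q < n; q++)
		reserved[q] = 1;		/* queues 0..n-1 marked reserved */

	printf("reserved the first %u SDMA queue slots (slot %u -> %u)\n",
	       n, n - 1, (unsigned int)reserved[n - 1]);
	return 0;
}
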
130 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
132 uint32_t gc_version = KFD_GC_VERSION(kfd);
143 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
146 kfd->device_info.event_interrupt_class =
162 kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
171 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
176 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
180 static void kfd_device_info_init(struct kfd_dev *kfd,
183 uint32_t gc_version = KFD_GC_VERSION(kfd);
184 uint32_t asic_type = kfd->adev->asic_type;
186 kfd->device_info.max_pasid_bits = 16;
187 kfd->device_info.max_no_of_hqd = 24;
188 kfd->device_info.num_of_watch_points = 4;
189 kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
190 kfd->device_info.gfx_target_version = gfx_target_version;
192 if (KFD_IS_SOC15(kfd)) {
193 kfd->device_info.doorbell_size = 8;
194 kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
195 kfd->device_info.supports_cwsr = true;
197 kfd_device_info_set_sdma_info(kfd);
199 kfd_device_info_set_event_interrupt_class(kfd);
204 kfd->device_info.no_atomic_fw_version = 14;
206 kfd->device_info.no_atomic_fw_version = 3;
208 kfd->device_info.no_atomic_fw_version = 92;
210 kfd->device_info.no_atomic_fw_version = 145;
214 kfd->device_info.needs_pci_atomics = true;
221 kfd->device_info.needs_pci_atomics = true;
222 kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
225 kfd->device_info.doorbell_size = 4;
226 kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
227 kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
228 kfd->device_info.num_sdma_queues_per_engine = 2;
233 kfd->device_info.supports_cwsr = true;
236 kfd->device_info.needs_pci_atomics = true;
242 struct kfd_dev *kfd = NULL;
439 "GC IP %06x %s not supported in kfd\n",
443 dev_info(kfd_device, "%s %s not supported in kfd\n",
448 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
449 if (!kfd)
452 kfd->adev = adev;
453 kfd_device_info_init(kfd, vf, gfx_target_version);
454 kfd->init_complete = false;
455 kfd->kfd2kgd = f2g;
456 atomic_set(&kfd->compute_profile, 0);
458 mutex_init(&kfd->doorbell_mutex);
460 ida_init(&kfd->doorbell_ida);
462 return kfd;
465 static void kfd_cwsr_init(struct kfd_dev *kfd)
467 if (cwsr_enable && kfd->device_info.supports_cwsr) {
468 if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
471 kfd->cwsr_isa = cwsr_trap_gfx8_hex;
472 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
473 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
476 kfd->cwsr_isa = cwsr_trap_arcturus_hex;
477 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
478 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
481 kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
482 kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
483 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
486 kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
487 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
488 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
491 kfd->cwsr_isa = cwsr_trap_gfx9_hex;
492 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
493 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
496 kfd->cwsr_isa = cwsr_trap_nv1x_hex;
497 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
498 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
501 kfd->cwsr_isa = cwsr_trap_gfx10_hex;
502 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
507 kfd->cwsr_isa = cwsr_trap_gfx11_hex;
508 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
511 kfd->cwsr_enabled = true;
518 struct kfd_dev *kfd = node->kfd;
526 && kfd->mec2_fw_version >= 0x81b3) ||
528 && kfd->mec2_fw_version >= 0x1b3) ||
530 && kfd->mec2_fw_version >= 0x30) ||
532 && kfd->mec2_fw_version >= 0x28) ||
536 && kfd->mec2_fw_version >= 0x6b) ||
600 static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
606 knode = kfd->nodes[i];
613 kfd->nodes[i] = NULL;
652 bool kgd2kfd_device_init(struct kfd_dev *kfd,
662 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
664 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
666 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
668 kfd->shared_resources = *gpu_resources;
670 kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
672 if (kfd->num_nodes == 0) {
675 kfd->adev->gfx.num_xcc_per_xcp);
683 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
684 if (!kfd->pci_atomic_requested &&
685 kfd->device_info.needs_pci_atomics &&
686 (!kfd->device_info.no_atomic_fw_version ||
687 kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
690 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
691 kfd->mec_fw_version,
692 kfd->device_info.no_atomic_fw_version);
709 if (kfd->adev->xcp_mgr) {
710 partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
713 kfd->num_nodes != 1) {
727 kfd->device_info.mqd_size_aligned;
733 map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
747 kfd->adev, size, &kfd->gtt_mem,
748 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
757 if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
762 if (kfd_doorbell_init(kfd)) {
769 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
776 if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
777 kfd->hive_id = pci_dev_id(kfd->adev->pdev);
779 kfd->noretry = kfd->adev->gmc.noretry;
781 kfd_cwsr_init(kfd);
784 kfd->num_nodes);
787 for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
793 node->adev = kfd->adev;
794 node->kfd = kfd;
795 node->kfd2kgd = kfd->kfd2kgd;
797 node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
805 (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
814 if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
816 kfd->num_nodes != 1) {
839 amdgpu_amdkfd_get_local_mem_info(kfd->adev,
842 if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
850 kfd->nodes[i] = node;
853 svm_range_set_max_pages(kfd->adev);
855 spin_lock_init(&kfd->watch_points_lock);
857 kfd->init_complete = true;
858 dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
859 kfd->adev->pdev->device);
861 pr_debug("Starting kfd with the following scheduling policy %d\n",
868 kfd_cleanup_nodes(kfd, i);
869 kfd_doorbell_fini(kfd);
871 kfd_gtt_sa_fini(kfd);
873 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
877 kfd->adev->pdev->vendor, kfd->adev->pdev->device);
879 return kfd->init_complete;
882 void kgd2kfd_device_exit(struct kfd_dev *kfd)
884 if (kfd->init_complete) {
886 kfd_cleanup_nodes(kfd, kfd->num_nodes);
888 kfd_doorbell_fini(kfd);
889 ida_destroy(&kfd->doorbell_ida);
890 kfd_gtt_sa_fini(kfd);
891 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
894 kfree(kfd);
897 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
902 if (!kfd->init_complete)
905 for (i = 0; i < kfd->num_nodes; i++) {
906 node = kfd->nodes[i];
911 kgd2kfd_suspend(kfd, false);
913 for (i = 0; i < kfd->num_nodes; i++)
914 kfd_signal_reset_event(kfd->nodes[i]);
925 int kgd2kfd_post_reset(struct kfd_dev *kfd)
931 if (!kfd->init_complete)
934 for (i = 0; i < kfd->num_nodes; i++) {
935 ret = kfd_resume(kfd->nodes[i]);
944 for (i = 0; i < kfd->num_nodes; i++) {
945 node = kfd->nodes[i];
959 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
964 if (!kfd->init_complete)
967 /* for runtime suspend, skip locking kfd */
976 for (i = 0; i < kfd->num_nodes; i++) {
977 node = kfd->nodes[i];
982 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
986 if (!kfd->init_complete)
989 for (i = 0; i < kfd->num_nodes; i++) {
990 ret = kfd_resume(kfd->nodes[i]);
995 /* for runtime resume, skip unlocking kfd */
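
The two comments above (source lines 967 and 995) describe the same gating: system suspend/resume adjusts the kfd_locked count, while runtime PM (run_pm == true) skips the lock so user processes are not frozen. A hedged sketch of that pattern, assuming hypothetical example_* helpers for the process freeze/thaw side:

#include <linux/mutex.h>
#include <linux/types.h>

/* Sketch of the run_pm gating; names other than kfd_locked are made up. */
static int kfd_locked;
static DEFINE_MUTEX(example_lock);
static void example_suspend_all_processes(void) { }
static int example_resume_all_processes(void) { return 0; }

static void example_suspend(bool run_pm)
{
	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		mutex_lock(&example_lock);
		if (++kfd_locked == 1)	/* first device to go down */
			example_suspend_all_processes();
		mutex_unlock(&example_lock);
	}
	/* per-node queue eviction would still run after this point */
}

static int example_resume(bool run_pm)
{
	int ret = 0;

	/* per-node device queue managers are restarted before this point */

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		mutex_lock(&example_lock);
		if (--kfd_locked == 0)	/* last device back up */
			ret = example_resume_all_processes();
		mutex_unlock(&example_lock);
	}
	return ret;
}
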
1036 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
1043 if (!kfd->init_complete)
1046 if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
1051 for (i = 0; i < kfd->num_nodes; i++) {
1052 node = kfd->nodes[i];
1158 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
1168 kfd->gtt_sa_chunk_size = chunk_size;
1169 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
1171 kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
1173 if (!kfd->gtt_sa_bitmap)
1177 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
1179 mutex_init(&kfd->gtt_sa_lock);
1184 static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
1186 mutex_destroy(&kfd->gtt_sa_lock);
1187 bitmap_free(kfd->gtt_sa_bitmap);
1208 struct kfd_dev *kfd = node->kfd;
1213 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
1224 mutex_lock(&kfd->gtt_sa_lock);
1228 found = find_next_zero_bit(kfd->gtt_sa_bitmap,
1229 kfd->gtt_sa_num_of_chunks,
1235 if (found == kfd->gtt_sa_num_of_chunks)
1242 kfd->gtt_start_gpu_addr,
1244 kfd->gtt_sa_chunk_size);
1246 kfd->gtt_start_cpu_ptr,
1248 kfd->gtt_sa_chunk_size);
1254 if (size <= kfd->gtt_sa_chunk_size) {
1256 __set_bit(found, kfd->gtt_sa_bitmap);
1261 cur_size = size - kfd->gtt_sa_chunk_size;
1264 find_next_zero_bit(kfd->gtt_sa_bitmap,
1265 kfd->gtt_sa_num_of_chunks, ++found);
1279 if (found == kfd->gtt_sa_num_of_chunks)
1283 if (cur_size <= kfd->gtt_sa_chunk_size)
1286 cur_size -= kfd->gtt_sa_chunk_size;
1294 bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
1298 mutex_unlock(&kfd->gtt_sa_lock);
1303 mutex_unlock(&kfd->gtt_sa_lock);
1310 struct kfd_dev *kfd = node->kfd;
1319 mutex_lock(&kfd->gtt_sa_lock);
1322 bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
1325 mutex_unlock(&kfd->gtt_sa_lock);
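
Source lines 1158-1325 sketch kfd's GTT sub-allocator: the pre-allocated GTT buffer is carved into fixed-size chunks tracked by a bitmap, an allocation claims a contiguous run of zero bits, and a free clears them again. The snippet below is a self-contained userspace illustration of that chunk-bitmap technique, not the kernel implementation; all names and sizes are made up.

#include <stdio.h>

#define BUF_SIZE	4096
#define CHUNK_SIZE	512
#define NUM_CHUNKS	(BUF_SIZE / CHUNK_SIZE)

static unsigned char used[NUM_CHUNKS];	/* one flag per chunk */

/* Claim a contiguous run of free chunks; return the first index or -1. */
static int sa_alloc(unsigned int size)
{
	unsigned int need = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;
	unsigned int run = 0;

	for (unsigned int i = 0; i < NUM_CHUNKS; i++) {
		run = used[i] ? 0 : run + 1;
		if (run == need) {
			unsigned int start = i + 1 - need;

			for (unsigned int j = start; j <= i; j++)
				used[j] = 1;
			return (int)start;
		}
	}
	return -1;	/* no contiguous run large enough */
}

static void sa_free(int start, unsigned int size)
{
	int need = (int)((size + CHUNK_SIZE - 1) / CHUNK_SIZE);

	for (int j = start; j < start + need; j++)
		used[j] = 0;
}

int main(void)
{
	int a = sa_alloc(600);	/* 600 bytes -> 2 chunks, starts at 0 */
	int b = sa_alloc(100);	/* 100 bytes -> 1 chunk,  starts at 2 */

	printf("a=%d b=%d\n", a, b);
	sa_free(a, 600);
	sa_free(b, 100);
	return 0;
}
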
1331 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1338 if (kfd)
1339 atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
1344 if (atomic_inc_return(&node->kfd->compute_profile) == 1)
1350 int count = atomic_dec_return(&node->kfd->compute_profile);
1357 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
1364 if (kfd && kfd->init_complete)
1365 kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
1378 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes;
1380 return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2);
1386 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes -
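
The last three matches split the ASIC's SDMA instances across the KFD nodes (partitions): each node gets num_instances / num_nodes engines in total, the PCIe-usable count is capped at 2 on parts that also carry XGMI SDMA, and whatever remains counts as XGMI engines. A hedged arithmetic sketch with hypothetical helper names follows; for example, 8 instances on a single node gives 2 PCIe plus 6 XGMI engines.

#include <stdio.h>

/* Illustrative helpers only; they mirror the per-node division seen in
 * the matches above, with the "cap PCIe engines at 2" rule as a flag.
 */
static unsigned int pcie_sdma_engines(unsigned int instances,
				      unsigned int nodes, int cap_at_two)
{
	unsigned int per_node = instances / nodes;

	return (cap_at_two && per_node > 2) ? 2 : per_node;
}

static unsigned int xgmi_sdma_engines(unsigned int instances,
				      unsigned int nodes, int cap_at_two)
{
	/* Whatever the node owns beyond its PCIe engines is XGMI-only. */
	return instances / nodes -
	       pcie_sdma_engines(instances, nodes, cap_at_two);
}

int main(void)
{
	/* 8 SDMA instances, one node, PCIe capped at 2: prints "pcie=2 xgmi=6" */
	printf("pcie=%u xgmi=%u\n",
	       pcie_sdma_engines(8, 1, 1), xgmi_sdma_engines(8, 1, 1));
	return 0;
}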