Lines Matching defs:kfd

41 * kfd_locked is used to lock the kfd driver during suspend or reset;
42 * once locked, the kfd driver will stop any further GPU execution.
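The comment at lines 41-42 describes kfd_locked, a counter that gates the whole driver while a suspend or GPU reset is in flight. Below is a minimal user-space sketch of that gating idea, assuming a plain mutex-protected counter; gate_lock, gate_unlock and gate_allows_work are illustrative names, not the driver's API.

#include <pthread.h>
#include <stdbool.h>

/* Illustrative only: a suspend/reset gate modelled on the kfd_locked idea. */
static int gate_locked;                 /* >0 means new GPU work is refused */
static pthread_mutex_t gate_mutex = PTHREAD_MUTEX_INITIALIZER;

void gate_lock(void)                    /* taken on suspend or pre-reset */
{
        pthread_mutex_lock(&gate_mutex);
        gate_locked++;
        pthread_mutex_unlock(&gate_mutex);
}

void gate_unlock(void)                  /* released on resume or post-reset */
{
        pthread_mutex_lock(&gate_mutex);
        if (gate_locked > 0)
                gate_locked--;
        pthread_mutex_unlock(&gate_mutex);
}

bool gate_allows_work(void)             /* checked before queuing GPU work */
{
        bool ok;

        pthread_mutex_lock(&gate_mutex);
        ok = (gate_locked == 0);
        pthread_mutex_unlock(&gate_mutex);
        return ok;
}

The suspend/resume and pre/post-reset entry points further down in this listing are where such a gate would naturally be taken and released.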
490 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
492 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
494 static int kfd_resume(struct kfd_dev *kfd);
499 struct kfd_dev *kfd;
513 dev_err(kfd_device, "%s %s not supported in kfd\n",
518 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
519 if (!kfd)
526 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
528 !kfd->pci_atomic_requested) {
532 kfree(kfd);
536 kfd->kgd = kgd;
537 kfd->device_info = device_info;
538 kfd->pdev = pdev;
539 kfd->init_complete = false;
540 kfd->kfd2kgd = f2g;
541 atomic_set(&kfd->compute_profile, 0);
543 mutex_init(&kfd->doorbell_mutex);
544 memset(&kfd->doorbell_available_index, 0,
545 sizeof(kfd->doorbell_available_index));
547 atomic_set(&kfd->sram_ecc_flag, 0);
549 return kfd;
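The kgd2kfd_probe lines above refuse hardware that requires PCIe atomics when the platform cannot provide them, then allocate the kfd_dev with kzalloc and record the handles the device was probed with. A reduced sketch of that probe shape, with hypothetical types and a stand-in for amdgpu_amdkfd_have_atomics_support():

#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical, trimmed-down device descriptor. */
struct dev_info { bool needs_atomics; };
struct dev {
        const struct dev_info *info;
        bool atomics_ok;
        bool init_complete;
};

/* Stand-in for the atomics capability query. */
static bool have_atomics_support(void) { return true; }

/* Probe: refuse unsupported hardware, otherwise allocate and fill in. */
struct dev *probe(const struct dev_info *info)
{
        struct dev *d;
        bool atomics = have_atomics_support();

        if (info->needs_atomics && !atomics)
                return NULL;            /* "not supported", as in kgd2kfd_probe */

        d = calloc(1, sizeof(*d));
        if (!d)
                return NULL;

        d->info = info;
        d->atomics_ok = atomics;
        d->init_complete = false;       /* set true only after device init */
        return d;
}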
552 static void kfd_cwsr_init(struct kfd_dev *kfd)
554 if (cwsr_enable && kfd->device_info->supports_cwsr) {
555 if (kfd->device_info->asic_family < CHIP_VEGA10) {
557 kfd->cwsr_isa = cwsr_trap_gfx8_hex;
558 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
559 } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
561 kfd->cwsr_isa = cwsr_trap_arcturus_hex;
562 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
563 } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
565 kfd->cwsr_isa = cwsr_trap_gfx9_hex;
566 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
569 kfd->cwsr_isa = cwsr_trap_gfx10_hex;
570 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
573 kfd->cwsr_enabled = true;
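kfd_cwsr_init above selects a CWSR trap-handler binary by ASIC family: gfx8 parts, Arcturus, other pre-Navi gfx9 parts, and gfx10 each get their own cwsr_trap_*_hex image. A reduced sketch of that ordered selection, with placeholder arrays standing in for the generated firmware blobs and a trimmed chip enum that only keeps the ordering the checks rely on:

#include <stddef.h>
#include <stdint.h>

/* Placeholder ASIC ordering; only the relative order matters here. */
enum asic_family { CHIP_CARRIZO, CHIP_VEGA10, CHIP_ARCTURUS, CHIP_NAVI10 };

/* Stand-ins for the generated cwsr_trap_*_hex arrays. */
static const uint32_t trap_gfx8[]     = { 0 };
static const uint32_t trap_gfx9[]     = { 0 };
static const uint32_t trap_arcturus[] = { 0 };
static const uint32_t trap_gfx10[]    = { 0 };

struct cwsr_choice {
        const void *isa;
        size_t size;
};

/* Pick the trap handler the same way kfd_cwsr_init orders its checks. */
struct cwsr_choice pick_cwsr_isa(enum asic_family family)
{
        struct cwsr_choice c;

        if (family < CHIP_VEGA10) {
                c.isa = trap_gfx8;      c.size = sizeof(trap_gfx8);
        } else if (family == CHIP_ARCTURUS) {
                c.isa = trap_arcturus;  c.size = sizeof(trap_arcturus);
        } else if (family < CHIP_NAVI10) {
                c.isa = trap_gfx9;      c.size = sizeof(trap_gfx9);
        } else {
                c.isa = trap_gfx10;     c.size = sizeof(trap_gfx10);
        }
        return c;
}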
577 bool kgd2kfd_device_init(struct kfd_dev *kfd,
583 kfd->ddev = ddev;
584 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
586 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
588 kfd->shared_resources = *gpu_resources;
590 kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
591 kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
592 kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
593 - kfd->vm_info.first_vmid_kfd + 1;
597 || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
600 hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
601 kfd->vm_info.vmid_num_kfd);
602 kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
604 kfd->max_proc_per_quantum = hws_max_conc_proc;
607 if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
608 amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
610 amdgpu_amdkfd_get_num_gws(kfd->kgd));
615 kfd->device_info->mqd_size_aligned;
632 kfd->kgd, size, &kfd->gtt_mem,
633 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
642 if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
647 if (kfd_doorbell_init(kfd)) {
653 if (kfd->kfd2kgd->get_hive_id)
654 kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
656 if (kfd_interrupt_init(kfd)) {
661 kfd->dqm = device_queue_manager_init(kfd);
662 if (!kfd->dqm) {
667 if (kfd_iommu_device_init(kfd)) {
672 kfd_cwsr_init(kfd);
674 if (kfd_resume(kfd))
677 kfd->dbgmgr = NULL;
679 if (kfd_topology_add_device(kfd)) {
684 kfd->init_complete = true;
685 dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
686 kfd->pdev->device);
688 pr_debug("Starting kfd with the following scheduling policy %d\n",
689 kfd->dqm->sched_policy);
696 device_queue_manager_uninit(kfd->dqm);
698 kfd_interrupt_exit(kfd);
700 kfd_doorbell_fini(kfd);
702 kfd_gtt_sa_fini(kfd);
704 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
707 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
710 kfd->pdev->vendor, kfd->pdev->device);
712 return kfd->init_complete;
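kgd2kfd_device_init above brings the device up in stages (GTT sub-allocator, doorbells, interrupts, device queue manager, IOMMU, CWSR, topology) and, as the teardown calls around lines 696-710 show, unwinds the completed stages in reverse order when a later one fails. A compact sketch of that goto-unwind idiom with hypothetical steps:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical init steps; each returns 0 on success. */
static int  step_a_init(void) { return 0; }
static void step_a_fini(void) { }
static int  step_b_init(void) { return 0; }
static void step_b_fini(void) { }
static int  step_c_init(void) { return 0; }

/* Mirror the init-or-unwind shape of kgd2kfd_device_init. */
bool device_init(void)
{
        if (step_a_init())
                goto out_err;
        if (step_b_init())
                goto undo_a;
        if (step_c_init())
                goto undo_b;
        return true;            /* init_complete = true */

undo_b:
        step_b_fini();
undo_a:
        step_a_fini();
out_err:
        fprintf(stderr, "device init failed\n");
        return false;           /* init_complete stays false */
}

The same ordering shows up again in kgd2kfd_device_exit below, which tears the stages down in reverse once init_complete has been set.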
715 void kgd2kfd_device_exit(struct kfd_dev *kfd)
717 if (kfd->init_complete) {
718 kgd2kfd_suspend(kfd);
719 device_queue_manager_uninit(kfd->dqm);
720 kfd_interrupt_exit(kfd);
721 kfd_topology_remove_device(kfd);
722 kfd_doorbell_fini(kfd);
723 kfd_gtt_sa_fini(kfd);
724 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
726 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
729 kfree(kfd);
732 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
734 if (!kfd->init_complete)
737 kfd->dqm->ops.pre_reset(kfd->dqm);
739 kgd2kfd_suspend(kfd);
741 kfd_signal_reset_event(kfd);
751 int kgd2kfd_post_reset(struct kfd_dev *kfd)
755 if (!kfd->init_complete)
758 ret = kfd_resume(kfd);
763 atomic_set(&kfd->sram_ecc_flag, 0);
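kgd2kfd_pre_reset and kgd2kfd_post_reset above bracket a GPU reset: both return early if initialization never completed, the pre-reset side quiesces the queue manager, suspends and signals waiting processes, and the post-reset side resumes and clears the SRAM ECC flag. A skeleton of that bracket with placeholder helpers:

#include <stdbool.h>

static bool init_complete = true;

/* Placeholder quiesce/notify/resume steps. */
static void queues_pre_reset(void)     { }
static void dev_suspend(void)          { }
static void signal_reset_event(void)   { }
static int  dev_resume(void)           { return 0; }
static void clear_sram_ecc_flag(void)  { }

int pre_reset(void)
{
        if (!init_complete)
                return 0;               /* nothing to quiesce */
        queues_pre_reset();
        dev_suspend();
        signal_reset_event();
        return 0;
}

int post_reset(void)
{
        int ret;

        if (!init_complete)
                return 0;
        ret = dev_resume();
        if (ret)
                return ret;
        clear_sram_ecc_flag();
        return 0;
}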
773 void kgd2kfd_suspend(struct kfd_dev *kfd)
775 if (!kfd->init_complete)
782 kfd->dqm->ops.stop(kfd->dqm);
784 kfd_iommu_suspend(kfd);
787 int kgd2kfd_resume(struct kfd_dev *kfd)
791 if (!kfd->init_complete)
794 ret = kfd_resume(kfd);
806 static int kfd_resume(struct kfd_dev *kfd)
810 err = kfd_iommu_resume(kfd);
814 kfd->pdev->vendor, kfd->pdev->device);
818 err = kfd->dqm->ops.start(kfd->dqm);
822 kfd->pdev->vendor, kfd->pdev->device);
829 kfd_iommu_suspend(kfd);
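kfd_resume above restores the IOMMU binding first and only then starts the device queue manager; if the DQM fails to start, the IOMMU side is suspended again before the error is returned. A small sketch of that two-step resume with rollback (the helpers are stand-ins, not the driver's functions):

/* Hypothetical resume helpers: 0 on success, negative errno on failure. */
static int  iommu_resume_dev(void)  { return 0; }
static void iommu_suspend_dev(void) { }
static int  dqm_start(void)         { return 0; }

/* Same shape as kfd_resume: a later failure rolls back the earlier step. */
int device_resume(void)
{
        int err;

        err = iommu_resume_dev();
        if (err)
                return err;

        err = dqm_start();
        if (err)
                iommu_suspend_dev();    /* roll back the IOMMU step */

        return err;
}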
849 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
855 if (!kfd->init_complete)
858 if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
863 spin_lock_irqsave(&kfd->interrupt_lock, flags);
865 if (kfd->interrupts_active
866 && interrupt_is_wanted(kfd, ih_ring_entry,
868 && enqueue_ih_ring_entry(kfd,
870 kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
872 spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
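kgd2kfd_interrupt above does as little as possible in interrupt context: under interrupt_lock it checks that interrupts are active, filters the IH ring entry, copies it into a software ring and only then queues deferred work. A user-space sketch of that filter/enqueue/kick-a-worker shape, with a mutex standing in for the kernel spinlock and illustrative names throughout:

#include <pthread.h>
#include <stdbool.h>
#include <string.h>

#define RING_ENTRIES 64
#define ENTRY_WORDS  8          /* an IH entry is ENTRY_WORDS dwords here */

/* Illustrative software ring fed from the hard-IRQ path. */
static unsigned int ring_entry[RING_ENTRIES][ENTRY_WORDS];
static unsigned int ring_head, ring_tail;
static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;
static bool interrupts_active = true;

/* Stand-ins for interrupt_is_wanted() and kfd_queue_work(). */
static bool entry_is_wanted(const unsigned int *e) { return e[0] != 0; }
static void kick_worker(void) { /* schedule deferred processing */ }

static bool enqueue_entry(const unsigned int *e)
{
        unsigned int next = (ring_tail + 1) % RING_ENTRIES;

        if (next == ring_head)
                return false;           /* ring full, drop the entry */
        memcpy(ring_entry[ring_tail], e, sizeof(ring_entry[0]));
        ring_tail = next;
        return true;
}

/* Same shape as kgd2kfd_interrupt: lock, filter, enqueue, defer. */
void handle_ih_entry(const unsigned int *e)
{
        pthread_mutex_lock(&ring_lock);
        if (interrupts_active && entry_is_wanted(e) && enqueue_entry(e))
                kick_worker();
        pthread_mutex_unlock(&ring_lock);
}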
961 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
973 kfd->gtt_sa_chunk_size = chunk_size;
974 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
976 num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
979 kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
981 if (!kfd->gtt_sa_bitmap)
985 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
987 mutex_init(&kfd->gtt_sa_lock);
993 static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
995 mutex_destroy(&kfd->gtt_sa_lock);
996 kfree(kfd->gtt_sa_bitmap);
1013 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
1021 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
1032 mutex_lock(&kfd->gtt_sa_lock);
1036 found = find_next_zero_bit(kfd->gtt_sa_bitmap,
1037 kfd->gtt_sa_num_of_chunks,
1043 if (found == kfd->gtt_sa_num_of_chunks)
1050 kfd->gtt_start_gpu_addr,
1052 kfd->gtt_sa_chunk_size);
1054 kfd->gtt_start_cpu_ptr,
1056 kfd->gtt_sa_chunk_size);
1062 if (size <= kfd->gtt_sa_chunk_size) {
1064 set_bit(found, kfd->gtt_sa_bitmap);
1069 cur_size = size - kfd->gtt_sa_chunk_size;
1072 find_next_zero_bit(kfd->gtt_sa_bitmap,
1073 kfd->gtt_sa_num_of_chunks, ++found);
1087 if (found == kfd->gtt_sa_num_of_chunks)
1091 if (cur_size <= kfd->gtt_sa_chunk_size)
1094 cur_size -= kfd->gtt_sa_chunk_size;
1105 set_bit(found, kfd->gtt_sa_bitmap);
1108 mutex_unlock(&kfd->gtt_sa_lock);
1113 mutex_unlock(&kfd->gtt_sa_lock);
1118 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
1129 mutex_lock(&kfd->gtt_sa_lock);
1135 clear_bit(bit, kfd->gtt_sa_bitmap);
1137 mutex_unlock(&kfd->gtt_sa_lock);
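The kfd_gtt_sa_* lines above implement a simple sub-allocator: one GTT buffer is carved into fixed-size chunks, a bitmap under gtt_sa_lock records which chunks are in use, and an allocation claims enough consecutive free chunks to cover the requested size. The sketch below shows the same contiguous-run search with a byte-per-chunk array instead of a bitmap; sizes and names are illustrative.

#include <stdio.h>
#include <string.h>

#define CHUNK_SIZE 512u
#define NUM_CHUNKS 128u

/* One byte per chunk instead of the kernel's bitmap; same idea. */
static unsigned char chunk_used[NUM_CHUNKS];

/* Find and claim enough consecutive free chunks to hold 'size' bytes.
 * Returns the first chunk index, or -1 if no run is large enough. */
static int sa_allocate(unsigned int size)
{
        unsigned int need = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;
        unsigned int run = 0;

        for (unsigned int i = 0; i < NUM_CHUNKS; i++) {
                run = chunk_used[i] ? 0 : run + 1;
                if (run == need) {
                        unsigned int first = i + 1 - need;

                        memset(&chunk_used[first], 1, need);
                        return (int)first;
                }
        }
        return -1;
}

/* Release a previously claimed run of chunks. */
static void sa_free(int first, unsigned int size)
{
        unsigned int need = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;

        memset(&chunk_used[first], 0, need);
}

int main(void)
{
        int a = sa_allocate(3 * CHUNK_SIZE + 1);   /* needs 4 chunks */
        int b = sa_allocate(CHUNK_SIZE);           /* needs 1 chunk  */

        printf("a at chunk %d, b at chunk %d\n", a, b);
        sa_free(a, 3 * CHUNK_SIZE + 1);
        sa_free(b, CHUNK_SIZE);
        return 0;
}

The real allocator additionally hands back the GPU and CPU addresses derived from the chunk index (lines 1050-1056); the sketch only tracks occupancy.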
1143 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1145 if (kfd)
1146 atomic_inc(&kfd->sram_ecc_flag);
1149 void kfd_inc_compute_active(struct kfd_dev *kfd)
1151 if (atomic_inc_return(&kfd->compute_profile) == 1)
1152 amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
1155 void kfd_dec_compute_active(struct kfd_dev *kfd)
1157 int count = atomic_dec_return(&kfd->compute_profile);
1160 amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
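kfd_inc_compute_active and kfd_dec_compute_active above use an atomic reference count so the compute-idle hint is toggled only on the 0->1 and 1->0 transitions, no matter how many users are active. A sketch of that edge-triggered counter using C11 atomics (set_compute_idle is a placeholder for the amdgpu callback):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int compute_profile;

/* Placeholder for the "set compute idle" hint to the GPU driver. */
static void set_compute_idle(bool idle)
{
        printf("compute idle: %s\n", idle ? "yes" : "no");
}

void compute_active_get(void)
{
        /* Only the first user switches the GPU out of the idle profile. */
        if (atomic_fetch_add(&compute_profile, 1) == 0)
                set_compute_idle(false);
}

void compute_active_put(void)
{
        int count = atomic_fetch_sub(&compute_profile, 1) - 1;

        /* Only the last user switches it back; a negative count is a bug. */
        if (count == 0)
                set_compute_idle(true);
        else if (count < 0)
                fprintf(stderr, "unbalanced compute_active_put\n");
}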