Lines Matching refs:dqm

45 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
48 static int execute_queues_cpsch(struct device_queue_manager *dqm,
52 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
58 static int map_queues_cpsch(struct device_queue_manager *dqm);
60 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
63 static inline void deallocate_hqd(struct device_queue_manager *dqm,
65 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
66 static int allocate_sdma_queue(struct device_queue_manager *dqm,
78 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
81 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
82 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;
85 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
87 dqm->dev->kfd->shared_resources.cp_queue_bitmap))
92 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
94 return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
98 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
100 return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
103 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
105 return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
108 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
110 return kfd_get_num_sdma_engines(dqm->dev) +
111 kfd_get_num_xgmi_sdma_engines(dqm->dev);
114 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
116 return kfd_get_num_sdma_engines(dqm->dev) *
117 dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
120 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
122 return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
123 dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
126 static void init_sdma_bitmaps(struct device_queue_manager *dqm)
128 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
129 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));
131 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
132 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
135 bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
136 dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
140 void program_sh_mem_settings(struct device_queue_manager *dqm,
143 uint32_t xcc_mask = dqm->dev->xcc_mask;
147 dqm->dev->kfd2kgd->program_sh_mem_settings(
148 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
153 static void kfd_hws_hang(struct device_queue_manager *dqm)
158 dqm->is_hws_hang = true;
164 if (!dqm->is_resetting)
165 schedule_work(&dqm->hw_exception_work);
188 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
191 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
197 if (dqm->is_hws_hang)
252 kfd_hws_hang(dqm);
258 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
261 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
265 if (dqm->is_hws_hang)
280 kfd_hws_hang(dqm);
286 static int remove_all_queues_mes(struct device_queue_manager *dqm)
289 struct device *dev = dqm->dev->adev->dev;
294 list_for_each_entry(cur, &dqm->queues, list) {
298 retval = remove_queue_mes(dqm, q, qpd);
303 dqm->dev->id);
313 static void increment_queue_count(struct device_queue_manager *dqm,
317 dqm->active_queue_count++;
320 dqm->active_cp_queue_count++;
323 dqm->gws_queue_count++;
328 static void decrement_queue_count(struct device_queue_manager *dqm,
332 dqm->active_queue_count--;
335 dqm->active_cp_queue_count--;
338 dqm->gws_queue_count--;
351 struct kfd_node *dev = qpd->dqm->dev;
378 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
379 get_num_all_sdma_engines(qpd->dqm) +
422 struct kfd_node *dev = qpd->dqm->dev;
433 static void program_trap_handler_settings(struct device_queue_manager *dqm,
436 uint32_t xcc_mask = dqm->dev->xcc_mask;
439 if (dqm->dev->kfd2kgd->program_trap_handler_settings)
441 dqm->dev->kfd2kgd->program_trap_handler_settings(
442 dqm->dev->adev, qpd->vmid, qpd->tba_addr,
446 static int allocate_vmid(struct device_queue_manager *dqm,
450 struct device *dev = dqm->dev->adev->dev;
453 for (i = dqm->dev->vm_info.first_vmid_kfd;
454 i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
455 if (!dqm->vmid_pasid[i]) {
468 dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
470 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
475 program_sh_mem_settings(dqm, qpd);
477 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
478 program_trap_handler_settings(dqm, qpd);
483 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
489 if (dqm->dev->kfd2kgd->set_scratch_backing_va)
490 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
499 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
514 static void deallocate_vmid(struct device_queue_manager *dqm,
518 struct device *dev = dqm->dev->adev->dev;
528 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
529 dqm->vmid_pasid[qpd->vmid] = 0;
535 static int create_queue_nocpsch(struct device_queue_manager *dqm,
544 dqm_lock(dqm);
546 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
548 dqm->total_queue_count);
554 retval = allocate_vmid(dqm, qpd, q);
569 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
572 retval = allocate_hqd(dqm, q);
579 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
582 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
589 /* Temporarily release dqm lock to avoid a circular lock dependency */
590 dqm_unlock(dqm);
592 dqm_lock(dqm);
608 if (!dqm->sched_running) {
627 increment_queue_count(dqm, qpd, q);
633 dqm->total_queue_count++;
635 dqm->total_queue_count);
644 deallocate_hqd(dqm, q);
647 deallocate_sdma_queue(dqm, q);
650 deallocate_vmid(dqm, qpd, q);
652 dqm_unlock(dqm);
656 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
663 for (pipe = dqm->next_pipe_to_allocate, i = 0;
664 i < get_pipes_per_mec(dqm);
665 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
667 if (!is_pipe_enabled(dqm, 0, pipe))
670 if (dqm->allocated_queues[pipe] != 0) {
671 bit = ffs(dqm->allocated_queues[pipe]) - 1;
672 dqm->allocated_queues[pipe] &= ~(1 << bit);
685 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
690 static inline void deallocate_hqd(struct device_queue_manager *dqm,
693 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
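
The allocate_hqd()/deallocate_hqd() lines above show a round-robin, bitmask-based allocator: each pipe keeps a word of free queue slots, the lowest set bit is claimed with ffs(), and next_pipe_to_allocate spreads new queues across pipes while freeing simply sets the bit again. A minimal userspace sketch of that pattern follows; the names, pipe/queue counts, and helpers are illustrative, not taken from the driver.

    /* Round-robin allocation over per-pipe free-slot bitmasks (illustrative). */
    #include <stdbool.h>
    #include <strings.h>                     /* ffs() */

    #define NUM_PIPES        4
    #define QUEUES_PER_PIPE  8

    static unsigned int free_queues[NUM_PIPES]; /* bit i set => queue i free */
    static int next_pipe;                       /* round-robin start point */

    static void hqd_init(void)
    {
            for (int p = 0; p < NUM_PIPES; p++)
                    free_queues[p] = (1u << QUEUES_PER_PIPE) - 1;
    }

    static bool hqd_alloc(int *pipe_out, int *queue_out)
    {
            int pipe = next_pipe;

            for (int i = 0; i < NUM_PIPES; i++, pipe = (pipe + 1) % NUM_PIPES) {
                    if (!free_queues[pipe])
                            continue;                     /* pipe is full */
                    int bit = ffs(free_queues[pipe]) - 1; /* lowest free slot */
                    free_queues[pipe] &= ~(1u << bit);
                    *pipe_out = pipe;
                    *queue_out = bit;
                    next_pipe = (pipe + 1) % NUM_PIPES;   /* spread the load */
                    return true;
            }
            return false;                                 /* everything in use */
    }

    static void hqd_free(int pipe, int queue)
    {
            free_queues[pipe] |= 1u << queue;
    }
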
766 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
773 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
777 deallocate_hqd(dqm, q);
779 deallocate_sdma_queue(dqm, q);
781 deallocate_sdma_queue(dqm, q);
787 dqm->total_queue_count--;
791 if (!dqm->sched_running) {
807 dqm->dev);
811 dbgdev_wave_reset_wavefronts(dqm->dev,
816 deallocate_vmid(dqm, qpd, q);
820 decrement_queue_count(dqm, qpd, q);
825 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
831 struct device *dev = dqm->dev->adev->dev;
834 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
846 dqm_lock(dqm);
847 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
850 dqm_unlock(dqm);
857 static int update_queue(struct device_queue_manager *dqm, struct queue *q,
861 struct device *dev = dqm->dev->adev->dev;
866 dqm_lock(dqm);
872 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
879 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
880 if (!dqm->dev->kfd->shared_resources.enable_mes)
881 retval = unmap_queues_cpsch(dqm,
884 retval = remove_queue_mes(dqm, q, &pdd->qpd);
895 if (!dqm->sched_running) {
901 (dqm->dev->kfd->cwsr_enabled ?
916 * dqm->active_queue_count to determine whether a new runlist must be
920 increment_queue_count(dqm, &pdd->qpd, q);
922 decrement_queue_count(dqm, &pdd->qpd, q);
925 dqm->gws_queue_count++;
931 dqm->gws_queue_count--;
937 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
938 if (!dqm->dev->kfd->shared_resources.enable_mes)
939 retval = map_queues_cpsch(dqm);
941 retval = add_queue_mes(dqm, q, &pdd->qpd);
956 dqm_unlock(dqm);
960 /* suspend_single_queue does not lock the dqm like the
962 * lock the dqm before calling, and unlock after calling.
964 * The reason we don't lock the dqm is because this function may be
966 * multiple times, we will just keep the dqm locked for all of the calls.
968 static int suspend_single_queue(struct device_queue_manager *dqm,
992 if (dqm->dev->kfd->shared_resources.enable_mes) {
993 int r = remove_queue_mes(dqm, q, &pdd->qpd);
999 decrement_queue_count(dqm, &pdd->qpd, q);
1006 /* resume_single_queue does not lock the dqm like the functions
1008 * lock the dqm before calling, and unlock after calling.
1010 * The reason we don't lock the dqm is because this function may be
1012 * multiple times, we will just keep the dqm locked for all of the calls.
1014 static int resume_single_queue(struct device_queue_manager *dqm,
1032 if (dqm->dev->kfd->shared_resources.enable_mes) {
1033 int r = add_queue_mes(dqm, q, &pdd->qpd);
1040 increment_queue_count(dqm, qpd, q);
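
The comments above suspend_single_queue()/resume_single_queue() spell out a caller-holds-lock convention: the helpers never take the dqm lock themselves, so a caller that walks many queues locks once, calls the per-queue helper repeatedly, and unlocks at the end. A small pthread-based sketch of that convention, with purely illustrative types and names:

    /* Caller-held-lock convention (illustrative types and names). */
    #include <pthread.h>

    struct queue_manager {
            pthread_mutex_t lock;
            /* ... queue bookkeeping ... */
    };

    /* Must be called with mgr->lock held. */
    static int suspend_one_queue_locked(struct queue_manager *mgr, int qid)
    {
            (void)mgr;
            (void)qid;
            /* unmap the queue, mark it suspended, ... */
            return 0;
    }

    static int suspend_queue_batch(struct queue_manager *mgr,
                                   const int *qids, int count)
    {
            int r = 0;

            pthread_mutex_lock(&mgr->lock);   /* one lock for the whole batch */
            for (int i = 0; i < count && !r; i++)
                    r = suspend_one_queue_locked(mgr, qids[i]);
            pthread_mutex_unlock(&mgr->lock);
            return r;
    }
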
1046 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
1054 dqm_lock(dqm);
1071 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1074 decrement_queue_count(dqm, qpd, q);
1076 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
1080 (dqm->dev->kfd->cwsr_enabled ?
1092 dqm_unlock(dqm);
1096 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
1100 struct device *dev = dqm->dev->adev->dev;
1104 dqm_lock(dqm);
1129 decrement_queue_count(dqm, qpd, q);
1131 if (dqm->dev->kfd->shared_resources.enable_mes) {
1132 retval = remove_queue_mes(dqm, q, qpd);
1141 if (!dqm->dev->kfd->shared_resources.enable_mes)
1142 retval = execute_queues_cpsch(dqm,
1149 dqm_unlock(dqm);
1153 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1168 dqm_lock(dqm);
1184 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1185 dqm->dev->adev,
1208 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1211 increment_queue_count(dqm, qpd, q);
1213 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1230 dqm_unlock(dqm);
1234 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1238 struct device *dev = dqm->dev->adev->dev;
1245 dqm_lock(dqm);
1274 increment_queue_count(dqm, &pdd->qpd, q);
1276 if (dqm->dev->kfd->shared_resources.enable_mes) {
1277 retval = add_queue_mes(dqm, q, qpd);
1285 if (!dqm->dev->kfd->shared_resources.enable_mes)
1286 retval = execute_queues_cpsch(dqm,
1293 dqm_unlock(dqm);
1297 static int register_process(struct device_queue_manager *dqm,
1315 dqm_lock(dqm);
1316 list_add(&n->list, &dqm->queues);
1322 retval = dqm->asic_ops.update_qpd(dqm, qpd);
1324 dqm->processes_count++;
1326 dqm_unlock(dqm);
1331 kfd_inc_compute_active(dqm->dev);
1336 static int unregister_process(struct device_queue_manager *dqm,
1346 dqm_lock(dqm);
1348 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1352 dqm->processes_count--;
1356 /* qpd not found in dqm list */
1359 dqm_unlock(dqm);
1365 kfd_dec_compute_active(dqm->dev);
1371 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1374 uint32_t xcc_mask = dqm->dev->xcc_mask;
1378 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1379 dqm->dev->adev, pasid, vmid, xcc_id);
1387 static void init_interrupts(struct device_queue_manager *dqm)
1389 uint32_t xcc_mask = dqm->dev->xcc_mask;
1393 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
1394 if (is_pipe_enabled(dqm, 0, i)) {
1395 dqm->dev->kfd2kgd->init_interrupts(
1396 dqm->dev->adev, i, xcc_id);
1402 static int initialize_nocpsch(struct device_queue_manager *dqm)
1406 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1408 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1410 if (!dqm->allocated_queues)
1413 mutex_init(&dqm->lock_hidden);
1414 INIT_LIST_HEAD(&dqm->queues);
1415 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1416 dqm->active_cp_queue_count = 0;
1417 dqm->gws_queue_count = 0;
1419 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1420 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1422 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1424 dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1425 dqm->allocated_queues[pipe] |= 1 << queue;
1428 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1430 init_sdma_bitmaps(dqm);
1435 static void uninitialize(struct device_queue_manager *dqm)
1439 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1441 kfree(dqm->allocated_queues);
1443 kfree(dqm->mqd_mgrs[i]);
1444 mutex_destroy(&dqm->lock_hidden);
1447 static int start_nocpsch(struct device_queue_manager *dqm)
1452 init_interrupts(dqm);
1454 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1455 r = pm_init(&dqm->packet_mgr, dqm);
1457 dqm->sched_running = true;
1462 static int stop_nocpsch(struct device_queue_manager *dqm)
1464 dqm_lock(dqm);
1465 if (!dqm->sched_running) {
1466 dqm_unlock(dqm);
1470 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1471 pm_uninit(&dqm->packet_mgr, false);
1472 dqm->sched_running = false;
1473 dqm_unlock(dqm);
1478 static void pre_reset(struct device_queue_manager *dqm)
1480 dqm_lock(dqm);
1481 dqm->is_resetting = true;
1482 dqm_unlock(dqm);
1485 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1488 struct device *dev = dqm->dev->adev->dev;
1492 if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1499 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
1503 clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
1507 bit = find_first_bit(dqm->sdma_bitmap,
1508 get_num_sdma_queues(dqm));
1509 clear_bit(bit, dqm->sdma_bitmap);
1514 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
1516 kfd_get_num_sdma_engines(dqm->dev);
1518 if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1524 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
1528 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
1531 bit = find_first_bit(dqm->xgmi_sdma_bitmap,
1532 get_num_xgmi_sdma_queues(dqm));
1533 clear_bit(bit, dqm->xgmi_sdma_bitmap);
1543 kfd_get_num_sdma_engines(dqm->dev) +
1544 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1546 kfd_get_num_xgmi_sdma_engines(dqm->dev);
1555 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1559 if (q->sdma_id >= get_num_sdma_queues(dqm))
1561 set_bit(q->sdma_id, dqm->sdma_bitmap);
1563 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1565 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
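
init_sdma_bitmaps(), allocate_sdma_queue() and deallocate_sdma_queue() above manage SDMA queue IDs as a free-bitmap: all queues start free, reserved queues are masked out up front, allocation takes the lowest set bit, and freeing sets the bit back. A userspace sketch of the same idea, assuming at most 64 queues, a plain 64-bit word in place of the kernel bitmap API, and a GCC/Clang builtin to find the lowest set bit; every name here is invented.

    /* Free-bitmap queue allocator (illustrative, <= 64 queues). */
    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t sdma_free_map;               /* bit i set => queue i free */

    static void sdma_bitmap_init(unsigned int num_queues, uint64_t reserved_mask)
    {
            sdma_free_map = (num_queues >= 64) ? ~0ull
                                               : (1ull << num_queues) - 1;
            sdma_free_map &= ~reserved_mask;     /* drop reserved queues */
    }

    static bool sdma_alloc(unsigned int *id_out)
    {
            if (!sdma_free_map)
                    return false;                /* nothing left */
            unsigned int bit = (unsigned int)__builtin_ctzll(sdma_free_map);
            sdma_free_map &= ~(1ull << bit);     /* claim lowest free queue */
            *id_out = bit;
            return true;
    }

    static void sdma_release(unsigned int id)
    {
            sdma_free_map |= 1ull << id;         /* mark free again */
    }
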
1573 static int set_sched_resources(struct device_queue_manager *dqm)
1577 struct device *dev = dqm->dev->adev->dev;
1579 res.vmid_mask = dqm->dev->compute_vmid_bitmap;
1583 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
1584 / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
1586 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1604 dqm->dev->adev, i);
1614 return pm_send_set_resources(&dqm->packet_mgr, &res);
1617 static int initialize_cpsch(struct device_queue_manager *dqm)
1619 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1621 mutex_init(&dqm->lock_hidden);
1622 INIT_LIST_HEAD(&dqm->queues);
1623 dqm->active_queue_count = dqm->processes_count = 0;
1624 dqm->active_cp_queue_count = 0;
1625 dqm->gws_queue_count = 0;
1626 dqm->active_runlist = false;
1627 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1628 dqm->trap_debug_vmid = 0;
1630 init_sdma_bitmaps(dqm);
1632 if (dqm->dev->kfd2kgd->get_iq_wait_times)
1633 dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
1634 &dqm->wait_times,
1635 ffs(dqm->dev->xcc_mask) - 1);
1639 static int start_cpsch(struct device_queue_manager *dqm)
1641 struct device *dev = dqm->dev->adev->dev;
1646 dqm_lock(dqm);
1648 if (!dqm->dev->kfd->shared_resources.enable_mes) {
1649 retval = pm_init(&dqm->packet_mgr, dqm);
1653 retval = set_sched_resources(dqm);
1660 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1661 &dqm->fence_mem);
1666 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1667 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1669 init_interrupts(dqm);
1672 dqm->is_hws_hang = false;
1673 dqm->is_resetting = false;
1674 dqm->sched_running = true;
1676 if (!dqm->dev->kfd->shared_resources.enable_mes)
1677 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
1680 if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
1681 (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
1685 retval = pm_update_grace_period(&dqm->packet_mgr,
1689 else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
1690 /* Update dqm->wait_times maintained in software */
1691 dqm->dev->kfd2kgd->build_grace_period_packet_info(
1692 dqm->dev->adev, dqm->wait_times,
1694 &dqm->wait_times);
1697 dqm_unlock(dqm);
1702 if (!dqm->dev->kfd->shared_resources.enable_mes)
1703 pm_uninit(&dqm->packet_mgr, false);
1705 dqm_unlock(dqm);
1709 static int stop_cpsch(struct device_queue_manager *dqm)
1713 dqm_lock(dqm);
1714 if (!dqm->sched_running) {
1715 dqm_unlock(dqm);
1719 if (!dqm->is_hws_hang) {
1720 if (!dqm->dev->kfd->shared_resources.enable_mes)
1721 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
1723 remove_all_queues_mes(dqm);
1726 hanging = dqm->is_hws_hang || dqm->is_resetting;
1727 dqm->sched_running = false;
1729 if (!dqm->dev->kfd->shared_resources.enable_mes)
1730 pm_release_ib(&dqm->packet_mgr);
1732 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1733 if (!dqm->dev->kfd->shared_resources.enable_mes)
1734 pm_uninit(&dqm->packet_mgr, hanging);
1735 dqm_unlock(dqm);
1740 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1744 dqm_lock(dqm);
1745 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1747 dqm->total_queue_count);
1748 dqm_unlock(dqm);
1756 dqm->total_queue_count++;
1758 dqm->total_queue_count);
1761 increment_queue_count(dqm, qpd, kq->queue);
1763 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
1765 dqm_unlock(dqm);
1770 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1774 dqm_lock(dqm);
1776 decrement_queue_count(dqm, qpd, kq->queue);
1778 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
1784 dqm->total_queue_count--;
1786 dqm->total_queue_count);
1787 dqm_unlock(dqm);
1790 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1798 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1800 dqm->total_queue_count);
1807 dqm_lock(dqm);
1808 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1809 dqm_unlock(dqm);
1818 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1823 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1832 dqm_lock(dqm);
1854 increment_queue_count(dqm, qpd, q);
1856 if (!dqm->dev->kfd->shared_resources.enable_mes)
1857 retval = execute_queues_cpsch(dqm,
1860 retval = add_queue_mes(dqm, q, qpd);
1869 dqm->total_queue_count++;
1872 dqm->total_queue_count);
1874 dqm_unlock(dqm);
1881 decrement_queue_count(dqm, qpd, q);
1883 dqm_unlock(dqm);
1889 dqm_lock(dqm);
1890 deallocate_sdma_queue(dqm, q);
1891 dqm_unlock(dqm);
1897 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
1902 struct device *dev = dqm->dev->adev->dev;
1903 uint64_t *fence_addr = dqm->fence_addr;
1907 if (amdgpu_amdkfd_is_fed(dqm->dev->adev))
1927 /* dqm->lock mutex has to be locked before calling this function */
1928 static int map_queues_cpsch(struct device_queue_manager *dqm)
1930 struct device *dev = dqm->dev->adev->dev;
1933 if (!dqm->sched_running)
1935 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1937 if (dqm->active_runlist)
1940 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1946 dqm->active_runlist = true;
1951 /* dqm->lock mutex has to be locked before calling this function */
1952 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1958 struct device *dev = dqm->dev->adev->dev;
1962 if (!dqm->sched_running)
1964 if (dqm->is_hws_hang || dqm->is_resetting)
1966 if (!dqm->active_runlist)
1970 retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
1975 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1979 *dqm->fence_addr = KFD_FENCE_INIT;
1980 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1983 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
1987 kfd_hws_hang(dqm);
1999 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
2000 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
2004 kfd_hws_hang(dqm);
2010 if (pm_update_grace_period(&dqm->packet_mgr,
2015 pm_release_ib(&dqm->packet_mgr);
2016 dqm->active_runlist = false;
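
unmap_queues_cpsch() together with amdkfd_fence_wait_timeout() above implements a fence handshake: the driver seeds *dqm->fence_addr with KFD_FENCE_INIT, asks the scheduler via pm_send_query_status() to write KFD_FENCE_COMPLETED, then polls that location until the value appears or a timeout expires, falling back to kfd_hws_hang() on failure. A simplified C11 sketch of the polling side only; the constants, atomics, and clock choice are stand-ins, not the driver's.

    /* Polling fence wait with timeout (illustrative). */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    #define FENCE_INIT       1u
    #define FENCE_COMPLETED  2u

    static bool fence_wait_timeout(const _Atomic uint64_t *fence,
                                   uint64_t expected, unsigned int timeout_ms)
    {
            struct timespec start, now;

            clock_gettime(CLOCK_MONOTONIC, &start);
            for (;;) {
                    if (atomic_load_explicit(fence, memory_order_acquire) == expected)
                            return true;                 /* scheduler answered */
                    clock_gettime(CLOCK_MONOTONIC, &now);
                    int64_t elapsed_ms =
                            (int64_t)(now.tv_sec - start.tv_sec) * 1000 +
                            (now.tv_nsec - start.tv_nsec) / 1000000;
                    if (elapsed_ms > (int64_t)timeout_ms)
                            return false;                /* treat as a HWS hang */
            }
    }
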
2022 static int reset_queues_cpsch(struct device_queue_manager *dqm,
2027 dqm_lock(dqm);
2029 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
2032 dqm_unlock(dqm);
2036 /* dqm->lock mutex has to be locked before calling this function */
2037 static int execute_queues_cpsch(struct device_queue_manager *dqm,
2044 if (dqm->is_hws_hang)
2046 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
2050 return map_queues_cpsch(dqm);
2053 static int wait_on_destroy_queue(struct device_queue_manager *dqm,
2066 dqm_unlock(dqm);
2068 ret = wait_event_interruptible(dqm->destroy_wait,
2072 dqm_lock(dqm);
2078 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
2086 struct device *dev = dqm->dev->adev->dev;
2099 dqm_lock(dqm);
2101 retval = wait_on_destroy_queue(dqm, q);
2104 dqm_unlock(dqm);
2118 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2125 deallocate_sdma_queue(dqm, q);
2132 decrement_queue_count(dqm, qpd, q);
2133 if (!dqm->dev->kfd->shared_resources.enable_mes) {
2134 retval = execute_queues_cpsch(dqm,
2140 retval = remove_queue_mes(dqm, q, qpd);
2148 dqm->total_queue_count--;
2150 dqm->total_queue_count);
2152 dqm_unlock(dqm);
2155 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
2168 dqm_unlock(dqm);
2180 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
2189 if (!dqm->asic_ops.set_cache_memory_policy)
2192 dqm_lock(dqm);
2222 retval = dqm->asic_ops.set_cache_memory_policy(
2223 dqm,
2230 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
2231 program_sh_mem_settings(dqm, qpd);
2238 dqm_unlock(dqm);
2242 static int process_termination_nocpsch(struct device_queue_manager *dqm,
2250 dqm_lock(dqm);
2258 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2260 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2263 dqm_unlock(dqm);
2265 dqm_lock(dqm);
2269 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2273 dqm->processes_count--;
2279 dqm_unlock(dqm);
2285 kfd_dec_compute_active(dqm->dev);
2290 static int get_wave_state(struct device_queue_manager *dqm,
2298 dqm_lock(dqm);
2300 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2305 dqm_unlock(dqm);
2309 dqm_unlock(dqm);
2312 * get_wave_state is outside the dqm lock to prevent circular locking
2320 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2329 dqm_lock(dqm);
2330 mqd_mgr = dqm->mqd_mgrs[mqd_type];
2337 dqm_unlock(dqm);
2340 static int checkpoint_mqd(struct device_queue_manager *dqm,
2350 dqm_lock(dqm);
2357 mqd_mgr = dqm->mqd_mgrs[mqd_type];
2366 dqm_unlock(dqm);
2370 static int process_termination_cpsch(struct device_queue_manager *dqm,
2375 struct device *dev = dqm->dev->adev->dev;
2385 dqm_lock(dqm);
2390 decrement_queue_count(dqm, qpd, kq->queue);
2392 dqm->total_queue_count--;
2399 deallocate_sdma_queue(dqm, q);
2401 deallocate_sdma_queue(dqm, q);
2404 decrement_queue_count(dqm, qpd, q);
2406 if (dqm->dev->kfd->shared_resources.enable_mes) {
2407 retval = remove_queue_mes(dqm, q, qpd);
2414 dqm->total_queue_count--;
2418 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2422 dqm->processes_count--;
2428 if (!dqm->dev->kfd->shared_resources.enable_mes)
2429 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
2431 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
2432 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
2433 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
2442 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2446 dqm_unlock(dqm);
2448 dqm_lock(dqm);
2450 dqm_unlock(dqm);
2456 kfd_dec_compute_active(dqm->dev);
2461 static int init_mqd_managers(struct device_queue_manager *dqm)
2464 struct device *dev = dqm->dev->adev->dev;
2468 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2473 dqm->mqd_mgrs[i] = mqd_mgr;
2480 kfree(dqm->mqd_mgrs[j]);
2481 dqm->mqd_mgrs[j] = NULL;
2488 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2491 struct kfd_node *dev = dqm->dev;
2492 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2493 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2494 get_num_all_sdma_engines(dqm) *
2496 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
2497 NUM_XCC(dqm->dev->xcc_mask));
2508 struct device_queue_manager *dqm;
2512 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2513 if (!dqm)
2525 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
2528 dqm->sched_policy = sched_policy;
2532 dqm->dev = dev;
2533 switch (dqm->sched_policy) {
2536 /* initialize dqm for cp scheduling */
2537 dqm->ops.create_queue = create_queue_cpsch;
2538 dqm->ops.initialize = initialize_cpsch;
2539 dqm->ops.start = start_cpsch;
2540 dqm->ops.stop = stop_cpsch;
2541 dqm->ops.pre_reset = pre_reset;
2542 dqm->ops.destroy_queue = destroy_queue_cpsch;
2543 dqm->ops.update_queue = update_queue;
2544 dqm->ops.register_process = register_process;
2545 dqm->ops.unregister_process = unregister_process;
2546 dqm->ops.uninitialize = uninitialize;
2547 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
2548 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
2549 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2550 dqm->ops.process_termination = process_termination_cpsch;
2551 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
2552 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
2553 dqm->ops.get_wave_state = get_wave_state;
2554 dqm->ops.reset_queues = reset_queues_cpsch;
2555 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2556 dqm->ops.checkpoint_mqd = checkpoint_mqd;
2559 /* initialize dqm for no cp scheduling */
2560 dqm->ops.start = start_nocpsch;
2561 dqm->ops.stop = stop_nocpsch;
2562 dqm->ops.pre_reset = pre_reset;
2563 dqm->ops.create_queue = create_queue_nocpsch;
2564 dqm->ops.destroy_queue = destroy_queue_nocpsch;
2565 dqm->ops.update_queue = update_queue;
2566 dqm->ops.register_process = register_process;
2567 dqm->ops.unregister_process = unregister_process;
2568 dqm->ops.initialize = initialize_nocpsch;
2569 dqm->ops.uninitialize = uninitialize;
2570 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2571 dqm->ops.process_termination = process_termination_nocpsch;
2572 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
2573 dqm->ops.restore_process_queues =
2575 dqm->ops.get_wave_state = get_wave_state;
2576 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2577 dqm->ops.checkpoint_mqd = checkpoint_mqd;
2580 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
2587 device_queue_manager_init_cik(&dqm->asic_ops);
2597 device_queue_manager_init_vi(&dqm->asic_ops);
2602 device_queue_manager_init_v11(&dqm->asic_ops);
2604 device_queue_manager_init_v10(&dqm->asic_ops);
2606 device_queue_manager_init_v9(&dqm->asic_ops);
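
The device_queue_manager_init() lines above fill dqm->ops with one of two function-pointer sets depending on sched_policy (HWS vs. no-HWS) and dqm->asic_ops with a per-generation set, so the rest of the code calls dqm->ops.start() and friends without branching on the mode. A compact sketch of that ops-table pattern with invented backends:

    /* Ops-table selection by policy (both backends are placeholders). */
    #include <stdio.h>

    struct qm;

    struct qm_ops {
            int (*start)(struct qm *mgr);
            int (*stop)(struct qm *mgr);
    };

    struct qm {
            struct qm_ops ops;
            int policy;                      /* 0 = HW scheduler, 1 = no HWS */
    };

    static int start_hws(struct qm *mgr)   { (void)mgr; puts("start: HWS");    return 0; }
    static int stop_hws(struct qm *mgr)    { (void)mgr; puts("stop: HWS");     return 0; }
    static int start_nohws(struct qm *mgr) { (void)mgr; puts("start: no HWS"); return 0; }
    static int stop_nohws(struct qm *mgr)  { (void)mgr; puts("stop: no HWS");  return 0; }

    static void qm_init(struct qm *mgr, int policy)
    {
            mgr->policy = policy;
            if (policy == 0) {
                    mgr->ops.start = start_hws;
                    mgr->ops.stop  = stop_hws;
            } else {
                    mgr->ops.start = start_nohws;
                    mgr->ops.stop  = stop_nohws;
            }
    }
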
2614 if (init_mqd_managers(dqm))
2617 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
2622 if (!dqm->ops.initialize(dqm)) {
2623 init_waitqueue_head(&dqm->destroy_wait);
2624 return dqm;
2628 kfree(dqm);
2640 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2642 dqm->ops.stop(dqm);
2643 dqm->ops.uninitialize(dqm);
2644 if (!dqm->dev->kfd->shared_resources.enable_mes)
2645 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2646 kfree(dqm);
2649 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
2658 pdd = kfd_get_process_device_data(dqm->dev, p);
2660 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2668 struct device_queue_manager *dqm = container_of(work,
2670 amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2673 int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
2677 struct device *dev = dqm->dev->adev->dev;
2680 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
2681 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
2685 dqm_lock(dqm);
2687 if (dqm->trap_debug_vmid != 0) {
2693 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
2698 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
2699 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
2701 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
2702 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
2703 r = set_sched_resources(dqm);
2707 r = map_queues_cpsch(dqm);
2711 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
2714 dqm_unlock(dqm);
2721 int release_debug_trap_vmid(struct device_queue_manager *dqm,
2724 struct device *dev = dqm->dev->adev->dev;
2729 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
2730 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
2734 dqm_lock(dqm);
2735 trap_debug_vmid = dqm->trap_debug_vmid;
2736 if (dqm->trap_debug_vmid == 0) {
2742 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
2747 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
2748 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
2750 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
2751 dqm->trap_debug_vmid = 0;
2752 r = set_sched_resources(dqm);
2756 r = map_queues_cpsch(dqm);
2763 dqm_unlock(dqm);
2819 struct device_queue_manager *dqm = pdd->dev->dqm;
2823 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2873 struct device_queue_manager *dqm = pdd->dev->dqm;
2874 struct device *dev = dqm->dev->adev->dev;
2879 dqm_lock(dqm);
2892 int err = resume_single_queue(dqm, &pdd->qpd, q);
2905 if (dqm->dev->kfd->shared_resources.enable_mes) {
2906 wake_up_all(&dqm->destroy_wait);
2916 dqm_unlock(dqm);
2920 r = execute_queues_cpsch(dqm,
2940 wake_up_all(&dqm->destroy_wait);
2944 dqm_unlock(dqm);
2976 struct device_queue_manager *dqm = pdd->dev->dqm;
2977 struct device *dev = dqm->dev->adev->dev;
2983 dqm_lock(dqm);
2992 int err = suspend_single_queue(dqm, pdd, q);
2993 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
3014 dqm_unlock(dqm);
3017 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
3021 r = execute_queues_cpsch(dqm,
3045 dqm_unlock(dqm);
3047 amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
3111 int debug_lock_and_unmap(struct device_queue_manager *dqm)
3113 struct device *dev = dqm->dev->adev->dev;
3116 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
3117 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
3121 if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
3124 dqm_lock(dqm);
3126 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
3128 dqm_unlock(dqm);
3133 int debug_map_and_unlock(struct device_queue_manager *dqm)
3135 struct device *dev = dqm->dev->adev->dev;
3138 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
3139 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
3143 if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
3146 r = map_queues_cpsch(dqm);
3148 dqm_unlock(dqm);
3153 int debug_refresh_runlist(struct device_queue_manager *dqm)
3155 int r = debug_lock_and_unmap(dqm);
3160 return debug_map_and_unlock(dqm);
3188 struct device_queue_manager *dqm = data;
3189 uint32_t xcc_mask = dqm->dev->xcc_mask;
3195 if (!dqm->sched_running) {
3201 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
3210 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
3211 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
3218 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
3219 int pipe_offset = pipe * get_queues_per_pipe(dqm);
3221 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
3223 dqm->dev->kfd->shared_resources.cp_queue_bitmap))
3226 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
3243 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
3245 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
3248 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
3250 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
3251 dqm->dev->adev, pipe, queue, &dump, &n_regs);
3266 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
3270 dqm_lock(dqm);
3271 r = pm_debugfs_hang_hws(&dqm->packet_mgr);
3273 dqm_unlock(dqm);
3276 dqm->active_runlist = true;
3277 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
3279 dqm_unlock(dqm);