1// SPDX-License-Identifier: MIT 2/* 3 * Copyright 2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 27#include "clk_mgr.h" 28#include "resource.h" 29#include "dcn321_fpu.h" 30#include "dcn32/dcn32_resource.h" 31#include "dcn321/dcn321_resource.h" 32#include "dml/dcn32/display_mode_vba_util_32.h" 33 34#define DCN3_2_DEFAULT_DET_SIZE 256 35 36struct _vcs_dpi_ip_params_st dcn3_21_ip = { 37 .gpuvm_enable = 0, 38 .gpuvm_max_page_table_levels = 4, 39 .hostvm_enable = 0, 40 .rob_buffer_size_kbytes = 128, 41 .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, 42 .config_return_buffer_size_in_kbytes = 1280, 43 .compressed_buffer_segment_size_in_kbytes = 64, 44 .meta_fifo_size_in_kentries = 22, 45 .zero_size_buffer_entries = 512, 46 .compbuf_reserved_space_64b = 256, 47 .compbuf_reserved_space_zs = 64, 48 .dpp_output_buffer_pixels = 2560, 49 .opp_output_buffer_lines = 1, 50 .pixel_chunk_size_kbytes = 8, 51 .alpha_pixel_chunk_size_kbytes = 4, 52 .min_pixel_chunk_size_bytes = 1024, 53 .dcc_meta_buffer_size_bytes = 6272, 54 .meta_chunk_size_kbytes = 2, 55 .min_meta_chunk_size_bytes = 256, 56 .writeback_chunk_size_kbytes = 8, 57 .ptoi_supported = false, 58 .num_dsc = 4, 59 .maximum_dsc_bits_per_component = 12, 60 .maximum_pixels_per_line_per_dsc_unit = 6016, 61 .dsc422_native_support = true, 62 .is_line_buffer_bpp_fixed = true, 63 .line_buffer_fixed_bpp = 57, 64 .line_buffer_size_bits = 1171920, 65 .max_line_buffer_lines = 32, 66 .writeback_interface_buffer_size_kbytes = 90, 67 .max_num_dpp = 4, 68 .max_num_otg = 4, 69 .max_num_hdmi_frl_outputs = 1, 70 .max_num_wb = 1, 71 .max_dchub_pscl_bw_pix_per_clk = 4, 72 .max_pscl_lb_bw_pix_per_clk = 2, 73 .max_lb_vscl_bw_pix_per_clk = 4, 74 .max_vscl_hscl_bw_pix_per_clk = 4, 75 .max_hscl_ratio = 6, 76 .max_vscl_ratio = 6, 77 .max_hscl_taps = 8, 78 .max_vscl_taps = 8, 79 .dpte_buffer_size_in_pte_reqs_luma = 64, 80 .dpte_buffer_size_in_pte_reqs_chroma = 34, 81 .dispclk_ramp_margin_percent = 1, 82 .max_inter_dcn_tile_repeaters = 8, 83 .cursor_buffer_size = 16, 84 .cursor_chunk_size = 2, 85 .writeback_line_buffer_buffer_size = 0, 86 .writeback_min_hscl_ratio = 1, 87 .writeback_min_vscl_ratio = 1, 88 .writeback_max_hscl_ratio = 1, 89 .writeback_max_vscl_ratio = 1, 90 .writeback_max_hscl_taps = 1, 91 .writeback_max_vscl_taps = 1, 92 .dppclk_delay_subtotal = 47, 93 .dppclk_delay_scl = 50, 94 .dppclk_delay_scl_lb_only = 16, 95 .dppclk_delay_cnvc_formatter = 28, 96 .dppclk_delay_cnvc_cursor = 6, 97 .dispclk_delay_subtotal = 125, 98 .dynamic_metadata_vm_enabled = false, 99 .odm_combine_4to1_supported = false, 100 .dcc_supported = true, 101 .max_num_dp2p0_outputs = 2, 102 .max_num_dp2p0_streams = 4, 103}; 104 105struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { 106 .clock_limits = { 107 { 108 .state = 0, 109 .dcfclk_mhz = 1434.0, 110 .fabricclk_mhz = 2250.0, 111 .dispclk_mhz = 1720.0, 112 .dppclk_mhz = 1720.0, 113 .phyclk_mhz = 810.0, 114 .phyclk_d18_mhz = 667.0, 115 .phyclk_d32_mhz = 313.0, 116 .socclk_mhz = 1200.0, 117 .dscclk_mhz = 573.333, 118 .dram_speed_mts = 16000.0, 119 .dtbclk_mhz = 1564.0, 120 }, 121 }, 122 .num_states = 1, 123 .sr_exit_time_us = 19.95, 124 .sr_enter_plus_exit_time_us = 24.36, 125 .sr_exit_z8_time_us = 285.0, 126 .sr_enter_plus_exit_z8_time_us = 320, 127 .writeback_latency_us = 12.0, 128 .round_trip_ping_latency_dcfclk_cycles = 207, 129 .urgent_latency_pixel_data_only_us = 4, 130 .urgent_latency_pixel_mixed_with_vm_data_us = 4, 131 .urgent_latency_vm_data_only_us = 4, 132 .fclk_change_latency_us = 7, 133 .usr_retraining_latency_us = 0, 134 .smn_latency_us = 0, 135 .mall_allocated_for_dcn_mbytes = 32, 136 .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 137 .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 138 .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 139 .pct_ideal_sdp_bw_after_urgent = 90.0, 140 .pct_ideal_fabric_bw_after_urgent = 67.0, 141 .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, 142 .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented 143 .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented 144 .pct_ideal_dram_bw_after_urgent_strobe = 67.0, 145 .max_avg_sdp_bw_use_normal_percent = 80.0, 146 .max_avg_fabric_bw_use_normal_percent = 60.0, 147 .max_avg_dram_bw_use_normal_strobe_percent = 50.0, 148 .max_avg_dram_bw_use_normal_percent = 15.0, 149 .num_chans = 8, 150 .dram_channel_width_bytes = 2, 151 .fabric_datapath_to_dcn_data_return_bytes = 64, 152 .return_bus_width_bytes = 64, 153 .downspread_percent = 0.38, 154 .dcn_downspread_percent = 0.5, 155 .dram_clock_change_latency_us = 400, 156 .dispclk_dppclk_vco_speed_mhz = 4300.0, 157 .do_urgent_latency_adjustment = true, 158 .urgent_latency_adjustment_fabric_clock_component_us = 1.0, 159 .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, 160}; 161 162static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) 163{ 164 if (entry->dcfclk_mhz > 0) { 165 float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); 166 167 entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); 168 entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * 169 dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); 170 } else if (entry->fabricclk_mhz > 0) { 171 float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); 172 173 entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); 174 entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * 175 dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); 176 } else if (entry->dram_speed_mts > 0) { 177 float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * 178 dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); 179 180 entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); 181 entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); 182 } 183} 184 185static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) 186{ 187 float memory_bw_kbytes_sec; 188 float fabric_bw_kbytes_sec; 189 float sdp_bw_kbytes_sec; 190 float limiting_bw_kbytes_sec; 191 192 memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * 193 dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); 194 195 fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); 196 197 sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); 198 199 limiting_bw_kbytes_sec = memory_bw_kbytes_sec; 200 201 if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) 202 limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; 203 204 if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) 205 limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; 206 207 return limiting_bw_kbytes_sec; 208} 209 210static void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, 211 unsigned int *num_entries, 212 struct _vcs_dpi_voltage_scaling_st *entry) 213{ 214 int i = 0; 215 int index = 0; 216 217 dc_assert_fp_enabled(); 218 219 if (*num_entries == 0) { 220 table[0] = *entry; 221 (*num_entries)++; 222 } else { 223 while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) { 224 index++; 225 if (index >= *num_entries) 226 break; 227 } 228 229 for (i = *num_entries; i > index; i--) 230 table[i] = table[i - 1]; 231 232 table[index] = *entry; 233 (*num_entries)++; 234 } 235} 236 237static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, 238 unsigned int index) 239{ 240 int i; 241 242 if (*num_entries == 0) 243 return; 244 245 for (i = index; i < *num_entries - 1; i++) { 246 table[i] = table[i + 1]; 247 } 248 memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); 249} 250 251static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry, 252 struct _vcs_dpi_voltage_scaling_st *second_entry) 253{ 254 struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry; 255 *first_entry = *second_entry; 256 *second_entry = temp_entry; 257} 258 259/* 260 * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK 261 */ 262static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) 263{ 264 unsigned int start_index = 0; 265 unsigned int end_index = 0; 266 unsigned int current_bw = 0; 267 268 for (int i = 0; i < (*num_entries - 1); i++) { 269 if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { 270 current_bw = table[i].net_bw_in_kbytes_sec; 271 start_index = i; 272 end_index = ++i; 273 274 while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw)) 275 end_index = ++i; 276 } 277 278 if (start_index != end_index) { 279 for (int j = start_index; j < end_index; j++) { 280 for (int k = start_index; k < end_index; k++) { 281 if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz) 282 swap_table_entries(&table[k], &table[k+1]); 283 } 284 } 285 } 286 287 start_index = 0; 288 end_index = 0; 289 290 } 291} 292 293/* 294 * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing 295 * and remove entries that do not follow this order 296 */ 297static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) 298{ 299 for (int i = 0; i < (*num_entries - 1); i++) { 300 if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { 301 if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) || 302 (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz)) 303 remove_entry_from_table_at_index(table, num_entries, i); 304 } 305 } 306} 307 308/* 309 * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings 310 * Input: 311 * max_clk_limit - struct containing the desired clock timings 312 * Output: 313 * curr_clk_limit - struct containing the timings that need to be overwritten 314 * Return: 0 upon success, non-zero for failure 315 */ 316static int override_max_clk_values(struct clk_limit_table_entry *max_clk_limit, 317 struct clk_limit_table_entry *curr_clk_limit) 318{ 319 if (NULL == max_clk_limit || NULL == curr_clk_limit) 320 return -1; //invalid parameters 321 322 //only overwrite if desired max clock frequency is initialized 323 if (max_clk_limit->dcfclk_mhz != 0) 324 curr_clk_limit->dcfclk_mhz = max_clk_limit->dcfclk_mhz; 325 326 if (max_clk_limit->fclk_mhz != 0) 327 curr_clk_limit->fclk_mhz = max_clk_limit->fclk_mhz; 328 329 if (max_clk_limit->memclk_mhz != 0) 330 curr_clk_limit->memclk_mhz = max_clk_limit->memclk_mhz; 331 332 if (max_clk_limit->socclk_mhz != 0) 333 curr_clk_limit->socclk_mhz = max_clk_limit->socclk_mhz; 334 335 if (max_clk_limit->dtbclk_mhz != 0) 336 curr_clk_limit->dtbclk_mhz = max_clk_limit->dtbclk_mhz; 337 338 if (max_clk_limit->dispclk_mhz != 0) 339 curr_clk_limit->dispclk_mhz = max_clk_limit->dispclk_mhz; 340 341 return 0; 342} 343 344static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk_bw_params *bw_params, 345 struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) 346{ 347 int i, j; 348 struct _vcs_dpi_voltage_scaling_st entry = {0}; 349 struct clk_limit_table_entry max_clk_data = {0}; 350 351 unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; 352 353 static const unsigned int num_dcfclk_stas = 5; 354 unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; 355 356 unsigned int num_uclk_dpms = 0; 357 unsigned int num_fclk_dpms = 0; 358 unsigned int num_dcfclk_dpms = 0; 359 360 unsigned int num_dc_uclk_dpms = 0; 361 unsigned int num_dc_fclk_dpms = 0; 362 unsigned int num_dc_dcfclk_dpms = 0; 363 364 for (i = 0; i < MAX_NUM_DPM_LVL; i++) { 365 if (bw_params->clk_table.entries[i].dcfclk_mhz > max_clk_data.dcfclk_mhz) 366 max_clk_data.dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; 367 if (bw_params->clk_table.entries[i].fclk_mhz > max_clk_data.fclk_mhz) 368 max_clk_data.fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; 369 if (bw_params->clk_table.entries[i].memclk_mhz > max_clk_data.memclk_mhz) 370 max_clk_data.memclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; 371 if (bw_params->clk_table.entries[i].dispclk_mhz > max_clk_data.dispclk_mhz) 372 max_clk_data.dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; 373 if (bw_params->clk_table.entries[i].dppclk_mhz > max_clk_data.dppclk_mhz) 374 max_clk_data.dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; 375 if (bw_params->clk_table.entries[i].phyclk_mhz > max_clk_data.phyclk_mhz) 376 max_clk_data.phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; 377 if (bw_params->clk_table.entries[i].dtbclk_mhz > max_clk_data.dtbclk_mhz) 378 max_clk_data.dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; 379 380 if (bw_params->clk_table.entries[i].memclk_mhz > 0) { 381 num_uclk_dpms++; 382 if (bw_params->clk_table.entries[i].memclk_mhz <= bw_params->dc_mode_limit.memclk_mhz) 383 num_dc_uclk_dpms++; 384 } 385 if (bw_params->clk_table.entries[i].fclk_mhz > 0) { 386 num_fclk_dpms++; 387 if (bw_params->clk_table.entries[i].fclk_mhz <= bw_params->dc_mode_limit.fclk_mhz) 388 num_dc_fclk_dpms++; 389 } 390 if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) { 391 num_dcfclk_dpms++; 392 if (bw_params->clk_table.entries[i].dcfclk_mhz <= bw_params->dc_mode_limit.dcfclk_mhz) 393 num_dc_dcfclk_dpms++; 394 } 395 } 396 397 if (!disable_dc_mode_overwrite) { 398 //Overwrite max frequencies with max DC mode frequencies for DC mode systems 399 override_max_clk_values(&bw_params->dc_mode_limit, &max_clk_data); 400 num_uclk_dpms = num_dc_uclk_dpms; 401 num_fclk_dpms = num_dc_fclk_dpms; 402 num_dcfclk_dpms = num_dc_dcfclk_dpms; 403 bw_params->clk_table.num_entries_per_clk.num_memclk_levels = num_uclk_dpms; 404 bw_params->clk_table.num_entries_per_clk.num_fclk_levels = num_fclk_dpms; 405 } 406 407 if (num_dcfclk_dpms > 0 && bw_params->clk_table.entries[0].fclk_mhz > min_fclk_mhz) 408 min_fclk_mhz = bw_params->clk_table.entries[0].fclk_mhz; 409 410 if (!max_clk_data.dcfclk_mhz || !max_clk_data.dispclk_mhz || !max_clk_data.dtbclk_mhz) 411 return -1; 412 413 if (max_clk_data.dppclk_mhz == 0) 414 max_clk_data.dppclk_mhz = max_clk_data.dispclk_mhz; 415 416 if (max_clk_data.fclk_mhz == 0) 417 max_clk_data.fclk_mhz = max_clk_data.dcfclk_mhz * 418 dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 419 dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; 420 421 if (max_clk_data.phyclk_mhz == 0) 422 max_clk_data.phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; 423 424 *num_entries = 0; 425 entry.dispclk_mhz = max_clk_data.dispclk_mhz; 426 entry.dscclk_mhz = max_clk_data.dispclk_mhz / 3; 427 entry.dppclk_mhz = max_clk_data.dppclk_mhz; 428 entry.dtbclk_mhz = max_clk_data.dtbclk_mhz; 429 entry.phyclk_mhz = max_clk_data.phyclk_mhz; 430 entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; 431 entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; 432 433 // Insert all the DCFCLK STAs 434 for (i = 0; i < num_dcfclk_stas; i++) { 435 entry.dcfclk_mhz = dcfclk_sta_targets[i]; 436 entry.fabricclk_mhz = 0; 437 entry.dram_speed_mts = 0; 438 439 get_optimal_ntuple(&entry); 440 entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); 441 dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); 442 } 443 444 // Insert the max DCFCLK 445 entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; 446 entry.fabricclk_mhz = 0; 447 entry.dram_speed_mts = 0; 448 449 get_optimal_ntuple(&entry); 450 entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); 451 dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); 452 453 // Insert the UCLK DPMS 454 for (i = 0; i < num_uclk_dpms; i++) { 455 entry.dcfclk_mhz = 0; 456 entry.fabricclk_mhz = 0; 457 entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; 458 459 get_optimal_ntuple(&entry); 460 entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); 461 dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); 462 } 463 464 // If FCLK is coarse grained, insert individual DPMs. 465 if (num_fclk_dpms > 2) { 466 for (i = 0; i < num_fclk_dpms; i++) { 467 entry.dcfclk_mhz = 0; 468 entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; 469 entry.dram_speed_mts = 0; 470 471 get_optimal_ntuple(&entry); 472 entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); 473 dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); 474 } 475 } 476 // If FCLK fine grained, only insert max 477 else { 478 entry.dcfclk_mhz = 0; 479 entry.fabricclk_mhz = max_clk_data.fclk_mhz; 480 entry.dram_speed_mts = 0; 481 482 get_optimal_ntuple(&entry); 483 entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); 484 dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); 485 } 486 487 // At this point, the table contains all "points of interest" based on 488 // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock 489 // ratios (by derate, are exact). 490 491 // Remove states that require higher clocks than are supported 492 for (i = *num_entries - 1; i >= 0 ; i--) { 493 if (table[i].dcfclk_mhz > max_clk_data.dcfclk_mhz || 494 table[i].fabricclk_mhz > max_clk_data.fclk_mhz || 495 table[i].dram_speed_mts > max_clk_data.memclk_mhz * 16) 496 remove_entry_from_table_at_index(table, num_entries, i); 497 } 498 499 // Insert entry with all max dc limits without bandwitch matching 500 if (!disable_dc_mode_overwrite) { 501 struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry; 502 503 max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; 504 max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz; 505 max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16; 506 507 max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry); 508 dcn321_insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry); 509 510 sort_entries_with_same_bw(table, num_entries); 511 remove_inconsistent_entries(table, num_entries); 512 } 513 514 515 516 // At this point, the table only contains supported points of interest 517 // it could be used as is, but some states may be redundant due to 518 // coarse grained nature of some clocks, so we want to round up to 519 // coarse grained DPMs and remove duplicates. 520 521 // Round up UCLKs 522 for (i = *num_entries - 1; i >= 0 ; i--) { 523 for (j = 0; j < num_uclk_dpms; j++) { 524 if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { 525 table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; 526 break; 527 } 528 } 529 } 530 531 // If FCLK is coarse grained, round up to next DPMs 532 if (num_fclk_dpms > 2) { 533 for (i = *num_entries - 1; i >= 0 ; i--) { 534 for (j = 0; j < num_fclk_dpms; j++) { 535 if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { 536 table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; 537 break; 538 } 539 } 540 } 541 } 542 // Otherwise, round up to minimum. 543 else { 544 for (i = *num_entries - 1; i >= 0 ; i--) { 545 if (table[i].fabricclk_mhz < min_fclk_mhz) { 546 table[i].fabricclk_mhz = min_fclk_mhz; 547 } 548 } 549 } 550 551 // Round DCFCLKs up to minimum 552 for (i = *num_entries - 1; i >= 0 ; i--) { 553 if (table[i].dcfclk_mhz < min_dcfclk_mhz) { 554 table[i].dcfclk_mhz = min_dcfclk_mhz; 555 } 556 } 557 558 // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 559 i = 0; 560 while (i < *num_entries - 1) { 561 if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && 562 table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && 563 table[i].dram_speed_mts == table[i + 1].dram_speed_mts) 564 remove_entry_from_table_at_index(table, num_entries, i + 1); 565 else 566 i++; 567 } 568 569 // Fix up the state indicies 570 for (i = *num_entries - 1; i >= 0 ; i--) { 571 table[i].state = i; 572 } 573 574 return 0; 575} 576 577static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, 578 unsigned int *optimal_dcfclk, 579 unsigned int *optimal_fclk) 580{ 581 double bw_from_dram, bw_from_dram1, bw_from_dram2; 582 583 bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * 584 dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); 585 bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * 586 dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); 587 588 bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; 589 590 if (optimal_fclk) 591 *optimal_fclk = bw_from_dram / 592 (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); 593 594 if (optimal_dcfclk) 595 *optimal_dcfclk = bw_from_dram / 596 (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); 597} 598 599/** dcn321_update_bw_bounding_box 600 * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet 601 * with actual values as per dGPU SKU: 602 * -with passed few options from dc->config 603 * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) 604 * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes 605 * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU 606 * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) 607 * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different 608 * clocks (which might differ for certain dGPU SKU of the same ASIC) 609 */ 610void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) 611{ 612 dc_assert_fp_enabled(); 613 /* Overrides from dc->config options */ 614 dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; 615 616 /* Override from passed dc->bb_overrides if available*/ 617 if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns 618 && dc->bb_overrides.sr_exit_time_ns) { 619 dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; 620 } 621 622 if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) 623 != dc->bb_overrides.sr_enter_plus_exit_time_ns 624 && dc->bb_overrides.sr_enter_plus_exit_time_ns) { 625 dcn3_21_soc.sr_enter_plus_exit_time_us = 626 dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; 627 } 628 629 if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns 630 && dc->bb_overrides.urgent_latency_ns) { 631 dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; 632 dcn3_21_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0; 633 } 634 635 if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) 636 != dc->bb_overrides.dram_clock_change_latency_ns 637 && dc->bb_overrides.dram_clock_change_latency_ns) { 638 dcn3_21_soc.dram_clock_change_latency_us = 639 dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; 640 } 641 642 if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000) 643 != dc->bb_overrides.fclk_clock_change_latency_ns 644 && dc->bb_overrides.fclk_clock_change_latency_ns) { 645 dcn3_21_soc.fclk_change_latency_us = 646 dc->bb_overrides.fclk_clock_change_latency_ns / 1000; 647 } 648 649 if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) 650 != dc->bb_overrides.dummy_clock_change_latency_ns 651 && dc->bb_overrides.dummy_clock_change_latency_ns) { 652 dcn3_21_soc.dummy_pstate_latency_us = 653 dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; 654 } 655 656 /* Override from VBIOS if VBIOS bb_info available */ 657 if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { 658 struct bp_soc_bb_info bb_info = {0}; 659 660 if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { 661 if (bb_info.dram_clock_change_latency_100ns > 0) 662 dcn3_21_soc.dram_clock_change_latency_us = 663 bb_info.dram_clock_change_latency_100ns * 10; 664 665 if (bb_info.dram_sr_enter_exit_latency_100ns > 0) 666 dcn3_21_soc.sr_enter_plus_exit_time_us = 667 bb_info.dram_sr_enter_exit_latency_100ns * 10; 668 669 if (bb_info.dram_sr_exit_latency_100ns > 0) 670 dcn3_21_soc.sr_exit_time_us = 671 bb_info.dram_sr_exit_latency_100ns * 10; 672 } 673 } 674 675 /* Override from VBIOS for num_chan */ 676 if (dc->ctx->dc_bios->vram_info.num_chans) { 677 dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; 678 dcn3_21_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc, 679 dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel); 680 } 681 682 if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) 683 dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; 684 685 /* DML DSC delay factor workaround */ 686 dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; 687 688 dcn3_21_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0; 689 690 /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ 691 dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 692 dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 693 694 /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ 695 if (dc->debug.use_legacy_soc_bb_mechanism) { 696 unsigned int i = 0, j = 0, num_states = 0; 697 698 unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; 699 unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; 700 unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; 701 unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; 702 703 unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; 704 unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; 705 unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; 706 707 for (i = 0; i < MAX_NUM_DPM_LVL; i++) { 708 if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) 709 max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; 710 if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) 711 max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; 712 if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) 713 max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; 714 if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) 715 max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; 716 } 717 if (!max_dcfclk_mhz) 718 max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; 719 if (!max_dispclk_mhz) 720 max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; 721 if (!max_dppclk_mhz) 722 max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; 723 if (!max_phyclk_mhz) 724 max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; 725 726 if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 727 // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array 728 dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; 729 num_dcfclk_sta_targets++; 730 } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 731 // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates 732 for (i = 0; i < num_dcfclk_sta_targets; i++) { 733 if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { 734 dcfclk_sta_targets[i] = max_dcfclk_mhz; 735 break; 736 } 737 } 738 // Update size of array since we "removed" duplicates 739 num_dcfclk_sta_targets = i + 1; 740 } 741 742 num_uclk_states = bw_params->clk_table.num_entries; 743 744 // Calculate optimal dcfclk for each uclk 745 for (i = 0; i < num_uclk_states; i++) { 746 dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, 747 &optimal_dcfclk_for_uclk[i], NULL); 748 if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { 749 optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; 750 } 751 } 752 753 // Calculate optimal uclk for each dcfclk sta target 754 for (i = 0; i < num_dcfclk_sta_targets; i++) { 755 for (j = 0; j < num_uclk_states; j++) { 756 if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { 757 optimal_uclk_for_dcfclk_sta_targets[i] = 758 bw_params->clk_table.entries[j].memclk_mhz * 16; 759 break; 760 } 761 } 762 } 763 764 i = 0; 765 j = 0; 766 // create the final dcfclk and uclk table 767 while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { 768 if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { 769 dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; 770 dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; 771 } else { 772 if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { 773 dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; 774 dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; 775 } else { 776 j = num_uclk_states; 777 } 778 } 779 } 780 781 while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { 782 dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; 783 dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; 784 } 785 786 while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && 787 optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { 788 dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; 789 dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; 790 } 791 792 dcn3_21_soc.num_states = num_states; 793 for (i = 0; i < dcn3_21_soc.num_states; i++) { 794 dcn3_21_soc.clock_limits[i].state = i; 795 dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; 796 dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; 797 798 /* Fill all states with max values of all these clocks */ 799 dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; 800 dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; 801 dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; 802 dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; 803 804 /* Populate from bw_params for DTBCLK, SOCCLK */ 805 if (i > 0) { 806 if (!bw_params->clk_table.entries[i].dtbclk_mhz) { 807 dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; 808 } else { 809 dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; 810 } 811 } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { 812 dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; 813 } 814 815 if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) 816 dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; 817 else 818 dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; 819 820 if (!dram_speed_mts[i] && i > 0) 821 dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; 822 else 823 dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; 824 825 /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ 826 /* PHYCLK_D18, PHYCLK_D32 */ 827 dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; 828 dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; 829 } 830 } else { 831 build_synthetic_soc_states(dc->debug.disable_dc_mode_overwrite, bw_params, 832 dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); 833 } 834 835 /* Re-init DML with updated bb */ 836 dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); 837 if (dc->current_state) 838 dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); 839} 840 841