1/* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26#include "../display_mode_lib.h" 27#include "../display_mode_vba.h" 28#include "../dml_inline_defs.h" 29#include "display_rq_dlg_calc_31.h" 30#include "../dcn30/display_mode_vba_30.h" 31 32static bool is_dual_plane(enum source_format_class source_format) 33{ 34 bool ret_val = 0; 35 36 if ((source_format == dm_420_12) || (source_format == dm_420_8) || (source_format == dm_420_10) || (source_format == dm_rgbe_alpha)) 37 ret_val = 1; 38 39 return ret_val; 40} 41 42static double get_refcyc_per_delivery( 43 struct display_mode_lib *mode_lib, 44 double refclk_freq_in_mhz, 45 double pclk_freq_in_mhz, 46 unsigned int odm_combine, 47 unsigned int recout_width, 48 unsigned int hactive, 49 double vratio, 50 double hscale_pixel_rate, 51 unsigned int delivery_width, 52 unsigned int req_per_swath_ub) 53{ 54 double refcyc_per_delivery = 0.0; 55 56 if (vratio <= 1.0) { 57 if (odm_combine) 58 refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) ((unsigned int) odm_combine * 2) 59 * dml_min((double) recout_width, (double) hactive / ((unsigned int) odm_combine * 2)) / pclk_freq_in_mhz / (double) req_per_swath_ub; 60 else 61 refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width / pclk_freq_in_mhz / (double) req_per_swath_ub; 62 } else { 63 refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width / (double) hscale_pixel_rate / (double) req_per_swath_ub; 64 } 65 66#ifdef __DML_RQ_DLG_CALC_DEBUG__ 67 dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); 68 dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); 69 dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); 70 dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); 71 dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); 72 dml_print("DML_DLG: %s: hscale_pixel_rate = %3.2f\n", __func__, hscale_pixel_rate); 73 dml_print("DML_DLG: %s: delivery_width = %d\n", __func__, delivery_width); 74 dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); 75#endif 76 77 return refcyc_per_delivery; 78 79} 80 81static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) 82{ 83 if (tile_size == dm_256k_tile) 84 return (256 * 1024); 85 else if (tile_size == dm_64k_tile) 86 return (64 * 1024); 87 else 88 return (4 * 1024); 89} 90 91static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, display_data_rq_regs_st *rq_regs, const display_data_rq_sizing_params_st *rq_sizing) 92{ 93 print__data_rq_sizing_params_st(mode_lib, rq_sizing); 94 95 rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10; 96 97 if (rq_sizing->min_chunk_bytes == 0) 98 rq_regs->min_chunk_size = 0; 99 else 100 rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1; 101 102 rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10; 103 if (rq_sizing->min_meta_chunk_bytes == 0) 104 rq_regs->min_meta_chunk_size = 0; 105 else 106 rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1; 107 108 rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6; 109 rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6; 110} 111 112static void extract_rq_regs(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_rq_params_st *rq_param) 113{ 114 unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 115 unsigned int detile_buf_plane1_addr = 0; 116 117 extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), &rq_param->sizing.rq_l); 118 119 rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3; 120 121 if (rq_param->yuv420) { 122 extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), &rq_param->sizing.rq_c); 123 rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3; 124 } 125 126 rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height); 127 rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height); 128 129 // FIXME: take the max between luma, chroma chunk size? 130 // okay for now, as we are setting chunk_bytes to 8kb anyways 131 if (rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024 || (rq_param->yuv420 && rq_param->sizing.rq_c.chunk_bytes >= 32 * 1024)) { //32kb 132 rq_regs->drq_expansion_mode = 0; 133 } else { 134 rq_regs->drq_expansion_mode = 2; 135 } 136 rq_regs->prq_expansion_mode = 1; 137 rq_regs->mrq_expansion_mode = 1; 138 rq_regs->crq_expansion_mode = 1; 139 140 // Note: detile_buf_plane1_addr is in unit of 1KB 141 if (rq_param->yuv420) { 142 if ((double) rq_param->misc.rq_l.stored_swath_bytes / (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5) { 143 detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma 144#ifdef __DML_RQ_DLG_CALC_DEBUG__ 145 dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); 146#endif 147 } else { 148 detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0; // 2/3 to luma 149#ifdef __DML_RQ_DLG_CALC_DEBUG__ 150 dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); 151#endif 152 } 153 } 154 rq_regs->plane1_base_address = detile_buf_plane1_addr; 155 156#ifdef __DML_RQ_DLG_CALC_DEBUG__ 157 dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes); 158 dml_print("DML_DLG: %s: detile_buf_plane1_addr = %0d\n", __func__, detile_buf_plane1_addr); 159 dml_print("DML_DLG: %s: plane1_base_address = %0d\n", __func__, rq_regs->plane1_base_address); 160 dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes); 161 dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes); 162 dml_print("DML_DLG: %s: rq_l.swath_height = %0d\n", __func__, rq_param->dlg.rq_l.swath_height); 163 dml_print("DML_DLG: %s: rq_c.swath_height = %0d\n", __func__, rq_param->dlg.rq_c.swath_height); 164#endif 165} 166 167static void handle_det_buf_split(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_source_params_st *pipe_src_param) 168{ 169 unsigned int total_swath_bytes = 0; 170 unsigned int swath_bytes_l = 0; 171 unsigned int swath_bytes_c = 0; 172 unsigned int full_swath_bytes_packed_l = 0; 173 unsigned int full_swath_bytes_packed_c = 0; 174 bool req128_l = 0; 175 bool req128_c = 0; 176 bool surf_linear = (pipe_src_param->sw_mode == dm_sw_linear); 177 bool surf_vert = (pipe_src_param->source_scan == dm_vert); 178 unsigned int log2_swath_height_l = 0; 179 unsigned int log2_swath_height_c = 0; 180 unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 181 182 full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; 183 full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; 184 185#ifdef __DML_RQ_DLG_CALC_DEBUG__ 186 dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l); 187 dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c); 188#endif 189 190 if (rq_param->yuv420_10bpc) { 191 full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256; 192 full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2.0 / 3.0, 256, 1) + 256; 193#ifdef __DML_RQ_DLG_CALC_DEBUG__ 194 dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_l); 195 dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d (3-2 packing)\n", __func__, full_swath_bytes_packed_c); 196#endif 197 } 198 199 if (rq_param->yuv420) 200 total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; 201 else 202 total_swath_bytes = 2 * full_swath_bytes_packed_l; 203 204#ifdef __DML_RQ_DLG_CALC_DEBUG__ 205 dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes); 206 dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %0d\n", __func__, detile_buf_size_in_bytes); 207#endif 208 209 if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request 210 req128_l = 0; 211 req128_c = 0; 212 swath_bytes_l = full_swath_bytes_packed_l; 213 swath_bytes_c = full_swath_bytes_packed_c; 214 } else if (!rq_param->yuv420) { 215 req128_l = 1; 216 req128_c = 0; 217 swath_bytes_c = full_swath_bytes_packed_c; 218 swath_bytes_l = full_swath_bytes_packed_l / 2; 219 } else if ((double) full_swath_bytes_packed_l / (double) full_swath_bytes_packed_c < 1.5) { 220 req128_l = 0; 221 req128_c = 1; 222 swath_bytes_l = full_swath_bytes_packed_l; 223 swath_bytes_c = full_swath_bytes_packed_c / 2; 224 225 total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; 226 227 if (total_swath_bytes > detile_buf_size_in_bytes) { 228 req128_l = 1; 229 swath_bytes_l = full_swath_bytes_packed_l / 2; 230 } 231 } else { 232 req128_l = 1; 233 req128_c = 0; 234 swath_bytes_l = full_swath_bytes_packed_l / 2; 235 swath_bytes_c = full_swath_bytes_packed_c; 236 237 total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; 238 239 if (total_swath_bytes > detile_buf_size_in_bytes) { 240 req128_c = 1; 241 swath_bytes_c = full_swath_bytes_packed_c / 2; 242 } 243 } 244 245 if (rq_param->yuv420) 246 total_swath_bytes = 2 * swath_bytes_l + 2 * swath_bytes_c; 247 else 248 total_swath_bytes = 2 * swath_bytes_l; 249 250 rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; 251 rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; 252 253#ifdef __DML_RQ_DLG_CALC_DEBUG__ 254 dml_print("DML_DLG: %s: total_swath_bytes = %0d\n", __func__, total_swath_bytes); 255 dml_print("DML_DLG: %s: rq_l.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_l.stored_swath_bytes); 256 dml_print("DML_DLG: %s: rq_c.stored_swath_bytes = %0d\n", __func__, rq_param->misc.rq_c.stored_swath_bytes); 257#endif 258 if (surf_linear) { 259 log2_swath_height_l = 0; 260 log2_swath_height_c = 0; 261 } else { 262 unsigned int swath_height_l; 263 unsigned int swath_height_c; 264 265 if (!surf_vert) { 266 swath_height_l = rq_param->misc.rq_l.blk256_height; 267 swath_height_c = rq_param->misc.rq_c.blk256_height; 268 } else { 269 swath_height_l = rq_param->misc.rq_l.blk256_width; 270 swath_height_c = rq_param->misc.rq_c.blk256_width; 271 } 272 273 if (swath_height_l > 0) 274 log2_swath_height_l = dml_log2(swath_height_l); 275 276 if (req128_l && log2_swath_height_l > 0) 277 log2_swath_height_l -= 1; 278 279 if (swath_height_c > 0) 280 log2_swath_height_c = dml_log2(swath_height_c); 281 282 if (req128_c && log2_swath_height_c > 0) 283 log2_swath_height_c -= 1; 284 } 285 286 rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; 287 rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; 288 289#ifdef __DML_RQ_DLG_CALC_DEBUG__ 290 dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); 291 dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); 292 dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, full_swath_bytes_packed_l); 293 dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, full_swath_bytes_packed_c); 294 dml_print("DML_DLG: %s: swath_height luma = %0d\n", __func__, rq_param->dlg.rq_l.swath_height); 295 dml_print("DML_DLG: %s: swath_height chroma = %0d\n", __func__, rq_param->dlg.rq_c.swath_height); 296#endif 297} 298 299static void get_meta_and_pte_attr( 300 struct display_mode_lib *mode_lib, 301 display_data_rq_dlg_params_st *rq_dlg_param, 302 display_data_rq_misc_params_st *rq_misc_param, 303 display_data_rq_sizing_params_st *rq_sizing_param, 304 unsigned int vp_width, 305 unsigned int vp_height, 306 unsigned int data_pitch, 307 unsigned int meta_pitch, 308 unsigned int source_format, 309 unsigned int tiling, 310 unsigned int macro_tile_size, 311 unsigned int source_scan, 312 unsigned int hostvm_enable, 313 unsigned int is_chroma, 314 unsigned int surface_height) 315{ 316 bool surf_linear = (tiling == dm_sw_linear); 317 bool surf_vert = (source_scan == dm_vert); 318 319 unsigned int bytes_per_element; 320 unsigned int bytes_per_element_y; 321 unsigned int bytes_per_element_c; 322 323 unsigned int blk256_width = 0; 324 unsigned int blk256_height = 0; 325 326 unsigned int blk256_width_y = 0; 327 unsigned int blk256_height_y = 0; 328 unsigned int blk256_width_c = 0; 329 unsigned int blk256_height_c = 0; 330 unsigned int log2_bytes_per_element; 331 unsigned int log2_blk256_width; 332 unsigned int log2_blk256_height; 333 unsigned int blk_bytes; 334 unsigned int log2_blk_bytes; 335 unsigned int log2_blk_height; 336 unsigned int log2_blk_width; 337 unsigned int log2_meta_req_bytes; 338 unsigned int log2_meta_req_height; 339 unsigned int log2_meta_req_width; 340 unsigned int meta_req_width; 341 unsigned int meta_req_height; 342 unsigned int log2_meta_row_height; 343 unsigned int meta_row_width_ub; 344 unsigned int log2_meta_chunk_bytes; 345 unsigned int log2_meta_chunk_height; 346 347 //full sized meta chunk width in unit of data elements 348 unsigned int log2_meta_chunk_width; 349 unsigned int log2_min_meta_chunk_bytes; 350 unsigned int min_meta_chunk_width; 351 unsigned int meta_chunk_width; 352 unsigned int meta_chunk_per_row_int; 353 unsigned int meta_row_remainder; 354 unsigned int meta_chunk_threshold; 355 unsigned int meta_blk_height; 356 unsigned int meta_surface_bytes; 357 unsigned int vmpg_bytes; 358 unsigned int meta_pte_req_per_frame_ub; 359 unsigned int meta_pte_bytes_per_frame_ub; 360 const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.gpuvm_min_page_size_bytes); 361 const bool dual_plane_en = is_dual_plane((enum source_format_class) (source_format)); 362 const unsigned int dpte_buf_in_pte_reqs = 363 dual_plane_en ? (is_chroma ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma) : (mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma 364 + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); 365 366 unsigned int log2_vmpg_height = 0; 367 unsigned int log2_vmpg_width = 0; 368 unsigned int log2_dpte_req_height_ptes = 0; 369 unsigned int log2_dpte_req_height = 0; 370 unsigned int log2_dpte_req_width = 0; 371 unsigned int log2_dpte_row_height_linear = 0; 372 unsigned int log2_dpte_row_height = 0; 373 unsigned int log2_dpte_group_width = 0; 374 unsigned int dpte_row_width_ub = 0; 375 unsigned int dpte_req_height = 0; 376 unsigned int dpte_req_width = 0; 377 unsigned int dpte_group_width = 0; 378 unsigned int log2_dpte_group_bytes = 0; 379 unsigned int log2_dpte_group_length = 0; 380 double byte_per_pixel_det_y; 381 double byte_per_pixel_det_c; 382 383 dml30_CalculateBytePerPixelAnd256BBlockSizes( 384 (enum source_format_class) (source_format), 385 (enum dm_swizzle_mode) (tiling), 386 &bytes_per_element_y, 387 &bytes_per_element_c, 388 &byte_per_pixel_det_y, 389 &byte_per_pixel_det_c, 390 &blk256_height_y, 391 &blk256_height_c, 392 &blk256_width_y, 393 &blk256_width_c); 394 395 if (!is_chroma) { 396 blk256_width = blk256_width_y; 397 blk256_height = blk256_height_y; 398 bytes_per_element = bytes_per_element_y; 399 } else { 400 blk256_width = blk256_width_c; 401 blk256_height = blk256_height_c; 402 bytes_per_element = bytes_per_element_c; 403 } 404 405 log2_bytes_per_element = dml_log2(bytes_per_element); 406 407 dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); 408 dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); 409 dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); 410 dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); 411 412 log2_blk256_width = dml_log2((double) blk256_width); 413 log2_blk256_height = dml_log2((double) blk256_height); 414 blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); 415 log2_blk_bytes = dml_log2((double) blk_bytes); 416 log2_blk_height = 0; 417 log2_blk_width = 0; 418 419 // remember log rule 420 // "+" in log is multiply 421 // "-" in log is divide 422 // "/2" is like square root 423 // blk is vertical biased 424 if (tiling != dm_sw_linear) 425 log2_blk_height = log2_blk256_height + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); 426 else 427 log2_blk_height = 0; // blk height of 1 428 429 log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; 430 431 if (!surf_vert) { 432 int unsigned temp; 433 434 temp = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + blk256_width; 435 if (data_pitch < blk256_width) { 436 dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < blk256_width=%u\n", __func__, data_pitch, blk256_width); 437 } else { 438 if (temp > data_pitch) { 439 if (data_pitch >= vp_width) 440 temp = data_pitch; 441 else 442 dml_print("WARNING: DML_DLG: %s: swath_size calculation ignoring data_pitch=%u < vp_width=%u\n", __func__, data_pitch, vp_width); 443 } 444 } 445 rq_dlg_param->swath_width_ub = temp; 446 rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_width; 447 } else { 448 int unsigned temp; 449 450 temp = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + blk256_height; 451 if (surface_height < blk256_height) { 452 dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < blk256_height=%u\n", __func__, surface_height, blk256_height); 453 } else { 454 if (temp > surface_height) { 455 if (surface_height >= vp_height) 456 temp = surface_height; 457 else 458 dml_print("WARNING: DML_DLG: %s swath_size calculation ignored surface_height=%u < vp_height=%u\n", __func__, surface_height, vp_height); 459 } 460 } 461 rq_dlg_param->swath_width_ub = temp; 462 rq_dlg_param->req_per_swath_ub = temp >> log2_blk256_height; 463 } 464 465 if (!surf_vert) 466 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height * bytes_per_element; 467 else 468 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width * bytes_per_element; 469 470 rq_misc_param->blk256_height = blk256_height; 471 rq_misc_param->blk256_width = blk256_width; 472 473 // ------- 474 // meta 475 // ------- 476 log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element 477 478 // each 64b meta request for dcn is 8x8 meta elements and 479 // a meta element covers one 256b block of the data surface. 480 log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 481 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; 482 meta_req_width = 1 << log2_meta_req_width; 483 meta_req_height = 1 << log2_meta_req_height; 484 log2_meta_row_height = 0; 485 meta_row_width_ub = 0; 486 487 // the dimensions of a meta row are meta_row_width x meta_row_height in elements. 488 // calculate upper bound of the meta_row_width 489 if (!surf_vert) { 490 log2_meta_row_height = log2_meta_req_height; 491 meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + meta_req_width; 492 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; 493 } else { 494 log2_meta_row_height = log2_meta_req_width; 495 meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + meta_req_height; 496 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; 497 } 498 rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; 499 500 rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; 501 502 log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); 503 log2_meta_chunk_height = log2_meta_row_height; 504 505 //full sized meta chunk width in unit of data elements 506 log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height; 507 log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); 508 min_meta_chunk_width = 1 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element - log2_meta_chunk_height); 509 meta_chunk_width = 1 << log2_meta_chunk_width; 510 meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); 511 meta_row_remainder = meta_row_width_ub % meta_chunk_width; 512 meta_chunk_threshold = 0; 513 meta_blk_height = blk256_height * 64; 514 meta_surface_bytes = meta_pitch * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) * bytes_per_element / 256; 515 vmpg_bytes = mode_lib->soc.gpuvm_min_page_size_bytes; 516 meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, 8 * vmpg_bytes, 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); 517 meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request 518 rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; 519 520 dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); 521 dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); 522 dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", __func__, meta_pte_req_per_frame_ub); 523 dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", __func__, meta_pte_bytes_per_frame_ub); 524 525 if (!surf_vert) 526 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; 527 else 528 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; 529 530 if (meta_row_remainder <= meta_chunk_threshold) 531 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 532 else 533 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 534 535 // ------ 536 // dpte 537 // ------ 538 if (surf_linear) { 539 log2_vmpg_height = 0; // one line high 540 } else { 541 log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; 542 } 543 log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; 544 545 // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. 546 if (surf_linear) { //one 64B PTE request returns 8 PTEs 547 log2_dpte_req_height_ptes = 0; 548 log2_dpte_req_width = log2_vmpg_width + 3; 549 log2_dpte_req_height = 0; 550 } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size 551 //one 64B req gives 8x1 PTEs for 4KB tile 552 log2_dpte_req_height_ptes = 0; 553 log2_dpte_req_width = log2_blk_width + 3; 554 log2_dpte_req_height = log2_blk_height + 0; 555 } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB 556 //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB 557 log2_dpte_req_height_ptes = 4; 558 log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width 559 log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height 560 } else { //64KB page size and must 64KB tile block 561 //one 64B req gives 8x1 PTEs for 64KB tile 562 log2_dpte_req_height_ptes = 0; 563 log2_dpte_req_width = log2_blk_width + 3; 564 log2_dpte_req_height = log2_blk_height + 0; 565 } 566 567 // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height 568 // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent 569 // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) 570 //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; 571 //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; 572 dpte_req_height = 1 << log2_dpte_req_height; 573 dpte_req_width = 1 << log2_dpte_req_width; 574 575 // calculate pitch dpte row buffer can hold 576 // round the result down to a power of two. 577 if (surf_linear) { 578 unsigned int dpte_row_height; 579 580 log2_dpte_row_height_linear = dml_floor(dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 1); 581 582 dml_print("DML_DLG: %s: is_chroma = %d\n", __func__, is_chroma); 583 dml_print("DML_DLG: %s: dpte_buf_in_pte_reqs = %d\n", __func__, dpte_buf_in_pte_reqs); 584 dml_print("DML_DLG: %s: log2_dpte_row_height_linear = %d\n", __func__, log2_dpte_row_height_linear); 585 586 ASSERT(log2_dpte_row_height_linear >= 3); 587 588 if (log2_dpte_row_height_linear > 7) 589 log2_dpte_row_height_linear = 7; 590 591 log2_dpte_row_height = log2_dpte_row_height_linear; 592 // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. 593 // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. 594 dpte_row_height = 1 << log2_dpte_row_height; 595 dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, dpte_req_width, 1) + dpte_req_width; 596 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 597 } else { 598 // the upper bound of the dpte_row_width without dependency on viewport position follows. 599 // for tiled mode, row height is the same as req height and row store up to vp size upper bound 600 if (!surf_vert) { 601 log2_dpte_row_height = log2_dpte_req_height; 602 dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + dpte_req_width; 603 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 604 } else { 605 log2_dpte_row_height = (log2_blk_width < log2_dpte_req_width) ? log2_blk_width : log2_dpte_req_width; 606 dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + dpte_req_height; 607 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; 608 } 609 } 610 if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB 611 rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request 612 else 613 rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request 614 615 rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; 616 617 // the dpte_group_bytes is reduced for the specific case of vertical 618 // access of a tile surface that has dpte request of 8x1 ptes. 619 if (hostvm_enable) 620 rq_sizing_param->dpte_group_bytes = 512; 621 else { 622 if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group 623 rq_sizing_param->dpte_group_bytes = 512; 624 else 625 rq_sizing_param->dpte_group_bytes = 2048; 626 } 627 628 //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. 629 log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); 630 log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests 631 632 // full sized data pte group width in elements 633 if (!surf_vert) 634 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; 635 else 636 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; 637 638 //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B 639 if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB 640 log2_dpte_group_width = log2_dpte_group_width - 1; 641 642 dpte_group_width = 1 << log2_dpte_group_width; 643 644 // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, 645 // the upper bound for the dpte groups per row is as follows. 646 rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, 1); 647} 648 649static void get_surf_rq_param( 650 struct display_mode_lib *mode_lib, 651 display_data_rq_sizing_params_st *rq_sizing_param, 652 display_data_rq_dlg_params_st *rq_dlg_param, 653 display_data_rq_misc_params_st *rq_misc_param, 654 const display_pipe_params_st *pipe_param, 655 bool is_chroma, 656 bool is_alpha) 657{ 658 bool mode_422 = 0; 659 unsigned int vp_width = 0; 660 unsigned int vp_height = 0; 661 unsigned int data_pitch = 0; 662 unsigned int meta_pitch = 0; 663 unsigned int surface_height = 0; 664 unsigned int ppe = mode_422 ? 2 : 1; 665 666 // FIXME check if ppe apply for both luma and chroma in 422 case 667 if (is_chroma | is_alpha) { 668 vp_width = pipe_param->src.viewport_width_c / ppe; 669 vp_height = pipe_param->src.viewport_height_c; 670 data_pitch = pipe_param->src.data_pitch_c; 671 meta_pitch = pipe_param->src.meta_pitch_c; 672 surface_height = pipe_param->src.surface_height_y / 2.0; 673 } else { 674 vp_width = pipe_param->src.viewport_width / ppe; 675 vp_height = pipe_param->src.viewport_height; 676 data_pitch = pipe_param->src.data_pitch; 677 meta_pitch = pipe_param->src.meta_pitch; 678 surface_height = pipe_param->src.surface_height_y; 679 } 680 681 if (pipe_param->dest.odm_combine) { 682 unsigned int access_dir; 683 unsigned int full_src_vp_width; 684 unsigned int hactive_odm; 685 unsigned int src_hactive_odm; 686 687 access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed 688 hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2); 689 if (is_chroma) { 690 full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width; 691 src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm; 692 } else { 693 full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width; 694 src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm; 695 } 696 697 if (access_dir == 0) { 698 vp_width = dml_min(full_src_vp_width, src_hactive_odm); 699 dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width); 700 } else { 701 vp_height = dml_min(full_src_vp_width, src_hactive_odm); 702 dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height); 703 704 } 705 dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width); 706 dml_print("DML_DLG: %s: hactive_odm = %d\n", __func__, hactive_odm); 707 dml_print("DML_DLG: %s: src_hactive_odm = %d\n", __func__, src_hactive_odm); 708 } 709 710 rq_sizing_param->chunk_bytes = 8192; 711 712 if (is_alpha) { 713 rq_sizing_param->chunk_bytes = 4096; 714 } 715 716 if (rq_sizing_param->chunk_bytes == 64 * 1024) 717 rq_sizing_param->min_chunk_bytes = 0; 718 else 719 rq_sizing_param->min_chunk_bytes = 1024; 720 721 rq_sizing_param->meta_chunk_bytes = 2048; 722 rq_sizing_param->min_meta_chunk_bytes = 256; 723 724 if (pipe_param->src.hostvm) 725 rq_sizing_param->mpte_group_bytes = 512; 726 else 727 rq_sizing_param->mpte_group_bytes = 2048; 728 729 get_meta_and_pte_attr( 730 mode_lib, 731 rq_dlg_param, 732 rq_misc_param, 733 rq_sizing_param, 734 vp_width, 735 vp_height, 736 data_pitch, 737 meta_pitch, 738 pipe_param->src.source_format, 739 pipe_param->src.sw_mode, 740 pipe_param->src.macro_tile_size, 741 pipe_param->src.source_scan, 742 pipe_param->src.hostvm, 743 is_chroma, 744 surface_height); 745} 746 747static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param) 748{ 749 // get param for luma surface 750 rq_param->yuv420 = pipe_param->src.source_format == dm_420_8 || pipe_param->src.source_format == dm_420_10 || pipe_param->src.source_format == dm_rgbe_alpha 751 || pipe_param->src.source_format == dm_420_12; 752 753 rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10; 754 755 rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha) ? 1 : 0; 756 757 get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_l), &(rq_param->dlg.rq_l), &(rq_param->misc.rq_l), pipe_param, 0, 0); 758 759 if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) { 760 // get param for chroma surface 761 get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_c), &(rq_param->dlg.rq_c), &(rq_param->misc.rq_c), pipe_param, 1, rq_param->rgbe_alpha); 762 } 763 764 // calculate how to split the det buffer space between luma and chroma 765 handle_det_buf_split(mode_lib, rq_param, &pipe_param->src); 766 print__rq_params_st(mode_lib, rq_param); 767} 768 769void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param) 770{ 771 display_rq_params_st rq_param = {0}; 772 773 memset(rq_regs, 0, sizeof(*rq_regs)); 774 dml_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param); 775 extract_rq_regs(mode_lib, rq_regs, &rq_param); 776 777 print__rq_regs_st(mode_lib, rq_regs); 778} 779 780static void calculate_ttu_cursor( 781 struct display_mode_lib *mode_lib, 782 double *refcyc_per_req_delivery_pre_cur, 783 double *refcyc_per_req_delivery_cur, 784 double refclk_freq_in_mhz, 785 double ref_freq_to_pix_freq, 786 double hscale_pixel_rate_l, 787 double hscl_ratio, 788 double vratio_pre_l, 789 double vratio_l, 790 unsigned int cur_width, 791 enum cursor_bpp cur_bpp) 792{ 793 unsigned int cur_src_width = cur_width; 794 unsigned int cur_req_size = 0; 795 unsigned int cur_req_width = 0; 796 double cur_width_ub = 0.0; 797 double cur_req_per_width = 0.0; 798 double hactive_cur = 0.0; 799 800 ASSERT(cur_src_width <= 256); 801 802 *refcyc_per_req_delivery_pre_cur = 0.0; 803 *refcyc_per_req_delivery_cur = 0.0; 804 if (cur_src_width > 0) { 805 unsigned int cur_bit_per_pixel = 0; 806 807 if (cur_bpp == dm_cur_2bit) { 808 cur_req_size = 64; // byte 809 cur_bit_per_pixel = 2; 810 } else { // 32bit 811 cur_bit_per_pixel = 32; 812 if (cur_src_width >= 1 && cur_src_width <= 16) 813 cur_req_size = 64; 814 else if (cur_src_width >= 17 && cur_src_width <= 31) 815 cur_req_size = 128; 816 else 817 cur_req_size = 256; 818 } 819 820 cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); 821 cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width; 822 cur_req_per_width = cur_width_ub / (double) cur_req_width; 823 hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor 824 825 if (vratio_pre_l <= 1.0) { 826 *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width; 827 } else { 828 *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width; 829 } 830 831 ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); 832 833 if (vratio_l <= 1.0) { 834 *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq / (double) cur_req_per_width; 835 } else { 836 *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz * (double) cur_src_width / hscale_pixel_rate_l / (double) cur_req_per_width; 837 } 838 839 dml_print("DML_DLG: %s: cur_req_width = %d\n", __func__, cur_req_width); 840 dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", __func__, cur_width_ub); 841 dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", __func__, cur_req_per_width); 842 dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", __func__, hactive_cur); 843 dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_pre_cur); 844 dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", __func__, *refcyc_per_req_delivery_cur); 845 846 ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); 847 } 848} 849 850// Note: currently taken in as is. 851// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. 852static void dml_rq_dlg_get_dlg_params( 853 struct display_mode_lib *mode_lib, 854 const display_e2e_pipe_params_st *e2e_pipe_param, 855 const unsigned int num_pipes, 856 const unsigned int pipe_idx, 857 display_dlg_regs_st *disp_dlg_regs, 858 display_ttu_regs_st *disp_ttu_regs, 859 const display_rq_dlg_params_st *rq_dlg_param, 860 const display_dlg_sys_params_st *dlg_sys_param, 861 const bool cstate_en, 862 const bool pstate_en, 863 const bool vm_en, 864 const bool ignore_viewport_pos, 865 const bool immediate_flip_support) 866{ 867 const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; 868 const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; 869 const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; 870 const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; 871 const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; 872 unsigned int pipe_index_in_combine[DC__NUM_PIPES__MAX]; 873 874 // ------------------------- 875 // Section 1.15.2.1: OTG dependent Params 876 // ------------------------- 877 // Timing 878 unsigned int htotal = dst->htotal; 879 unsigned int hblank_end = dst->hblank_end; 880 unsigned int vblank_start = dst->vblank_start; 881 unsigned int vblank_end = dst->vblank_end; 882 883 double dppclk_freq_in_mhz = clks->dppclk_mhz; 884 double refclk_freq_in_mhz = clks->refclk_mhz; 885 double pclk_freq_in_mhz = dst->pixel_rate_mhz; 886 bool interlaced = dst->interlaced; 887 double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; 888 double min_ttu_vblank; 889 unsigned int dlg_vblank_start; 890 bool dual_plane; 891 bool mode_422; 892 unsigned int access_dir; 893 unsigned int vp_height_l; 894 unsigned int vp_width_l; 895 unsigned int vp_height_c; 896 unsigned int vp_width_c; 897 898 // Scaling 899 unsigned int htaps_l; 900 unsigned int htaps_c; 901 double hratio_l; 902 double hratio_c; 903 double vratio_l; 904 double vratio_c; 905 906 unsigned int swath_width_ub_l; 907 unsigned int dpte_groups_per_row_ub_l; 908 unsigned int swath_width_ub_c; 909 unsigned int dpte_groups_per_row_ub_c; 910 911 unsigned int meta_chunks_per_row_ub_l; 912 unsigned int meta_chunks_per_row_ub_c; 913 unsigned int vupdate_offset; 914 unsigned int vupdate_width; 915 unsigned int vready_offset; 916 917 unsigned int vstartup_start; 918 unsigned int dst_x_after_scaler; 919 unsigned int dst_y_after_scaler; 920 double dst_y_prefetch; 921 double dst_y_per_vm_vblank; 922 double dst_y_per_row_vblank; 923 double dst_y_per_vm_flip; 924 double dst_y_per_row_flip; 925 double max_dst_y_per_vm_vblank; 926 double max_dst_y_per_row_vblank; 927 double vratio_pre_l; 928 double vratio_pre_c; 929 unsigned int req_per_swath_ub_l; 930 unsigned int req_per_swath_ub_c; 931 unsigned int meta_row_height_l; 932 unsigned int meta_row_height_c; 933 unsigned int swath_width_pixels_ub_l; 934 unsigned int swath_width_pixels_ub_c; 935 unsigned int scaler_rec_in_width_l; 936 unsigned int scaler_rec_in_width_c; 937 unsigned int dpte_row_height_l; 938 unsigned int dpte_row_height_c; 939 double hscale_pixel_rate_l; 940 double hscale_pixel_rate_c; 941 double min_hratio_fact_l; 942 double min_hratio_fact_c; 943 double refcyc_per_line_delivery_pre_l; 944 double refcyc_per_line_delivery_pre_c; 945 double refcyc_per_line_delivery_l; 946 double refcyc_per_line_delivery_c; 947 948 double refcyc_per_req_delivery_pre_l; 949 double refcyc_per_req_delivery_pre_c; 950 double refcyc_per_req_delivery_l; 951 double refcyc_per_req_delivery_c; 952 953 unsigned int full_recout_width; 954 double refcyc_per_req_delivery_pre_cur0; 955 double refcyc_per_req_delivery_cur0; 956 double refcyc_per_req_delivery_pre_cur1; 957 double refcyc_per_req_delivery_cur1; 958 int unsigned vba__min_dst_y_next_start = get_min_dst_y_next_start(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA 959 int unsigned vba__vready_after_vcount0 = get_vready_at_or_after_vsync(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 960 961 float vba__refcyc_per_line_delivery_pre_l = get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 962 float vba__refcyc_per_line_delivery_l = get_refcyc_per_line_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 963 964 float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 965 float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 966 967 memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); 968 memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); 969 970 dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); 971 dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); 972 dml_print("DML_DLG: %s: vm_en = %d\n", __func__, vm_en); 973 dml_print("DML_DLG: %s: ignore_viewport_pos = %d\n", __func__, ignore_viewport_pos); 974 dml_print("DML_DLG: %s: immediate_flip_support = %d\n", __func__, immediate_flip_support); 975 976 dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); 977 dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); 978 dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); 979 dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); ASSERT(ref_freq_to_pix_freq < 4.0); 980 981 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); 982 disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal * dml_pow(2, 8)); 983 disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits 984 985 //set_prefetch_mode(mode_lib, cstate_en, pstate_en, ignore_viewport_pos, immediate_flip_support); 986 min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 987 988 dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; 989 disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); 990 disp_dlg_regs->min_dst_y_next_start_us = 0; 991 ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); 992 993 dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); 994 dml_print("DML_DLG: %s: min_dst_y_next_start = 0x%0x\n", __func__, disp_dlg_regs->min_dst_y_next_start); 995 dml_print("DML_DLG: %s: dlg_vblank_start = 0x%0x\n", __func__, dlg_vblank_start); 996 dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); 997 dml_print("DML_DLG: %s: vba__min_dst_y_next_start = 0x%0x\n", __func__, vba__min_dst_y_next_start); 998 999 //old_impl_vs_vba_impl("min_dst_y_next_start", dlg_vblank_start, vba__min_dst_y_next_start); 1000 1001 // ------------------------- 1002 // Section 1.15.2.2: Prefetch, Active and TTU 1003 // ------------------------- 1004 // Prefetch Calc 1005 // Source 1006 dual_plane = is_dual_plane((enum source_format_class) (src->source_format)); 1007 mode_422 = 0; 1008 access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed 1009 vp_height_l = src->viewport_height; 1010 vp_width_l = src->viewport_width; 1011 vp_height_c = src->viewport_height_c; 1012 vp_width_c = src->viewport_width_c; 1013 1014 // Scaling 1015 htaps_l = taps->htaps; 1016 htaps_c = taps->htaps_c; 1017 hratio_l = scl->hscl_ratio; 1018 hratio_c = scl->hscl_ratio_c; 1019 vratio_l = scl->vscl_ratio; 1020 vratio_c = scl->vscl_ratio_c; 1021 1022 swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub; 1023 dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub; 1024 swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub; 1025 dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub; 1026 1027 meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub; 1028 meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub; 1029 vupdate_offset = dst->vupdate_offset; 1030 vupdate_width = dst->vupdate_width; 1031 vready_offset = dst->vready_offset; 1032 1033 vstartup_start = dst->vstartup_start; 1034 if (interlaced) { 1035 if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0) 1036 disp_dlg_regs->vready_after_vcount0 = 1; 1037 else 1038 disp_dlg_regs->vready_after_vcount0 = 0; 1039 } else { 1040 if (vstartup_start - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end) 1041 disp_dlg_regs->vready_after_vcount0 = 1; 1042 else 1043 disp_dlg_regs->vready_after_vcount0 = 0; 1044 } 1045 1046 dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); 1047 dml_print("DML_DLG: %s: vba__vready_after_vcount0 = %d\n", __func__, vba__vready_after_vcount0); 1048 //old_impl_vs_vba_impl("vready_after_vcount0", disp_dlg_regs->vready_after_vcount0, vba__vready_after_vcount0); 1049 1050 if (interlaced) 1051 vstartup_start = vstartup_start / 2; 1052 1053 dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1054 dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1055 1056 // do some adjustment on the dst_after scaler to account for odm combine mode 1057 dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); 1058 dml_print("DML_DLG: %s: input dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); 1059 1060 // need to figure out which side of odm combine we're in 1061 if (dst->odm_combine) { 1062 // figure out which pipes go together 1063 bool visited[DC__NUM_PIPES__MAX]; 1064 unsigned int i, j, k; 1065 1066 for (k = 0; k < num_pipes; ++k) { 1067 visited[k] = false; 1068 pipe_index_in_combine[k] = 0; 1069 } 1070 1071 for (i = 0; i < num_pipes; i++) { 1072 if (e2e_pipe_param[i].pipe.src.is_hsplit && !visited[i]) { 1073 1074 unsigned int grp = e2e_pipe_param[i].pipe.src.hsplit_grp; 1075 unsigned int grp_idx = 0; 1076 1077 for (j = i; j < num_pipes; j++) { 1078 if (e2e_pipe_param[j].pipe.src.hsplit_grp == grp && e2e_pipe_param[j].pipe.src.is_hsplit && !visited[j]) { 1079 pipe_index_in_combine[j] = grp_idx; 1080 dml_print("DML_DLG: %s: pipe[%d] is in grp %d idx %d\n", __func__, j, grp, grp_idx); 1081 grp_idx++; 1082 visited[j] = true; 1083 } 1084 } 1085 } 1086 } 1087 1088 } 1089 1090 if (dst->odm_combine == dm_odm_combine_mode_disabled) { 1091 disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end * ref_freq_to_pix_freq); 1092 } else { 1093 unsigned int odm_combine_factor = (dst->odm_combine == dm_odm_combine_mode_2to1 ? 2 : 4); // TODO: We should really check that 4to1 is supported before setting it to 4 1094 unsigned int odm_pipe_index = pipe_index_in_combine[pipe_idx]; 1095 disp_dlg_regs->refcyc_h_blank_end = (unsigned int) (((double) hblank_end + odm_pipe_index * (double) dst->hactive / odm_combine_factor) * ref_freq_to_pix_freq); 1096 } ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13)); 1097 1098 dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); 1099 dml_print("DML_DLG: %s: dst_x_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_x_after_scaler); 1100 dml_print("DML_DLG: %s: dst_y_after_scaler[%d] = %d\n", __func__, pipe_idx, dst_y_after_scaler); 1101 1102 dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1103 dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1104 dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1105 dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1106 dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1107 1108 max_dst_y_per_vm_vblank = 32.0; //U5.2 1109 max_dst_y_per_row_vblank = 16.0; //U4.2 1110 1111 // magic! 1112 if (htotal <= 75) { 1113 max_dst_y_per_vm_vblank = 100.0; 1114 max_dst_y_per_row_vblank = 100.0; 1115 } 1116 1117 dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); 1118 dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); 1119 dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); 1120 dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); 1121 dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); 1122 1123 ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); 1124 1125 ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); 1126 1127 vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1128 vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA 1129 1130 dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); 1131 dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); 1132 1133 // Active 1134 req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub; 1135 req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub; 1136 meta_row_height_l = rq_dlg_param->rq_l.meta_row_height; 1137 meta_row_height_c = rq_dlg_param->rq_c.meta_row_height; 1138 swath_width_pixels_ub_l = 0; 1139 swath_width_pixels_ub_c = 0; 1140 scaler_rec_in_width_l = 0; 1141 scaler_rec_in_width_c = 0; 1142 dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height; 1143 dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height; 1144 1145 if (mode_422) { 1146 swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element 1147 swath_width_pixels_ub_c = swath_width_ub_c * 2; 1148 } else { 1149 swath_width_pixels_ub_l = swath_width_ub_l * 1; 1150 swath_width_pixels_ub_c = swath_width_ub_c * 1; 1151 } 1152 1153 hscale_pixel_rate_l = 0.; 1154 hscale_pixel_rate_c = 0.; 1155 min_hratio_fact_l = 1.0; 1156 min_hratio_fact_c = 1.0; 1157 1158 if (hratio_l <= 1) 1159 min_hratio_fact_l = 2.0; 1160 else if (htaps_l <= 6) { 1161 if ((hratio_l * 2.0) > 4.0) 1162 min_hratio_fact_l = 4.0; 1163 else 1164 min_hratio_fact_l = hratio_l * 2.0; 1165 } else { 1166 if (hratio_l > 4.0) 1167 min_hratio_fact_l = 4.0; 1168 else 1169 min_hratio_fact_l = hratio_l; 1170 } 1171 1172 hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; 1173 1174 dml_print("DML_DLG: %s: hratio_l = %3.2f\n", __func__, hratio_l); 1175 dml_print("DML_DLG: %s: min_hratio_fact_l = %3.2f\n", __func__, min_hratio_fact_l); 1176 dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l); 1177 1178 if (hratio_c <= 1) 1179 min_hratio_fact_c = 2.0; 1180 else if (htaps_c <= 6) { 1181 if ((hratio_c * 2.0) > 4.0) 1182 min_hratio_fact_c = 4.0; 1183 else 1184 min_hratio_fact_c = hratio_c * 2.0; 1185 } else { 1186 if (hratio_c > 4.0) 1187 min_hratio_fact_c = 4.0; 1188 else 1189 min_hratio_fact_c = hratio_c; 1190 } 1191 1192 hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; 1193 1194 refcyc_per_line_delivery_pre_l = 0.; 1195 refcyc_per_line_delivery_pre_c = 0.; 1196 refcyc_per_line_delivery_l = 0.; 1197 refcyc_per_line_delivery_c = 0.; 1198 1199 refcyc_per_req_delivery_pre_l = 0.; 1200 refcyc_per_req_delivery_pre_c = 0.; 1201 refcyc_per_req_delivery_l = 0.; 1202 refcyc_per_req_delivery_c = 0.; 1203 1204 full_recout_width = 0; 1205 // In ODM 1206 if (src->is_hsplit) { 1207 // This "hack" is only allowed (and valid) for MPC combine. In ODM 1208 // combine, you MUST specify the full_recout_width...according to Oswin 1209 if (dst->full_recout_width == 0 && !dst->odm_combine) { 1210 dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", __func__); 1211 full_recout_width = dst->recout_width * 2; // assume half split for dcn1 1212 } else 1213 full_recout_width = dst->full_recout_width; 1214 } else 1215 full_recout_width = dst->recout_width; 1216 1217 // As of DCN2, mpc_combine and odm_combine are mutually exclusive 1218 refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( 1219 mode_lib, 1220 refclk_freq_in_mhz, 1221 pclk_freq_in_mhz, 1222 dst->odm_combine, 1223 full_recout_width, 1224 dst->hactive, 1225 vratio_pre_l, 1226 hscale_pixel_rate_l, 1227 swath_width_pixels_ub_l, 1228 1); // per line 1229 1230 refcyc_per_line_delivery_l = get_refcyc_per_delivery( 1231 mode_lib, 1232 refclk_freq_in_mhz, 1233 pclk_freq_in_mhz, 1234 dst->odm_combine, 1235 full_recout_width, 1236 dst->hactive, 1237 vratio_l, 1238 hscale_pixel_rate_l, 1239 swath_width_pixels_ub_l, 1240 1); // per line 1241 1242 dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width); 1243 dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", __func__, hscale_pixel_rate_l); 1244 dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); 1245 dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); 1246 dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_l); 1247 dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_l = %3.2f\n", __func__, vba__refcyc_per_line_delivery_l); 1248 1249 //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_l", refcyc_per_line_delivery_pre_l, vba__refcyc_per_line_delivery_pre_l); 1250 //old_impl_vs_vba_impl("refcyc_per_line_delivery_l", refcyc_per_line_delivery_l, vba__refcyc_per_line_delivery_l); 1251 1252 if (dual_plane) { 1253 float vba__refcyc_per_line_delivery_pre_c = get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1254 float vba__refcyc_per_line_delivery_c = get_refcyc_per_line_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1255 1256 refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( 1257 mode_lib, 1258 refclk_freq_in_mhz, 1259 pclk_freq_in_mhz, 1260 dst->odm_combine, 1261 full_recout_width, 1262 dst->hactive, 1263 vratio_pre_c, 1264 hscale_pixel_rate_c, 1265 swath_width_pixels_ub_c, 1266 1); // per line 1267 1268 refcyc_per_line_delivery_c = get_refcyc_per_delivery( 1269 mode_lib, 1270 refclk_freq_in_mhz, 1271 pclk_freq_in_mhz, 1272 dst->odm_combine, 1273 full_recout_width, 1274 dst->hactive, 1275 vratio_c, 1276 hscale_pixel_rate_c, 1277 swath_width_pixels_ub_c, 1278 1); // per line 1279 1280 dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c); 1281 dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c); 1282 dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_pre_c); 1283 dml_print("DML_DLG: %s: vba__refcyc_per_line_delivery_c = %3.2f\n", __func__, vba__refcyc_per_line_delivery_c); 1284 1285 //old_impl_vs_vba_impl("refcyc_per_line_delivery_pre_c", refcyc_per_line_delivery_pre_c, vba__refcyc_per_line_delivery_pre_c); 1286 //old_impl_vs_vba_impl("refcyc_per_line_delivery_c", refcyc_per_line_delivery_c, vba__refcyc_per_line_delivery_c); 1287 } 1288 1289 if (src->dynamic_metadata_enable && src->gpuvm) 1290 disp_dlg_regs->refcyc_per_vm_dmdata = get_refcyc_per_vm_dmdata_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1291 1292 disp_dlg_regs->dmdata_dl_delta = get_dmdata_dl_delta_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1293 1294 // TTU - Luma / Chroma 1295 if (access_dir) { // vertical access 1296 scaler_rec_in_width_l = vp_height_l; 1297 scaler_rec_in_width_c = vp_height_c; 1298 } else { 1299 scaler_rec_in_width_l = vp_width_l; 1300 scaler_rec_in_width_c = vp_width_c; 1301 } 1302 1303 refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( 1304 mode_lib, 1305 refclk_freq_in_mhz, 1306 pclk_freq_in_mhz, 1307 dst->odm_combine, 1308 full_recout_width, 1309 dst->hactive, 1310 vratio_pre_l, 1311 hscale_pixel_rate_l, 1312 scaler_rec_in_width_l, 1313 req_per_swath_ub_l); // per req 1314 1315 refcyc_per_req_delivery_l = get_refcyc_per_delivery( 1316 mode_lib, 1317 refclk_freq_in_mhz, 1318 pclk_freq_in_mhz, 1319 dst->odm_combine, 1320 full_recout_width, 1321 dst->hactive, 1322 vratio_l, 1323 hscale_pixel_rate_l, 1324 scaler_rec_in_width_l, 1325 req_per_swath_ub_l); // per req 1326 1327 dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); 1328 dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); 1329 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_l); 1330 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_l = %3.2f\n", __func__, vba__refcyc_per_req_delivery_l); 1331 1332 //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_l", refcyc_per_req_delivery_pre_l, vba__refcyc_per_req_delivery_pre_l); 1333 //old_impl_vs_vba_impl("refcyc_per_req_delivery_l", refcyc_per_req_delivery_l, vba__refcyc_per_req_delivery_l); 1334 1335 ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); 1336 1337 if (dual_plane) { 1338 float vba__refcyc_per_req_delivery_pre_c = get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1339 float vba__refcyc_per_req_delivery_c = get_refcyc_per_req_delivery_c_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1340 1341 refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( 1342 mode_lib, 1343 refclk_freq_in_mhz, 1344 pclk_freq_in_mhz, 1345 dst->odm_combine, 1346 full_recout_width, 1347 dst->hactive, 1348 vratio_pre_c, 1349 hscale_pixel_rate_c, 1350 scaler_rec_in_width_c, 1351 req_per_swath_ub_c); // per req 1352 refcyc_per_req_delivery_c = get_refcyc_per_delivery( 1353 mode_lib, 1354 refclk_freq_in_mhz, 1355 pclk_freq_in_mhz, 1356 dst->odm_combine, 1357 full_recout_width, 1358 dst->hactive, 1359 vratio_c, 1360 hscale_pixel_rate_c, 1361 scaler_rec_in_width_c, 1362 req_per_swath_ub_c); // per req 1363 1364 dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c); 1365 dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); 1366 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_c); 1367 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_c = %3.2f\n", __func__, vba__refcyc_per_req_delivery_c); 1368 1369 //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_c", refcyc_per_req_delivery_pre_c, vba__refcyc_per_req_delivery_pre_c); 1370 //old_impl_vs_vba_impl("refcyc_per_req_delivery_c", refcyc_per_req_delivery_c, vba__refcyc_per_req_delivery_c); 1371 1372 ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); 1373 } 1374 1375 // TTU - Cursor 1376 refcyc_per_req_delivery_pre_cur0 = 0.0; 1377 refcyc_per_req_delivery_cur0 = 0.0; 1378 1379 ASSERT(src->num_cursors <= 1); 1380 1381 if (src->num_cursors > 0) { 1382 float vba__refcyc_per_req_delivery_pre_cur0; 1383 float vba__refcyc_per_req_delivery_cur0; 1384 1385 calculate_ttu_cursor( 1386 mode_lib, 1387 &refcyc_per_req_delivery_pre_cur0, 1388 &refcyc_per_req_delivery_cur0, 1389 refclk_freq_in_mhz, 1390 ref_freq_to_pix_freq, 1391 hscale_pixel_rate_l, 1392 scl->hscl_ratio, 1393 vratio_pre_l, 1394 vratio_l, 1395 src->cur0_src_width, 1396 (enum cursor_bpp) (src->cur0_bpp)); 1397 1398 vba__refcyc_per_req_delivery_pre_cur0 = get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1399 vba__refcyc_per_req_delivery_cur0 = get_refcyc_per_cursor_req_delivery_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1400 1401 dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0); 1402 dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0); 1403 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_pre_cur0); 1404 dml_print("DML_DLG: %s: vba__refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, vba__refcyc_per_req_delivery_cur0); 1405 1406 //old_impl_vs_vba_impl("refcyc_per_req_delivery_pre_cur0", refcyc_per_req_delivery_pre_cur0, vba__refcyc_per_req_delivery_pre_cur0); 1407 //old_impl_vs_vba_impl("refcyc_per_req_delivery_cur0", refcyc_per_req_delivery_cur0, vba__refcyc_per_req_delivery_cur0); 1408 } 1409 1410 refcyc_per_req_delivery_pre_cur1 = 0.0; 1411 refcyc_per_req_delivery_cur1 = 0.0; 1412 1413 // TTU - Misc 1414 // all hard-coded 1415 1416 // Assignment to register structures 1417 disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line 1418 ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); 1419 disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk 1420 ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13)); 1421 disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); 1422 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); 1423 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); 1424 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); 1425 disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); 1426 1427 disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); 1428 disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); 1429 1430 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); 1431 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); 1432 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); 1433 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); 1434 1435 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); 1436 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); 1437 1438 if (dual_plane) { 1439 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c); 1440 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)dml_pow(2, 13)); 1441 } 1442 1443 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); 1444 ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13)); 1445 1446 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now 1447 1448 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; 1449 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; 1450 1451 if (dual_plane) { 1452 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; 1453 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; 1454 } 1455 1456 disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1457 disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1458 disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA 1459 disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // From VBA 1460 1461 // Clamp to max for now 1462 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int) dml_pow(2, 23)) 1463 disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1; 1464 1465 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int) dml_pow(2, 23)) 1466 disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1; 1467 1468 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int) dml_pow(2, 23)) 1469 disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1; 1470 1471 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int) dml_pow(2, 23)) 1472 disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1; 1473 1474 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * dml_pow(2, 2)); 1475 ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17)); 1476 if (dual_plane) { 1477 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * dml_pow(2, 2)); 1478 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { 1479 dml_print( 1480 "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", 1481 __func__, 1482 disp_dlg_regs->dst_y_per_pte_row_nom_c, 1483 (unsigned int) dml_pow(2, 17) - 1); 1484 } 1485 } 1486 1487 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * dml_pow(2, 2)); 1488 ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17)); 1489 1490 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * dml_pow(2, 2)); 1491 ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (unsigned int)dml_pow(2, 17)); 1492 1493 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1494 / (double) dpte_groups_per_row_ub_l); 1495 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) 1496 disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; 1497 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1498 / (double) meta_chunks_per_row_ub_l); 1499 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) 1500 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; 1501 1502 if (dual_plane) { 1503 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq 1504 / (double) dpte_groups_per_row_ub_c); 1505 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) 1506 disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; 1507 1508 // TODO: Is this the right calculation? Does htotal need to be halved? 1509 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int) ((double) meta_row_height_c / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq 1510 / (double) meta_chunks_per_row_ub_c); 1511 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) 1512 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; 1513 } 1514 1515 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, 1); 1516 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, 1); 1517 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13)); 1518 1519 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, 1); 1520 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, 1); 1521 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13)); ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13)); 1522 1523 disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; 1524 disp_dlg_regs->dst_y_offset_cur0 = 0; 1525 disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; 1526 disp_dlg_regs->dst_y_offset_cur1 = 0; 1527 1528 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off 1529 1530 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l * dml_pow(2, 10)); 1531 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l * dml_pow(2, 10)); 1532 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10)); 1533 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c * dml_pow(2, 10)); 1534 disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); 1535 disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10)); 1536 disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); 1537 disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 * dml_pow(2, 10)); 1538 1539 disp_ttu_regs->qos_level_low_wm = 0; 1540 ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); 1541 1542 disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal * ref_freq_to_pix_freq); 1543 ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); 1544 1545 disp_ttu_regs->qos_level_flip = 14; 1546 disp_ttu_regs->qos_level_fixed_l = 8; 1547 disp_ttu_regs->qos_level_fixed_c = 8; 1548 disp_ttu_regs->qos_level_fixed_cur0 = 8; 1549 disp_ttu_regs->qos_ramp_disable_l = 0; 1550 disp_ttu_regs->qos_ramp_disable_c = 0; 1551 disp_ttu_regs->qos_ramp_disable_cur0 = 0; 1552 1553 disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; 1554 ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); 1555 1556 print__ttu_regs_st(mode_lib, disp_ttu_regs); 1557 print__dlg_regs_st(mode_lib, disp_dlg_regs); 1558} 1559 1560void dml31_rq_dlg_get_dlg_reg( 1561 struct display_mode_lib *mode_lib, 1562 display_dlg_regs_st *dlg_regs, 1563 display_ttu_regs_st *ttu_regs, 1564 const display_e2e_pipe_params_st *e2e_pipe_param, 1565 const unsigned int num_pipes, 1566 const unsigned int pipe_idx, 1567 const bool cstate_en, 1568 const bool pstate_en, 1569 const bool vm_en, 1570 const bool ignore_viewport_pos, 1571 const bool immediate_flip_support) 1572{ 1573 display_rq_params_st rq_param = {0}; 1574 display_dlg_sys_params_st dlg_sys_param = {0}; 1575 1576 // Get watermark and Tex. 1577 dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); 1578 dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes); 1579 dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); 1580 dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); 1581 dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); 1582 dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); 1583 dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes); 1584 dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); 1585 1586 print__dlg_sys_params_st(mode_lib, &dlg_sys_param); 1587 1588 // system parameter calculation done 1589 1590 dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); 1591 dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe); 1592 dml_rq_dlg_get_dlg_params( 1593 mode_lib, 1594 e2e_pipe_param, 1595 num_pipes, 1596 pipe_idx, 1597 dlg_regs, 1598 ttu_regs, 1599 &rq_param.dlg, 1600 &dlg_sys_param, 1601 cstate_en, 1602 pstate_en, 1603 vm_en, 1604 ignore_viewport_pos, 1605 immediate_flip_support); 1606 dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); 1607} 1608 1609