1/* $NetBSD: amdgpu_dml1_display_rq_dlg_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $ */ 2 3/* 4 * Copyright 2017 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: AMD 25 * 26 */ 27 28#include <sys/cdefs.h> 29__KERNEL_RCSID(0, "$NetBSD: amdgpu_dml1_display_rq_dlg_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $"); 30 31#include "dml1_display_rq_dlg_calc.h" 32#include "display_mode_lib.h" 33 34#include "dml_inline_defs.h" 35 36/* 37 * NOTE: 38 * This file is gcc-parseable HW gospel, coming straight from HW engineers. 39 * 40 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 41 * ways. Unless there is something clearly wrong with it the code should 42 * remain as-is as it provides us with a guarantee from HW that it is correct. 
43 */ 44 45static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) 46{ 47 unsigned int ret_val = 0; 48 49 if (source_format == dm_444_16) { 50 if (!is_chroma) 51 ret_val = 2; 52 } else if (source_format == dm_444_32) { 53 if (!is_chroma) 54 ret_val = 4; 55 } else if (source_format == dm_444_64) { 56 if (!is_chroma) 57 ret_val = 8; 58 } else if (source_format == dm_420_8) { 59 if (is_chroma) 60 ret_val = 2; 61 else 62 ret_val = 1; 63 } else if (source_format == dm_420_10) { 64 if (is_chroma) 65 ret_val = 4; 66 else 67 ret_val = 2; 68 } 69 return ret_val; 70} 71 72static bool is_dual_plane(enum source_format_class source_format) 73{ 74 bool ret_val = 0; 75 76 if ((source_format == dm_420_8) || (source_format == dm_420_10)) 77 ret_val = 1; 78 79 return ret_val; 80} 81 82static void get_blk256_size( 83 unsigned int *blk256_width, 84 unsigned int *blk256_height, 85 unsigned int bytes_per_element) 86{ 87 if (bytes_per_element == 1) { 88 *blk256_width = 16; 89 *blk256_height = 16; 90 } else if (bytes_per_element == 2) { 91 *blk256_width = 16; 92 *blk256_height = 8; 93 } else if (bytes_per_element == 4) { 94 *blk256_width = 8; 95 *blk256_height = 8; 96 } else if (bytes_per_element == 8) { 97 *blk256_width = 8; 98 *blk256_height = 4; 99 } 100} 101 102static double get_refcyc_per_delivery( 103 struct display_mode_lib *mode_lib, 104 double refclk_freq_in_mhz, 105 double pclk_freq_in_mhz, 106 unsigned int recout_width, 107 double vratio, 108 double hscale_pixel_rate, 109 unsigned int delivery_width, 110 unsigned int req_per_swath_ub) 111{ 112 double refcyc_per_delivery = 0.0; 113 114 if (vratio <= 1.0) { 115 refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width 116 / pclk_freq_in_mhz / (double) req_per_swath_ub; 117 } else { 118 refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width 119 / (double) hscale_pixel_rate / (double) req_per_swath_ub; 120 } 121 122 DTRACE("DLG: %s: refclk_freq_in_mhz = 
%3.2f", __func__, refclk_freq_in_mhz); 123 DTRACE("DLG: %s: pclk_freq_in_mhz = %3.2f", __func__, pclk_freq_in_mhz); 124 DTRACE("DLG: %s: recout_width = %d", __func__, recout_width); 125 DTRACE("DLG: %s: vratio = %3.2f", __func__, vratio); 126 DTRACE("DLG: %s: req_per_swath_ub = %d", __func__, req_per_swath_ub); 127 DTRACE("DLG: %s: refcyc_per_delivery= %3.2f", __func__, refcyc_per_delivery); 128 129 return refcyc_per_delivery; 130 131} 132 133static double get_vratio_pre( 134 struct display_mode_lib *mode_lib, 135 unsigned int max_num_sw, 136 unsigned int max_partial_sw, 137 unsigned int swath_height, 138 double vinit, 139 double l_sw) 140{ 141 double prefill = dml_floor(vinit, 1); 142 double vratio_pre = 1.0; 143 144 vratio_pre = (max_num_sw * swath_height + max_partial_sw) / l_sw; 145 146 if (swath_height > 4) { 147 double tmp0 = (max_num_sw * swath_height) / (l_sw - (prefill - 3.0) / 2.0); 148 149 if (tmp0 > vratio_pre) 150 vratio_pre = tmp0; 151 } 152 153 DTRACE("DLG: %s: max_num_sw = %0d", __func__, max_num_sw); 154 DTRACE("DLG: %s: max_partial_sw = %0d", __func__, max_partial_sw); 155 DTRACE("DLG: %s: swath_height = %0d", __func__, swath_height); 156 DTRACE("DLG: %s: vinit = %3.2f", __func__, vinit); 157 DTRACE("DLG: %s: vratio_pre = %3.2f", __func__, vratio_pre); 158 159 if (vratio_pre < 1.0) { 160 DTRACE("WARNING_DLG: %s: vratio_pre=%3.2f < 1.0, set to 1.0", __func__, vratio_pre); 161 vratio_pre = 1.0; 162 } 163 164 if (vratio_pre > 4.0) { 165 DTRACE( 166 "WARNING_DLG: %s: vratio_pre=%3.2f > 4.0 (max scaling ratio). 
set to 4.0", 167 __func__, 168 vratio_pre); 169 vratio_pre = 4.0; 170 } 171 172 return vratio_pre; 173} 174 175static void get_swath_need( 176 struct display_mode_lib *mode_lib, 177 unsigned int *max_num_sw, 178 unsigned int *max_partial_sw, 179 unsigned int swath_height, 180 double vinit) 181{ 182 double prefill = dml_floor(vinit, 1); 183 unsigned int max_partial_sw_int; 184 185 DTRACE("DLG: %s: swath_height = %0d", __func__, swath_height); 186 DTRACE("DLG: %s: vinit = %3.2f", __func__, vinit); 187 188 ASSERT(prefill > 0.0 && prefill <= 8.0); 189 190 *max_num_sw = (unsigned int) (dml_ceil((prefill - 1.0) / (double) swath_height, 1) + 1.0); /* prefill has to be >= 1 */ 191 max_partial_sw_int = 192 (prefill == 1) ? 193 (swath_height - 1) : 194 ((unsigned int) (prefill - 2.0) % swath_height); 195 *max_partial_sw = (max_partial_sw_int < 1) ? 1 : max_partial_sw_int; /* ensure minimum of 1 is used */ 196 197 DTRACE("DLG: %s: max_num_sw = %0d", __func__, *max_num_sw); 198 DTRACE("DLG: %s: max_partial_sw = %0d", __func__, *max_partial_sw); 199} 200 201static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) 202{ 203 if (tile_size == dm_256k_tile) 204 return (256 * 1024); 205 else if (tile_size == dm_64k_tile) 206 return (64 * 1024); 207 else 208 return (4 * 1024); 209} 210 211static void extract_rq_sizing_regs( 212 struct display_mode_lib *mode_lib, 213 struct _vcs_dpi_display_data_rq_regs_st *rq_regs, 214 const struct _vcs_dpi_display_data_rq_sizing_params_st rq_sizing) 215{ 216 DTRACE("DLG: %s: rq_sizing param", __func__); 217 print__data_rq_sizing_params_st(mode_lib, rq_sizing); 218 219 rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; 220 221 if (rq_sizing.min_chunk_bytes == 0) 222 rq_regs->min_chunk_size = 0; 223 else 224 rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; 225 226 rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; 227 if (rq_sizing.min_meta_chunk_bytes == 0) 228 
rq_regs->min_meta_chunk_size = 0; 229 else 230 rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; 231 232 rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; 233 rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; 234} 235 236void dml1_extract_rq_regs( 237 struct display_mode_lib *mode_lib, 238 struct _vcs_dpi_display_rq_regs_st *rq_regs, 239 const struct _vcs_dpi_display_rq_params_st rq_param) 240{ 241 unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 242 unsigned int detile_buf_plane1_addr = 0; 243 244 extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); 245 if (rq_param.yuv420) 246 extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); 247 248 rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); 249 rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); 250 251 /* TODO: take the max between luma, chroma chunk size? 
252 * okay for now, as we are setting chunk_bytes to 8kb anyways 253 */ 254 if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { /*32kb */ 255 rq_regs->drq_expansion_mode = 0; 256 } else { 257 rq_regs->drq_expansion_mode = 2; 258 } 259 rq_regs->prq_expansion_mode = 1; 260 rq_regs->mrq_expansion_mode = 1; 261 rq_regs->crq_expansion_mode = 1; 262 263 if (rq_param.yuv420) { 264 if ((double) rq_param.misc.rq_l.stored_swath_bytes 265 / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { 266 detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); /* half to chroma */ 267 } else { 268 detile_buf_plane1_addr = dml_round_to_multiple( 269 (unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), 270 256, 271 0) / 64.0; /* 2/3 to chroma */ 272 } 273 } 274 rq_regs->plane1_base_address = detile_buf_plane1_addr; 275} 276 277static void handle_det_buf_split( 278 struct display_mode_lib *mode_lib, 279 struct _vcs_dpi_display_rq_params_st *rq_param, 280 const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param) 281{ 282 unsigned int total_swath_bytes = 0; 283 unsigned int swath_bytes_l = 0; 284 unsigned int swath_bytes_c = 0; 285 unsigned int full_swath_bytes_packed_l = 0; 286 unsigned int full_swath_bytes_packed_c = 0; 287 bool req128_l = 0; 288 bool req128_c = 0; 289 bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); 290 bool surf_vert = (pipe_src_param.source_scan == dm_vert); 291 unsigned int log2_swath_height_l = 0; 292 unsigned int log2_swath_height_c = 0; 293 unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; 294 295 full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; 296 full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; 297 298 if (rq_param->yuv420_10bpc) { 299 full_swath_bytes_packed_l = dml_round_to_multiple( 300 rq_param->misc.rq_l.full_swath_bytes * 2 / 3, 301 256, 302 1) + 256; 303 full_swath_bytes_packed_c = dml_round_to_multiple( 304 rq_param->misc.rq_c.full_swath_bytes * 
2 / 3, 305 256, 306 1) + 256; 307 } 308 309 if (rq_param->yuv420) { 310 total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; 311 312 if (total_swath_bytes <= detile_buf_size_in_bytes) { /*full 256b request */ 313 req128_l = 0; 314 req128_c = 0; 315 swath_bytes_l = full_swath_bytes_packed_l; 316 swath_bytes_c = full_swath_bytes_packed_c; 317 } else { /*128b request (for luma only for yuv420 8bpc) */ 318 req128_l = 1; 319 req128_c = 0; 320 swath_bytes_l = full_swath_bytes_packed_l / 2; 321 swath_bytes_c = full_swath_bytes_packed_c; 322 } 323 324 /* Bug workaround, luma and chroma req size needs to be the same. (see: DEGVIDCN10-137) 325 * TODO: Remove after rtl fix 326 */ 327 if (req128_l == 1) { 328 req128_c = 1; 329 DTRACE("DLG: %s: bug workaround DEGVIDCN10-137", __func__); 330 } 331 332 /* Note: assumption, the config that pass in will fit into 333 * the detiled buffer. 334 */ 335 } else { 336 total_swath_bytes = 2 * full_swath_bytes_packed_l; 337 338 if (total_swath_bytes <= detile_buf_size_in_bytes) 339 req128_l = 0; 340 else 341 req128_l = 1; 342 343 swath_bytes_l = total_swath_bytes; 344 swath_bytes_c = 0; 345 } 346 rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; 347 rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; 348 349 if (surf_linear) { 350 log2_swath_height_l = 0; 351 log2_swath_height_c = 0; 352 } else if (!surf_vert) { 353 log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; 354 log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; 355 } else { 356 log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; 357 log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; 358 } 359 rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; 360 rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; 361 362 DTRACE("DLG: %s: req128_l = %0d", __func__, req128_l); 363 DTRACE("DLG: %s: req128_c = %0d", __func__, req128_c); 364 
DTRACE("DLG: %s: full_swath_bytes_packed_l = %0d", __func__, full_swath_bytes_packed_l); 365 DTRACE("DLG: %s: full_swath_bytes_packed_c = %0d", __func__, full_swath_bytes_packed_c); 366} 367 368/* Need refactor. */ 369static void dml1_rq_dlg_get_row_heights( 370 struct display_mode_lib *mode_lib, 371 unsigned int *o_dpte_row_height, 372 unsigned int *o_meta_row_height, 373 unsigned int vp_width, 374 unsigned int data_pitch, 375 int source_format, 376 int tiling, 377 int macro_tile_size, 378 int source_scan, 379 int is_chroma) 380{ 381 bool surf_linear = (tiling == dm_sw_linear); 382 bool surf_vert = (source_scan == dm_vert); 383 384 unsigned int bytes_per_element = get_bytes_per_element( 385 (enum source_format_class) source_format, 386 is_chroma); 387 unsigned int log2_bytes_per_element = dml_log2(bytes_per_element); 388 unsigned int blk256_width = 0; 389 unsigned int blk256_height = 0; 390 391 unsigned int log2_blk256_height; 392 unsigned int blk_bytes; 393 unsigned int log2_blk_bytes; 394 unsigned int log2_blk_height; 395 unsigned int log2_blk_width; 396 unsigned int log2_meta_req_bytes; 397 unsigned int log2_meta_req_height; 398 unsigned int log2_meta_req_width; 399 unsigned int log2_meta_row_height; 400 unsigned int log2_vmpg_bytes; 401 unsigned int dpte_buf_in_pte_reqs; 402 unsigned int log2_vmpg_height; 403 unsigned int log2_vmpg_width; 404 unsigned int log2_dpte_req_height_ptes; 405 unsigned int log2_dpte_req_width_ptes; 406 unsigned int log2_dpte_req_height; 407 unsigned int log2_dpte_req_width; 408 unsigned int log2_dpte_row_height_linear; 409 unsigned int log2_dpte_row_height; 410 unsigned int dpte_req_width; 411 412 if (surf_linear) { 413 blk256_width = 256; 414 blk256_height = 1; 415 } else { 416 get_blk256_size(&blk256_width, &blk256_height, bytes_per_element); 417 } 418 419 log2_blk256_height = dml_log2((double) blk256_height); 420 blk_bytes = surf_linear ? 
421 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); 422 log2_blk_bytes = dml_log2((double) blk_bytes); 423 log2_blk_height = 0; 424 log2_blk_width = 0; 425 426 /* remember log rule 427 * "+" in log is multiply 428 * "-" in log is divide 429 * "/2" is like square root 430 * blk is vertical biased 431 */ 432 if (tiling != dm_sw_linear) 433 log2_blk_height = log2_blk256_height 434 + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); 435 else 436 log2_blk_height = 0; /* blk height of 1 */ 437 438 log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; 439 440 /* ------- */ 441 /* meta */ 442 /* ------- */ 443 log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */ 444 445 /* each 64b meta request for dcn is 8x8 meta elements and 446 * a meta element covers one 256b block of the the data surface. 447 */ 448 log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 */ 449 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element 450 - log2_meta_req_height; 451 log2_meta_row_height = 0; 452 453 /* the dimensions of a meta row are meta_row_width x meta_row_height in elements. 
454 * calculate upper bound of the meta_row_width 455 */ 456 if (!surf_vert) 457 log2_meta_row_height = log2_meta_req_height; 458 else 459 log2_meta_row_height = log2_meta_req_width; 460 461 *o_meta_row_height = 1 << log2_meta_row_height; 462 463 /* ------ */ 464 /* dpte */ 465 /* ------ */ 466 log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); 467 dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; 468 469 log2_vmpg_height = 0; 470 log2_vmpg_width = 0; 471 log2_dpte_req_height_ptes = 0; 472 log2_dpte_req_width_ptes = 0; 473 log2_dpte_req_height = 0; 474 log2_dpte_req_width = 0; 475 log2_dpte_row_height_linear = 0; 476 log2_dpte_row_height = 0; 477 dpte_req_width = 0; /* 64b dpte req width in data element */ 478 479 if (surf_linear) 480 log2_vmpg_height = 0; /* one line high */ 481 else 482 log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; 483 log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; 484 485 /* only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. 
*/ 486 if (log2_blk_bytes <= log2_vmpg_bytes) 487 log2_dpte_req_height_ptes = 0; 488 else if (log2_blk_height - log2_vmpg_height >= 2) 489 log2_dpte_req_height_ptes = 2; 490 else 491 log2_dpte_req_height_ptes = log2_blk_height - log2_vmpg_height; 492 log2_dpte_req_width_ptes = 3 - log2_dpte_req_height_ptes; 493 494 ASSERT((log2_dpte_req_width_ptes == 3 && log2_dpte_req_height_ptes == 0) || /* 8x1 */ 495 (log2_dpte_req_width_ptes == 2 && log2_dpte_req_height_ptes == 1) || /* 4x2 */ 496 (log2_dpte_req_width_ptes == 1 && log2_dpte_req_height_ptes == 2)); /* 2x4 */ 497 498 /* the dpte request dimensions in data elements is dpte_req_width x dpte_req_height 499 * log2_wmpg_width is how much 1 pte represent, now trying to calculate how much 64b pte req represent 500 */ 501 log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; 502 log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; 503 dpte_req_width = 1 << log2_dpte_req_width; 504 505 /* calculate pitch dpte row buffer can hold 506 * round the result down to a power of two. 507 */ 508 if (surf_linear) { 509 log2_dpte_row_height_linear = dml_floor( 510 dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 511 1); 512 513 ASSERT(log2_dpte_row_height_linear >= 3); 514 515 if (log2_dpte_row_height_linear > 7) 516 log2_dpte_row_height_linear = 7; 517 518 log2_dpte_row_height = log2_dpte_row_height_linear; 519 } else { 520 /* the upper bound of the dpte_row_width without dependency on viewport position follows. */ 521 if (!surf_vert) 522 log2_dpte_row_height = log2_dpte_req_height; 523 else 524 log2_dpte_row_height = 525 (log2_blk_width < log2_dpte_req_width) ? 526 log2_blk_width : log2_dpte_req_width; 527 } 528 529 /* From programming guide: 530 * There is a special case of saving only half of ptes returned due to buffer space limits. 
531 * this case applies to 4 and 8bpe in horizontal access of a vp_width greater than 2560+16 532 * when the pte request is 2x4 ptes (which happens when vmpg_bytes =4kb and tile blk_bytes >=64kb). 533 */ 534 if (!surf_vert && vp_width > (2560 + 16) && bytes_per_element >= 4 && log2_vmpg_bytes == 12 535 && log2_blk_bytes >= 16) 536 log2_dpte_row_height = log2_dpte_row_height - 1; /*half of the full height */ 537 538 *o_dpte_row_height = 1 << log2_dpte_row_height; 539} 540 541static void get_surf_rq_param( 542 struct display_mode_lib *mode_lib, 543 struct _vcs_dpi_display_data_rq_sizing_params_st *rq_sizing_param, 544 struct _vcs_dpi_display_data_rq_dlg_params_st *rq_dlg_param, 545 struct _vcs_dpi_display_data_rq_misc_params_st *rq_misc_param, 546 const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param, 547 bool is_chroma) 548{ 549 bool mode_422 = 0; 550 unsigned int vp_width = 0; 551 unsigned int vp_height = 0; 552 unsigned int data_pitch = 0; 553 unsigned int meta_pitch = 0; 554 unsigned int ppe = mode_422 ? 
2 : 1; 555 bool surf_linear; 556 bool surf_vert; 557 unsigned int bytes_per_element; 558 unsigned int log2_bytes_per_element; 559 unsigned int blk256_width; 560 unsigned int blk256_height; 561 unsigned int log2_blk256_width; 562 unsigned int log2_blk256_height; 563 unsigned int blk_bytes; 564 unsigned int log2_blk_bytes; 565 unsigned int log2_blk_height; 566 unsigned int log2_blk_width; 567 unsigned int log2_meta_req_bytes; 568 unsigned int log2_meta_req_height; 569 unsigned int log2_meta_req_width; 570 unsigned int meta_req_width; 571 unsigned int meta_req_height; 572 unsigned int log2_meta_row_height; 573 unsigned int meta_row_width_ub; 574 unsigned int log2_meta_chunk_bytes; 575 unsigned int log2_meta_chunk_height; 576 unsigned int log2_meta_chunk_width; 577 unsigned int log2_min_meta_chunk_bytes; 578 unsigned int min_meta_chunk_width; 579 unsigned int meta_chunk_width; 580 unsigned int meta_chunk_per_row_int; 581 unsigned int meta_row_remainder; 582 unsigned int meta_chunk_threshold; 583 unsigned int meta_blk_bytes; 584 unsigned int meta_blk_height; 585 unsigned int meta_blk_width; 586 unsigned int meta_surface_bytes; 587 unsigned int vmpg_bytes; 588 unsigned int meta_pte_req_per_frame_ub; 589 unsigned int meta_pte_bytes_per_frame_ub; 590 unsigned int log2_vmpg_bytes; 591 unsigned int dpte_buf_in_pte_reqs; 592 unsigned int log2_vmpg_height; 593 unsigned int log2_vmpg_width; 594 unsigned int log2_dpte_req_height_ptes; 595 unsigned int log2_dpte_req_width_ptes; 596 unsigned int log2_dpte_req_height; 597 unsigned int log2_dpte_req_width; 598 unsigned int log2_dpte_row_height_linear; 599 unsigned int log2_dpte_row_height; 600 unsigned int log2_dpte_group_width; 601 unsigned int dpte_row_width_ub; 602 unsigned int dpte_row_height; 603 unsigned int dpte_req_height; 604 unsigned int dpte_req_width; 605 unsigned int dpte_group_width; 606 unsigned int log2_dpte_group_bytes; 607 unsigned int log2_dpte_group_length; 608 unsigned int func_meta_row_height, 
func_dpte_row_height; 609 610 /* TODO check if ppe apply for both luma and chroma in 422 case */ 611 if (is_chroma) { 612 vp_width = pipe_src_param.viewport_width_c / ppe; 613 vp_height = pipe_src_param.viewport_height_c; 614 data_pitch = pipe_src_param.data_pitch_c; 615 meta_pitch = pipe_src_param.meta_pitch_c; 616 } else { 617 vp_width = pipe_src_param.viewport_width / ppe; 618 vp_height = pipe_src_param.viewport_height; 619 data_pitch = pipe_src_param.data_pitch; 620 meta_pitch = pipe_src_param.meta_pitch; 621 } 622 623 rq_sizing_param->chunk_bytes = 8192; 624 625 if (rq_sizing_param->chunk_bytes == 64 * 1024) 626 rq_sizing_param->min_chunk_bytes = 0; 627 else 628 rq_sizing_param->min_chunk_bytes = 1024; 629 630 rq_sizing_param->meta_chunk_bytes = 2048; 631 rq_sizing_param->min_meta_chunk_bytes = 256; 632 633 rq_sizing_param->mpte_group_bytes = 2048; 634 635 surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); 636 surf_vert = (pipe_src_param.source_scan == dm_vert); 637 638 bytes_per_element = get_bytes_per_element( 639 (enum source_format_class) pipe_src_param.source_format, 640 is_chroma); 641 log2_bytes_per_element = dml_log2(bytes_per_element); 642 blk256_width = 0; 643 blk256_height = 0; 644 645 if (surf_linear) { 646 blk256_width = 256 / bytes_per_element; 647 blk256_height = 1; 648 } else { 649 get_blk256_size(&blk256_width, &blk256_height, bytes_per_element); 650 } 651 652 DTRACE("DLG: %s: surf_linear = %d", __func__, surf_linear); 653 DTRACE("DLG: %s: surf_vert = %d", __func__, surf_vert); 654 DTRACE("DLG: %s: blk256_width = %d", __func__, blk256_width); 655 DTRACE("DLG: %s: blk256_height = %d", __func__, blk256_height); 656 657 log2_blk256_width = dml_log2((double) blk256_width); 658 log2_blk256_height = dml_log2((double) blk256_height); 659 blk_bytes = 660 surf_linear ? 
256 : get_blk_size_bytes( 661 (enum source_macro_tile_size) pipe_src_param.macro_tile_size); 662 log2_blk_bytes = dml_log2((double) blk_bytes); 663 log2_blk_height = 0; 664 log2_blk_width = 0; 665 666 /* remember log rule 667 * "+" in log is multiply 668 * "-" in log is divide 669 * "/2" is like square root 670 * blk is vertical biased 671 */ 672 if (pipe_src_param.sw_mode != dm_sw_linear) 673 log2_blk_height = log2_blk256_height 674 + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); 675 else 676 log2_blk_height = 0; /* blk height of 1 */ 677 678 log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; 679 680 if (!surf_vert) { 681 rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) 682 + blk256_width; 683 rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; 684 } else { 685 rq_dlg_param->swath_width_ub = dml_round_to_multiple( 686 vp_height - 1, 687 blk256_height, 688 1) + blk256_height; 689 rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; 690 } 691 692 if (!surf_vert) 693 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height 694 * bytes_per_element; 695 else 696 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width 697 * bytes_per_element; 698 699 rq_misc_param->blk256_height = blk256_height; 700 rq_misc_param->blk256_width = blk256_width; 701 702 /* ------- */ 703 /* meta */ 704 /* ------- */ 705 log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */ 706 707 /* each 64b meta request for dcn is 8x8 meta elements and 708 * a meta element covers one 256b block of the the data surface. 
709 */ 710 log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 byte, each byte represent 1 blk256 */ 711 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element 712 - log2_meta_req_height; 713 meta_req_width = 1 << log2_meta_req_width; 714 meta_req_height = 1 << log2_meta_req_height; 715 log2_meta_row_height = 0; 716 meta_row_width_ub = 0; 717 718 /* the dimensions of a meta row are meta_row_width x meta_row_height in elements. 719 * calculate upper bound of the meta_row_width 720 */ 721 if (!surf_vert) { 722 log2_meta_row_height = log2_meta_req_height; 723 meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) 724 + meta_req_width; 725 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; 726 } else { 727 log2_meta_row_height = log2_meta_req_width; 728 meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) 729 + meta_req_height; 730 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; 731 } 732 rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; 733 734 log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); 735 log2_meta_chunk_height = log2_meta_row_height; 736 737 /*full sized meta chunk width in unit of data elements */ 738 log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element 739 - log2_meta_chunk_height; 740 log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); 741 min_meta_chunk_width = 1 742 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element 743 - log2_meta_chunk_height); 744 meta_chunk_width = 1 << log2_meta_chunk_width; 745 meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); 746 meta_row_remainder = meta_row_width_ub % meta_chunk_width; 747 meta_chunk_threshold = 0; 748 meta_blk_bytes = 4096; 749 meta_blk_height = blk256_height * 64; 750 meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; 751 
meta_surface_bytes = meta_pitch 752 * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) 753 + meta_blk_height) * bytes_per_element / 256; 754 vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; 755 meta_pte_req_per_frame_ub = (dml_round_to_multiple( 756 meta_surface_bytes - vmpg_bytes, 757 8 * vmpg_bytes, 758 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); 759 meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; /*64B mpte request */ 760 rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; 761 762 DTRACE("DLG: %s: meta_blk_height = %d", __func__, meta_blk_height); 763 DTRACE("DLG: %s: meta_blk_width = %d", __func__, meta_blk_width); 764 DTRACE("DLG: %s: meta_surface_bytes = %d", __func__, meta_surface_bytes); 765 DTRACE("DLG: %s: meta_pte_req_per_frame_ub = %d", __func__, meta_pte_req_per_frame_ub); 766 DTRACE("DLG: %s: meta_pte_bytes_per_frame_ub = %d", __func__, meta_pte_bytes_per_frame_ub); 767 768 if (!surf_vert) 769 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; 770 else 771 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; 772 773 if (meta_row_remainder <= meta_chunk_threshold) 774 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 775 else 776 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 777 778 rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; 779 780 /* ------ */ 781 /* dpte */ 782 /* ------ */ 783 log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); 784 dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; 785 786 log2_vmpg_height = 0; 787 log2_vmpg_width = 0; 788 log2_dpte_req_height_ptes = 0; 789 log2_dpte_req_width_ptes = 0; 790 log2_dpte_req_height = 0; 791 log2_dpte_req_width = 0; 792 log2_dpte_row_height_linear = 0; 793 log2_dpte_row_height = 0; 794 log2_dpte_group_width = 0; 795 dpte_row_width_ub = 0; 796 dpte_row_height = 0; 797 dpte_req_height = 0; /* 64b dpte req height in data element */ 798 dpte_req_width = 0; 
/* 64b dpte req width in data element */ 799 dpte_group_width = 0; 800 log2_dpte_group_bytes = 0; 801 log2_dpte_group_length = 0; 802 803 if (surf_linear) 804 log2_vmpg_height = 0; /* one line high */ 805 else 806 log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; 807 log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; 808 809 /* only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. */ 810 if (log2_blk_bytes <= log2_vmpg_bytes) 811 log2_dpte_req_height_ptes = 0; 812 else if (log2_blk_height - log2_vmpg_height >= 2) 813 log2_dpte_req_height_ptes = 2; 814 else 815 log2_dpte_req_height_ptes = log2_blk_height - log2_vmpg_height; 816 log2_dpte_req_width_ptes = 3 - log2_dpte_req_height_ptes; 817 818 /* Ensure we only have the 3 shapes */ 819 ASSERT((log2_dpte_req_width_ptes == 3 && log2_dpte_req_height_ptes == 0) || /* 8x1 */ 820 (log2_dpte_req_width_ptes == 2 && log2_dpte_req_height_ptes == 1) || /* 4x2 */ 821 (log2_dpte_req_width_ptes == 1 && log2_dpte_req_height_ptes == 2)); /* 2x4 */ 822 823 /* The dpte request dimensions in data elements is dpte_req_width x dpte_req_height 824 * log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent 825 * That depends on the pte shape (i.e. 8x1, 4x2, 2x4) 826 */ 827 log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; 828 log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; 829 dpte_req_height = 1 << log2_dpte_req_height; 830 dpte_req_width = 1 << log2_dpte_req_width; 831 832 /* calculate pitch dpte row buffer can hold 833 * round the result down to a power of two. 
834 */ 835 if (surf_linear) { 836 log2_dpte_row_height_linear = dml_floor( 837 dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch), 838 1); 839 840 ASSERT(log2_dpte_row_height_linear >= 3); 841 842 if (log2_dpte_row_height_linear > 7) 843 log2_dpte_row_height_linear = 7; 844 845 log2_dpte_row_height = log2_dpte_row_height_linear; 846 rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; 847 848 /* For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. 849 * the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. 850 */ 851 dpte_row_width_ub = dml_round_to_multiple( 852 data_pitch * dpte_row_height - 1, 853 dpte_req_width, 854 1) + dpte_req_width; 855 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 856 } else { 857 /* for tiled mode, row height is the same as req height and row store up to vp size upper bound */ 858 if (!surf_vert) { 859 log2_dpte_row_height = log2_dpte_req_height; 860 dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) 861 + dpte_req_width; 862 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; 863 } else { 864 log2_dpte_row_height = 865 (log2_blk_width < log2_dpte_req_width) ? 866 log2_blk_width : log2_dpte_req_width; 867 dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) 868 + dpte_req_height; 869 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; 870 } 871 rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; 872 } 873 rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; 874 875 /* From programming guide: 876 * There is a special case of saving only half of ptes returned due to buffer space limits. 877 * this case applies to 4 and 8bpe in horizontal access of a vp_width greater than 2560+16 878 * when the pte request is 2x4 ptes (which happens when vmpg_bytes =4kb and tile blk_bytes >=64kb). 
879 */ 880 if (!surf_vert && vp_width > (2560 + 16) && bytes_per_element >= 4 && log2_vmpg_bytes == 12 881 && log2_blk_bytes >= 16) { 882 log2_dpte_row_height = log2_dpte_row_height - 1; /*half of the full height */ 883 rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; 884 } 885 886 /* the dpte_group_bytes is reduced for the specific case of vertical 887 * access of a tile surface that has dpte request of 8x1 ptes. 888 */ 889 if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) /*reduced, in this case, will have page fault within a group */ 890 rq_sizing_param->dpte_group_bytes = 512; 891 else 892 /*full size */ 893 rq_sizing_param->dpte_group_bytes = 2048; 894 895 /*since pte request size is 64byte, the number of data pte requests per full sized group is as follows. */ 896 log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); 897 log2_dpte_group_length = log2_dpte_group_bytes - 6; /*length in 64b requests */ 898 899 /* full sized data pte group width in elements */ 900 if (!surf_vert) 901 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; 902 else 903 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; 904 905 dpte_group_width = 1 << log2_dpte_group_width; 906 907 /* since dpte groups are only aligned to dpte_req_width and not dpte_group_width, 908 * the upper bound for the dpte groups per row is as follows. 909 */ 910 rq_dlg_param->dpte_groups_per_row_ub = dml_ceil( 911 (double) dpte_row_width_ub / dpte_group_width, 912 1); 913 914 dml1_rq_dlg_get_row_heights( 915 mode_lib, 916 &func_dpte_row_height, 917 &func_meta_row_height, 918 vp_width, 919 data_pitch, 920 pipe_src_param.source_format, 921 pipe_src_param.sw_mode, 922 pipe_src_param.macro_tile_size, 923 pipe_src_param.source_scan, 924 is_chroma); 925 926 /* Just a check to make sure this function and the new one give the same 927 * result. 
The standalone get_row_heights() function is based off of 928 * code in this function so the same changes need to be made to both. 929 */ 930 if (rq_dlg_param->meta_row_height != func_meta_row_height) { 931 DTRACE( 932 "MISMATCH: rq_dlg_param->meta_row_height = %d", 933 rq_dlg_param->meta_row_height); 934 DTRACE("MISMATCH: func_meta_row_height = %d", func_meta_row_height); 935 ASSERT(0); 936 } 937 938 if (rq_dlg_param->dpte_row_height != func_dpte_row_height) { 939 DTRACE( 940 "MISMATCH: rq_dlg_param->dpte_row_height = %d", 941 rq_dlg_param->dpte_row_height); 942 DTRACE("MISMATCH: func_dpte_row_height = %d", func_dpte_row_height); 943 ASSERT(0); 944 } 945} 946
/*
 * dml1_rq_dlg_get_rq_params - fill *rq_param for one pipe's source surface.
 *
 * mode_lib:       passed through unchanged to every helper called here.
 * rq_param:       output. The yuv420 and yuv420_10bpc flags are assigned
 *                 directly; the sizing/dlg/misc rq_l members are handed to
 *                 get_surf_rq_param() for the luma pass and the matching
 *                 rq_c members for the chroma pass.
 * pipe_src_param: source description, taken by value. Only its
 *                 source_format field is read directly in this function;
 *                 the whole struct is forwarded to the helpers.
 *
 * The chroma pass runs only when is_dual_plane() is true, i.e. for
 * dm_420_8 and dm_420_10; for any other format get_surf_rq_param() is not
 * called on the rq_c members. handle_det_buf_split() is called after the
 * surface pass(es), followed by print__rq_params_st().
 */
947void dml1_rq_dlg_get_rq_params( 948 struct display_mode_lib *mode_lib, 949 struct _vcs_dpi_display_rq_params_st *rq_param, 950 const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param) 951{ 952 /* yuv420 is set for either dm_420_* format; yuv420_10bpc only for dm_420_10 */ 953 rq_param->yuv420 = pipe_src_param.source_format == dm_420_8 954 || pipe_src_param.source_format == dm_420_10; 955 rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10; 956 /* get param for luma surface (rq_l members, last argument 0) */ 957 get_surf_rq_param( 958 mode_lib, 959 &(rq_param->sizing.rq_l), 960 &(rq_param->dlg.rq_l), 961 &(rq_param->misc.rq_l), 962 pipe_src_param, 963 0); 964 965 if (is_dual_plane((enum source_format_class) pipe_src_param.source_format)) { 966 /* get param for chroma surface (rq_c members, last argument 1) */ 967 get_surf_rq_param( 968 mode_lib, 969 &(rq_param->sizing.rq_c), 970 &(rq_param->dlg.rq_c), 971 &(rq_param->misc.rq_c), 972 pipe_src_param, 973 1); 974 } 975 976 /* calculate how to split the det buffer space between luma and chroma */ 977 handle_det_buf_split(mode_lib, rq_param, pipe_src_param); 978 /* rq_param is passed by value here, unlike the pointer passed above */ print__rq_params_st(mode_lib, *rq_param); 979} 980 981/* Note: currently taken in as is. 982 * Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma.
983 */ 984void dml1_rq_dlg_get_dlg_params( 985 struct display_mode_lib *mode_lib, 986 struct _vcs_dpi_display_dlg_regs_st *disp_dlg_regs, 987 struct _vcs_dpi_display_ttu_regs_st *disp_ttu_regs, 988 const struct _vcs_dpi_display_rq_dlg_params_st rq_dlg_param, 989 const struct _vcs_dpi_display_dlg_sys_params_st dlg_sys_param, 990 const struct _vcs_dpi_display_e2e_pipe_params_st e2e_pipe_param, 991 const bool cstate_en, 992 const bool pstate_en, 993 const bool vm_en, 994 const bool iflip_en) 995{ 996 /* Timing */ 997 unsigned int htotal = e2e_pipe_param.pipe.dest.htotal; 998 unsigned int hblank_end = e2e_pipe_param.pipe.dest.hblank_end; 999 unsigned int vblank_start = e2e_pipe_param.pipe.dest.vblank_start; 1000 unsigned int vblank_end = e2e_pipe_param.pipe.dest.vblank_end; 1001 bool interlaced = e2e_pipe_param.pipe.dest.interlaced; 1002 unsigned int min_vblank = mode_lib->ip.min_vblank_lines; 1003 1004 double pclk_freq_in_mhz = e2e_pipe_param.pipe.dest.pixel_rate_mhz; 1005 double refclk_freq_in_mhz = e2e_pipe_param.clks_cfg.refclk_mhz; 1006 double dppclk_freq_in_mhz = e2e_pipe_param.clks_cfg.dppclk_mhz; 1007 double dispclk_freq_in_mhz = e2e_pipe_param.clks_cfg.dispclk_mhz; 1008 1009 double ref_freq_to_pix_freq; 1010 double prefetch_xy_calc_in_dcfclk; 1011 double min_dcfclk_mhz; 1012 double t_calc_us; 1013 double min_ttu_vblank; 1014 double min_dst_y_ttu_vblank; 1015 unsigned int dlg_vblank_start; 1016 bool dcc_en; 1017 bool dual_plane; 1018 bool mode_422; 1019 unsigned int access_dir; 1020 unsigned int bytes_per_element_l; 1021 unsigned int bytes_per_element_c; 1022 unsigned int vp_height_l; 1023 unsigned int vp_width_l; 1024 unsigned int vp_height_c; 1025 unsigned int vp_width_c; 1026 unsigned int htaps_l; 1027 unsigned int htaps_c; 1028 double hratios_l; 1029 double hratios_c; 1030 double vratio_l; 1031 double vratio_c; 1032 double line_time_in_us; 1033 double vinit_l; 1034 double vinit_c; 1035 double vinit_bot_l; 1036 double vinit_bot_c; 1037 unsigned int 
swath_height_l; 1038 unsigned int swath_width_ub_l; 1039 unsigned int dpte_bytes_per_row_ub_l; 1040 unsigned int dpte_groups_per_row_ub_l; 1041 unsigned int meta_pte_bytes_per_frame_ub_l; 1042 unsigned int meta_bytes_per_row_ub_l; 1043 unsigned int swath_height_c; 1044 unsigned int swath_width_ub_c; 1045 unsigned int dpte_bytes_per_row_ub_c; 1046 unsigned int dpte_groups_per_row_ub_c; 1047 unsigned int meta_chunks_per_row_ub_l; 1048 unsigned int vupdate_offset; 1049 unsigned int vupdate_width; 1050 unsigned int vready_offset; 1051 unsigned int dppclk_delay_subtotal; 1052 unsigned int dispclk_delay_subtotal; 1053 unsigned int pixel_rate_delay_subtotal; 1054 unsigned int vstartup_start; 1055 unsigned int dst_x_after_scaler; 1056 unsigned int dst_y_after_scaler; 1057 double line_wait; 1058 double line_o; 1059 double line_setup; 1060 double line_calc; 1061 double dst_y_prefetch; 1062 double t_pre_us; 1063 unsigned int vm_bytes; 1064 unsigned int meta_row_bytes; 1065 unsigned int max_num_sw_l; 1066 unsigned int max_num_sw_c; 1067 unsigned int max_partial_sw_l; 1068 unsigned int max_partial_sw_c; 1069 double max_vinit_l; 1070 double max_vinit_c; 1071 unsigned int lsw_l; 1072 unsigned int lsw_c; 1073 unsigned int sw_bytes_ub_l; 1074 unsigned int sw_bytes_ub_c; 1075 unsigned int sw_bytes; 1076 unsigned int dpte_row_bytes; 1077 double prefetch_bw; 1078 double flip_bw; 1079 double t_vm_us; 1080 double t_r0_us; 1081 double dst_y_per_vm_vblank; 1082 double dst_y_per_row_vblank; 1083 double min_dst_y_per_vm_vblank; 1084 double min_dst_y_per_row_vblank; 1085 double lsw; 1086 double vratio_pre_l; 1087 double vratio_pre_c; 1088 unsigned int req_per_swath_ub_l; 1089 unsigned int req_per_swath_ub_c; 1090 unsigned int meta_row_height_l; 1091 unsigned int swath_width_pixels_ub_l; 1092 unsigned int swath_width_pixels_ub_c; 1093 unsigned int scaler_rec_in_width_l; 1094 unsigned int scaler_rec_in_width_c; 1095 unsigned int dpte_row_height_l; 1096 unsigned int dpte_row_height_c; 1097 
double hscale_pixel_rate_l; 1098 double hscale_pixel_rate_c; 1099 double min_hratio_fact_l; 1100 double min_hratio_fact_c; 1101 double refcyc_per_line_delivery_pre_l; 1102 double refcyc_per_line_delivery_pre_c; 1103 double refcyc_per_line_delivery_l; 1104 double refcyc_per_line_delivery_c; 1105 double refcyc_per_req_delivery_pre_l; 1106 double refcyc_per_req_delivery_pre_c; 1107 double refcyc_per_req_delivery_l; 1108 double refcyc_per_req_delivery_c; 1109 double refcyc_per_req_delivery_pre_cur0; 1110 double refcyc_per_req_delivery_cur0; 1111 unsigned int full_recout_width; 1112 double hratios_cur0; 1113 unsigned int cur0_src_width; 1114 enum cursor_bpp cur0_bpp; 1115 unsigned int cur0_req_size; 1116 unsigned int cur0_req_width; 1117 double cur0_width_ub; 1118 double cur0_req_per_width; 1119 double hactive_cur0; 1120 1121 memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); 1122 memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); 1123 1124 DTRACE("DLG: %s: cstate_en = %d", __func__, cstate_en); 1125 DTRACE("DLG: %s: pstate_en = %d", __func__, pstate_en); 1126 DTRACE("DLG: %s: vm_en = %d", __func__, vm_en); 1127 DTRACE("DLG: %s: iflip_en = %d", __func__, iflip_en); 1128 1129 /* ------------------------- */ 1130 /* Section 1.5.2.1: OTG dependent Params */ 1131 /* ------------------------- */ 1132 DTRACE("DLG: %s: dppclk_freq_in_mhz = %3.2f", __func__, dppclk_freq_in_mhz); 1133 DTRACE("DLG: %s: dispclk_freq_in_mhz = %3.2f", __func__, dispclk_freq_in_mhz); 1134 DTRACE("DLG: %s: refclk_freq_in_mhz = %3.2f", __func__, refclk_freq_in_mhz); 1135 DTRACE("DLG: %s: pclk_freq_in_mhz = %3.2f", __func__, pclk_freq_in_mhz); 1136 DTRACE("DLG: %s: interlaced = %d", __func__, interlaced); 1137 1138 ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; 1139 ASSERT(ref_freq_to_pix_freq < 4.0); 1140 disp_dlg_regs->ref_freq_to_pix_freq = 1141 (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); 1142 disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) 
htotal 1143 * dml_pow(2, 8)); 1144 disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end 1145 * (double) ref_freq_to_pix_freq); 1146 ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); 1147 disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; /* 15 bits */ 1148 1149 prefetch_xy_calc_in_dcfclk = 24.0; /* TODO: ip_param */ 1150 min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; 1151 t_calc_us = prefetch_xy_calc_in_dcfclk / min_dcfclk_mhz; 1152 min_ttu_vblank = dlg_sys_param.t_urg_wm_us; 1153 if (cstate_en) 1154 min_ttu_vblank = dml_max(dlg_sys_param.t_sr_wm_us, min_ttu_vblank); 1155 if (pstate_en) 1156 min_ttu_vblank = dml_max(dlg_sys_param.t_mclk_wm_us, min_ttu_vblank); 1157 min_ttu_vblank = min_ttu_vblank + t_calc_us; 1158 1159 min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; 1160 dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; 1161 1162 disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start 1163 + min_dst_y_ttu_vblank) * dml_pow(2, 2)); 1164 ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); 1165 1166 DTRACE("DLG: %s: min_dcfclk_mhz = %3.2f", __func__, min_dcfclk_mhz); 1167 DTRACE("DLG: %s: min_ttu_vblank = %3.2f", __func__, min_ttu_vblank); 1168 DTRACE( 1169 "DLG: %s: min_dst_y_ttu_vblank = %3.2f", 1170 __func__, 1171 min_dst_y_ttu_vblank); 1172 DTRACE("DLG: %s: t_calc_us = %3.2f", __func__, t_calc_us); 1173 DTRACE( 1174 "DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x", 1175 __func__, 1176 disp_dlg_regs->min_dst_y_next_start); 1177 DTRACE( 1178 "DLG: %s: ref_freq_to_pix_freq = %3.2f", 1179 __func__, 1180 ref_freq_to_pix_freq); 1181 1182 /* ------------------------- */ 1183 /* Section 1.5.2.2: Prefetch, Active and TTU */ 1184 /* ------------------------- */ 1185 /* Prefetch Calc */ 1186 /* Source */ 1187 dcc_en = e2e_pipe_param.pipe.src.dcc; 1188 dual_plane = is_dual_plane( 1189 (enum 
source_format_class) e2e_pipe_param.pipe.src.source_format); 1190 mode_422 = 0; /* TODO */ 1191 access_dir = (e2e_pipe_param.pipe.src.source_scan == dm_vert); /* vp access direction: horizontal or vertical accessed */ 1192 bytes_per_element_l = get_bytes_per_element( 1193 (enum source_format_class) e2e_pipe_param.pipe.src.source_format, 1194 0); 1195 bytes_per_element_c = get_bytes_per_element( 1196 (enum source_format_class) e2e_pipe_param.pipe.src.source_format, 1197 1); 1198 vp_height_l = e2e_pipe_param.pipe.src.viewport_height; 1199 vp_width_l = e2e_pipe_param.pipe.src.viewport_width; 1200 vp_height_c = e2e_pipe_param.pipe.src.viewport_height_c; 1201 vp_width_c = e2e_pipe_param.pipe.src.viewport_width_c; 1202 1203 /* Scaling */ 1204 htaps_l = e2e_pipe_param.pipe.scale_taps.htaps; 1205 htaps_c = e2e_pipe_param.pipe.scale_taps.htaps_c; 1206 hratios_l = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio; 1207 hratios_c = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio_c; 1208 vratio_l = e2e_pipe_param.pipe.scale_ratio_depth.vscl_ratio; 1209 vratio_c = e2e_pipe_param.pipe.scale_ratio_depth.vscl_ratio_c; 1210 1211 line_time_in_us = (htotal / pclk_freq_in_mhz); 1212 vinit_l = e2e_pipe_param.pipe.scale_ratio_depth.vinit; 1213 vinit_c = e2e_pipe_param.pipe.scale_ratio_depth.vinit_c; 1214 vinit_bot_l = e2e_pipe_param.pipe.scale_ratio_depth.vinit_bot; 1215 vinit_bot_c = e2e_pipe_param.pipe.scale_ratio_depth.vinit_bot_c; 1216 1217 swath_height_l = rq_dlg_param.rq_l.swath_height; 1218 swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; 1219 dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub; 1220 dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; 1221 meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub; 1222 meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub; 1223 1224 swath_height_c = rq_dlg_param.rq_c.swath_height; 1225 swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; 1226 
dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub; 1227 dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; 1228 1229 meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; 1230 vupdate_offset = e2e_pipe_param.pipe.dest.vupdate_offset; 1231 vupdate_width = e2e_pipe_param.pipe.dest.vupdate_width; 1232 vready_offset = e2e_pipe_param.pipe.dest.vready_offset; 1233 1234 dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; 1235 dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; 1236 pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz 1237 + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; 1238 1239 vstartup_start = e2e_pipe_param.pipe.dest.vstartup_start; 1240 1241 if (interlaced) 1242 vstartup_start = vstartup_start / 2; 1243 1244 if (vstartup_start >= min_vblank) { 1245 DTRACE( 1246 "WARNING_DLG: %s: vblank_start=%d vblank_end=%d", 1247 __func__, 1248 vblank_start, 1249 vblank_end); 1250 DTRACE( 1251 "WARNING_DLG: %s: vstartup_start=%d should be less than min_vblank=%d", 1252 __func__, 1253 vstartup_start, 1254 min_vblank); 1255 min_vblank = vstartup_start + 1; 1256 DTRACE( 1257 "WARNING_DLG: %s: vstartup_start=%d should be less than min_vblank=%d", 1258 __func__, 1259 vstartup_start, 1260 min_vblank); 1261 } 1262 1263 dst_x_after_scaler = 0; 1264 dst_y_after_scaler = 0; 1265 1266 if (e2e_pipe_param.pipe.src.is_hsplit) 1267 dst_x_after_scaler = pixel_rate_delay_subtotal 1268 + e2e_pipe_param.pipe.dest.recout_width; 1269 else 1270 dst_x_after_scaler = pixel_rate_delay_subtotal; 1271 1272 if (e2e_pipe_param.dout.output_format == dm_420) 1273 dst_y_after_scaler = 1; 1274 else 1275 dst_y_after_scaler = 0; 1276 1277 if (dst_x_after_scaler >= htotal) { 1278 dst_x_after_scaler = dst_x_after_scaler - htotal; 1279 dst_y_after_scaler = dst_y_after_scaler + 1; 1280 } 1281 1282 DTRACE("DLG: %s: htotal = %d", __func__, htotal); 1283 DTRACE( 1284 "DLG: %s: 
pixel_rate_delay_subtotal = %d", 1285 __func__, 1286 pixel_rate_delay_subtotal); 1287 DTRACE("DLG: %s: dst_x_after_scaler = %d", __func__, dst_x_after_scaler); 1288 DTRACE("DLG: %s: dst_y_after_scaler = %d", __func__, dst_y_after_scaler); 1289 1290 line_wait = mode_lib->soc.urgent_latency_us; 1291 if (cstate_en) 1292 line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); 1293 if (pstate_en) 1294 line_wait = dml_max( 1295 mode_lib->soc.dram_clock_change_latency_us 1296 + mode_lib->soc.urgent_latency_us, 1297 line_wait); 1298 line_wait = line_wait / line_time_in_us; 1299 1300 line_o = (double) dst_y_after_scaler + dst_x_after_scaler / (double) htotal; 1301 line_setup = (double) (vupdate_offset + vupdate_width + vready_offset) / (double) htotal; 1302 line_calc = t_calc_us / line_time_in_us; 1303 1304 DTRACE( 1305 "DLG: %s: soc.sr_enter_plus_exit_time_us = %3.2f", 1306 __func__, 1307 (double) mode_lib->soc.sr_enter_plus_exit_time_us); 1308 DTRACE( 1309 "DLG: %s: soc.dram_clock_change_latency_us = %3.2f", 1310 __func__, 1311 (double) mode_lib->soc.dram_clock_change_latency_us); 1312 DTRACE( 1313 "DLG: %s: soc.urgent_latency_us = %3.2f", 1314 __func__, 1315 mode_lib->soc.urgent_latency_us); 1316 1317 DTRACE("DLG: %s: swath_height_l = %d", __func__, swath_height_l); 1318 if (dual_plane) 1319 DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c); 1320 1321 DTRACE( 1322 "DLG: %s: t_srx_delay_us = %3.2f", 1323 __func__, 1324 (double) dlg_sys_param.t_srx_delay_us); 1325 DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us); 1326 DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset); 1327 DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width); 1328 DTRACE("DLG: %s: vready_offset = %d", __func__, vready_offset); 1329 DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, line_time_in_us); 1330 DTRACE("DLG: %s: line_wait = %3.2f", __func__, line_wait); 1331 DTRACE("DLG: %s: line_o = %3.2f", __func__, 
line_o); 1332 DTRACE("DLG: %s: line_setup = %3.2f", __func__, line_setup); 1333 DTRACE("DLG: %s: line_calc = %3.2f", __func__, line_calc); 1334 1335 dst_y_prefetch = ((double) min_vblank - 1.0) 1336 - (line_setup + line_calc + line_wait + line_o); 1337 DTRACE("DLG: %s: dst_y_prefetch (before rnd) = %3.2f", __func__, dst_y_prefetch); 1338 ASSERT(dst_y_prefetch >= 2.0); 1339 1340 dst_y_prefetch = dml_floor(4.0 * (dst_y_prefetch + 0.125), 1) / 4; 1341 DTRACE("DLG: %s: dst_y_prefetch (after rnd) = %3.2f", __func__, dst_y_prefetch); 1342 1343 t_pre_us = dst_y_prefetch * line_time_in_us; 1344 vm_bytes = 0; 1345 meta_row_bytes = 0; 1346 1347 if (dcc_en && vm_en) 1348 vm_bytes = meta_pte_bytes_per_frame_ub_l; 1349 if (dcc_en) 1350 meta_row_bytes = meta_bytes_per_row_ub_l; 1351 1352 max_num_sw_l = 0; 1353 max_num_sw_c = 0; 1354 max_partial_sw_l = 0; 1355 max_partial_sw_c = 0; 1356 1357 max_vinit_l = interlaced ? dml_max(vinit_l, vinit_bot_l) : vinit_l; 1358 max_vinit_c = interlaced ? dml_max(vinit_c, vinit_bot_c) : vinit_c; 1359 1360 get_swath_need(mode_lib, &max_num_sw_l, &max_partial_sw_l, swath_height_l, max_vinit_l); 1361 if (dual_plane) 1362 get_swath_need( 1363 mode_lib, 1364 &max_num_sw_c, 1365 &max_partial_sw_c, 1366 swath_height_c, 1367 max_vinit_c); 1368 1369 lsw_l = max_num_sw_l * swath_height_l + max_partial_sw_l; 1370 lsw_c = max_num_sw_c * swath_height_c + max_partial_sw_c; 1371 sw_bytes_ub_l = lsw_l * swath_width_ub_l * bytes_per_element_l; 1372 sw_bytes_ub_c = lsw_c * swath_width_ub_c * bytes_per_element_c; 1373 sw_bytes = 0; 1374 dpte_row_bytes = 0; 1375 1376 if (vm_en) { 1377 if (dual_plane) 1378 dpte_row_bytes = dpte_bytes_per_row_ub_l + dpte_bytes_per_row_ub_c; 1379 else 1380 dpte_row_bytes = dpte_bytes_per_row_ub_l; 1381 } else { 1382 dpte_row_bytes = 0; 1383 } 1384 1385 if (dual_plane) 1386 sw_bytes = sw_bytes_ub_l + sw_bytes_ub_c; 1387 else 1388 sw_bytes = sw_bytes_ub_l; 1389 1390 DTRACE("DLG: %s: sw_bytes_ub_l = %d", __func__, sw_bytes_ub_l); 1391 
DTRACE("DLG: %s: sw_bytes_ub_c = %d", __func__, sw_bytes_ub_c); 1392 DTRACE("DLG: %s: sw_bytes = %d", __func__, sw_bytes); 1393 DTRACE("DLG: %s: vm_bytes = %d", __func__, vm_bytes); 1394 DTRACE("DLG: %s: meta_row_bytes = %d", __func__, meta_row_bytes); 1395 DTRACE("DLG: %s: dpte_row_bytes = %d", __func__, dpte_row_bytes); 1396 1397 prefetch_bw = (vm_bytes + 2 * dpte_row_bytes + 2 * meta_row_bytes + sw_bytes) / t_pre_us; 1398 flip_bw = ((vm_bytes + dpte_row_bytes + meta_row_bytes) * dlg_sys_param.total_flip_bw) 1399 / (double) dlg_sys_param.total_flip_bytes; 1400 t_vm_us = line_time_in_us / 4.0; 1401 if (vm_en && dcc_en) { 1402 t_vm_us = dml_max( 1403 dlg_sys_param.t_extra_us, 1404 dml_max((double) vm_bytes / prefetch_bw, t_vm_us)); 1405 1406 if (iflip_en && !dual_plane) { 1407 t_vm_us = dml_max(mode_lib->soc.urgent_latency_us, t_vm_us); 1408 if (flip_bw > 0.) 1409 t_vm_us = dml_max(vm_bytes / flip_bw, t_vm_us); 1410 } 1411 } 1412 1413 t_r0_us = dml_max(dlg_sys_param.t_extra_us - t_vm_us, line_time_in_us - t_vm_us); 1414 1415 if (vm_en || dcc_en) { 1416 t_r0_us = dml_max( 1417 (double) (dpte_row_bytes + meta_row_bytes) / prefetch_bw, 1418 dlg_sys_param.t_extra_us); 1419 t_r0_us = dml_max((double) (line_time_in_us - t_vm_us), t_r0_us); 1420 1421 if (iflip_en && !dual_plane) { 1422 t_r0_us = dml_max(mode_lib->soc.urgent_latency_us * 2.0, t_r0_us); 1423 if (flip_bw > 0.) 
1424 t_r0_us = dml_max( 1425 (dpte_row_bytes + meta_row_bytes) / flip_bw, 1426 t_r0_us); 1427 } 1428 } 1429 1430 disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; /* in terms of line */ 1431 disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; /* in terms of refclk */ 1432 ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); 1433 DTRACE( 1434 "DLG: %s: disp_dlg_regs->dst_y_after_scaler = 0x%0x", 1435 __func__, 1436 disp_dlg_regs->dst_y_after_scaler); 1437 DTRACE( 1438 "DLG: %s: disp_dlg_regs->refcyc_x_after_scaler = 0x%0x", 1439 __func__, 1440 disp_dlg_regs->refcyc_x_after_scaler); 1441 1442 disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); 1443 DTRACE( 1444 "DLG: %s: disp_dlg_regs->dst_y_prefetch = %d", 1445 __func__, 1446 disp_dlg_regs->dst_y_prefetch); 1447 1448 dst_y_per_vm_vblank = 0.0; 1449 dst_y_per_row_vblank = 0.0; 1450 1451 dst_y_per_vm_vblank = t_vm_us / line_time_in_us; 1452 dst_y_per_vm_vblank = dml_floor(4.0 * (dst_y_per_vm_vblank + 0.125), 1) / 4.0; 1453 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); 1454 1455 dst_y_per_row_vblank = t_r0_us / line_time_in_us; 1456 dst_y_per_row_vblank = dml_floor(4.0 * (dst_y_per_row_vblank + 0.125), 1) / 4.0; 1457 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); 1458 1459 DTRACE("DLG: %s: lsw_l = %d", __func__, lsw_l); 1460 DTRACE("DLG: %s: lsw_c = %d", __func__, lsw_c); 1461 DTRACE("DLG: %s: dpte_bytes_per_row_ub_l = %d", __func__, dpte_bytes_per_row_ub_l); 1462 DTRACE("DLG: %s: dpte_bytes_per_row_ub_c = %d", __func__, dpte_bytes_per_row_ub_c); 1463 1464 DTRACE("DLG: %s: prefetch_bw = %3.2f", __func__, prefetch_bw); 1465 DTRACE("DLG: %s: flip_bw = %3.2f", __func__, flip_bw); 1466 DTRACE("DLG: %s: t_pre_us = %3.2f", __func__, t_pre_us); 1467 DTRACE("DLG: %s: t_vm_us = %3.2f", __func__, t_vm_us); 1468 DTRACE("DLG: %s: t_r0_us = %3.2f", 
__func__, t_r0_us); 1469 DTRACE("DLG: %s: dst_y_per_vm_vblank = %3.2f", __func__, dst_y_per_vm_vblank); 1470 DTRACE("DLG: %s: dst_y_per_row_vblank = %3.2f", __func__, dst_y_per_row_vblank); 1471 DTRACE("DLG: %s: dst_y_prefetch = %3.2f", __func__, dst_y_prefetch); 1472 1473 min_dst_y_per_vm_vblank = 8.0; 1474 min_dst_y_per_row_vblank = 16.0; 1475 if (htotal <= 75) { 1476 min_vblank = 300; 1477 min_dst_y_per_vm_vblank = 100.0; 1478 min_dst_y_per_row_vblank = 100.0; 1479 } 1480 1481 ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); 1482 ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); 1483 1484 ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); 1485 lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); 1486 1487 DTRACE("DLG: %s: lsw = %3.2f", __func__, lsw); 1488 1489 vratio_pre_l = get_vratio_pre( 1490 mode_lib, 1491 max_num_sw_l, 1492 max_partial_sw_l, 1493 swath_height_l, 1494 max_vinit_l, 1495 lsw); 1496 vratio_pre_c = 1.0; 1497 if (dual_plane) 1498 vratio_pre_c = get_vratio_pre( 1499 mode_lib, 1500 max_num_sw_c, 1501 max_partial_sw_c, 1502 swath_height_c, 1503 max_vinit_c, 1504 lsw); 1505 1506 DTRACE("DLG: %s: vratio_pre_l=%3.2f", __func__, vratio_pre_l); 1507 DTRACE("DLG: %s: vratio_pre_c=%3.2f", __func__, vratio_pre_c); 1508 1509 ASSERT(vratio_pre_l <= 4.0); 1510 if (vratio_pre_l >= 4.0) 1511 disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 21) - 1; 1512 else 1513 disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); 1514 1515 ASSERT(vratio_pre_c <= 4.0); 1516 if (vratio_pre_c >= 4.0) 1517 disp_dlg_regs->vratio_prefetch_c = (unsigned int) dml_pow(2, 21) - 1; 1518 else 1519 disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); 1520 1521 disp_dlg_regs->refcyc_per_pte_group_vblank_l = 1522 (unsigned int) (dst_y_per_row_vblank * (double) htotal 1523 * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); 1524 
ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); 1525 1526 disp_dlg_regs->refcyc_per_pte_group_vblank_c = 1527 (unsigned int) (dst_y_per_row_vblank * (double) htotal 1528 * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c); 1529 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int) dml_pow(2, 13)); 1530 1531 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = 1532 (unsigned int) (dst_y_per_row_vblank * (double) htotal 1533 * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); 1534 ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); 1535 1536 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = 1537 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;/* dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now */ 1538 1539 /* Active */ 1540 req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; 1541 req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; 1542 meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; 1543 swath_width_pixels_ub_l = 0; 1544 swath_width_pixels_ub_c = 0; 1545 scaler_rec_in_width_l = 0; 1546 scaler_rec_in_width_c = 0; 1547 dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; 1548 dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; 1549 1550 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l 1551 / (double) vratio_l * dml_pow(2, 2)); 1552 ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); 1553 1554 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c 1555 / (double) vratio_c * dml_pow(2, 2)); 1556 ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_c < (unsigned int) dml_pow(2, 17)); 1557 1558 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l 1559 / (double) vratio_l * dml_pow(2, 2)); 1560 ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); 1561 1562 
disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; /* dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now */ 1563 1564 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l 1565 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1566 / (double) dpte_groups_per_row_ub_l); 1567 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) 1568 disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; 1569 1570 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c 1571 / (double) vratio_c * (double) htotal * ref_freq_to_pix_freq 1572 / (double) dpte_groups_per_row_ub_c); 1573 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) 1574 disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; 1575 1576 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l 1577 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq 1578 / (double) meta_chunks_per_row_ub_l); 1579 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) 1580 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; 1581 1582 if (mode_422) { 1583 swath_width_pixels_ub_l = swath_width_ub_l * 2; /* *2 for 2 pixel per element */ 1584 swath_width_pixels_ub_c = swath_width_ub_c * 2; 1585 } else { 1586 swath_width_pixels_ub_l = swath_width_ub_l * 1; 1587 swath_width_pixels_ub_c = swath_width_ub_c * 1; 1588 } 1589 1590 hscale_pixel_rate_l = 0.; 1591 hscale_pixel_rate_c = 0.; 1592 min_hratio_fact_l = 1.0; 1593 min_hratio_fact_c = 1.0; 1594 1595 if (htaps_l <= 1) 1596 min_hratio_fact_l = 2.0; 1597 else if (htaps_l <= 6) { 1598 if ((hratios_l * 2.0) > 4.0) 1599 min_hratio_fact_l = 4.0; 1600 else 1601 min_hratio_fact_l = hratios_l * 2.0; 1602 } else { 1603 if (hratios_l > 4.0) 1604 min_hratio_fact_l = 4.0; 1605 else 1606 min_hratio_fact_l = hratios_l; 1607 } 
1608 1609 hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; 1610 1611 if (htaps_c <= 1) 1612 min_hratio_fact_c = 2.0; 1613 else if (htaps_c <= 6) { 1614 if ((hratios_c * 2.0) > 4.0) 1615 min_hratio_fact_c = 4.0; 1616 else 1617 min_hratio_fact_c = hratios_c * 2.0; 1618 } else { 1619 if (hratios_c > 4.0) 1620 min_hratio_fact_c = 4.0; 1621 else 1622 min_hratio_fact_c = hratios_c; 1623 } 1624 1625 hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; 1626 1627 refcyc_per_line_delivery_pre_l = 0.; 1628 refcyc_per_line_delivery_pre_c = 0.; 1629 refcyc_per_line_delivery_l = 0.; 1630 refcyc_per_line_delivery_c = 0.; 1631 1632 refcyc_per_req_delivery_pre_l = 0.; 1633 refcyc_per_req_delivery_pre_c = 0.; 1634 refcyc_per_req_delivery_l = 0.; 1635 refcyc_per_req_delivery_c = 0.; 1636 refcyc_per_req_delivery_pre_cur0 = 0.; 1637 refcyc_per_req_delivery_cur0 = 0.; 1638 1639 full_recout_width = 0; 1640 if (e2e_pipe_param.pipe.src.is_hsplit) { 1641 if (e2e_pipe_param.pipe.dest.full_recout_width == 0) { 1642 DTRACE("DLG: %s: Warningfull_recout_width not set in hsplit mode", __func__); 1643 full_recout_width = e2e_pipe_param.pipe.dest.recout_width * 2; /* assume half split for dcn1 */ 1644 } else 1645 full_recout_width = e2e_pipe_param.pipe.dest.full_recout_width; 1646 } else 1647 full_recout_width = e2e_pipe_param.pipe.dest.recout_width; 1648 1649 refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery( 1650 mode_lib, 1651 refclk_freq_in_mhz, 1652 pclk_freq_in_mhz, 1653 full_recout_width, 1654 vratio_pre_l, 1655 hscale_pixel_rate_l, 1656 swath_width_pixels_ub_l, 1657 1); /* per line */ 1658 1659 refcyc_per_line_delivery_l = get_refcyc_per_delivery( 1660 mode_lib, 1661 refclk_freq_in_mhz, 1662 pclk_freq_in_mhz, 1663 full_recout_width, 1664 vratio_l, 1665 hscale_pixel_rate_l, 1666 swath_width_pixels_ub_l, 1667 1); /* per line */ 1668 1669 DTRACE("DLG: %s: full_recout_width = %d", __func__, full_recout_width); 1670 DTRACE("DLG: %s: hscale_pixel_rate_l = %3.2f", 
__func__, hscale_pixel_rate_l); 1671 DTRACE( 1672 "DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f", 1673 __func__, 1674 refcyc_per_line_delivery_pre_l); 1675 DTRACE( 1676 "DLG: %s: refcyc_per_line_delivery_l = %3.2f", 1677 __func__, 1678 refcyc_per_line_delivery_l); 1679 1680 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor( 1681 refcyc_per_line_delivery_pre_l, 1682 1); 1683 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor( 1684 refcyc_per_line_delivery_l, 1685 1); 1686 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); 1687 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); 1688 1689 if (dual_plane) { 1690 refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery( 1691 mode_lib, 1692 refclk_freq_in_mhz, 1693 pclk_freq_in_mhz, 1694 full_recout_width, 1695 vratio_pre_c, 1696 hscale_pixel_rate_c, 1697 swath_width_pixels_ub_c, 1698 1); /* per line */ 1699 1700 refcyc_per_line_delivery_c = get_refcyc_per_delivery( 1701 mode_lib, 1702 refclk_freq_in_mhz, 1703 pclk_freq_in_mhz, 1704 full_recout_width, 1705 vratio_c, 1706 hscale_pixel_rate_c, 1707 swath_width_pixels_ub_c, 1708 1); /* per line */ 1709 1710 DTRACE( 1711 "DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f", 1712 __func__, 1713 refcyc_per_line_delivery_pre_c); 1714 DTRACE( 1715 "DLG: %s: refcyc_per_line_delivery_c = %3.2f", 1716 __func__, 1717 refcyc_per_line_delivery_c); 1718 1719 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor( 1720 refcyc_per_line_delivery_pre_c, 1721 1); 1722 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor( 1723 refcyc_per_line_delivery_c, 1724 1); 1725 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); 1726 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); 1727 } 1728 disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; 1729 1730 /* TTU - Luma / Chroma */ 1731 if (access_dir) { 
/* vertical access */ 1732 scaler_rec_in_width_l = vp_height_l; 1733 scaler_rec_in_width_c = vp_height_c; 1734 } else { 1735 scaler_rec_in_width_l = vp_width_l; 1736 scaler_rec_in_width_c = vp_width_c; 1737 } 1738 1739 refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery( 1740 mode_lib, 1741 refclk_freq_in_mhz, 1742 pclk_freq_in_mhz, 1743 full_recout_width, 1744 vratio_pre_l, 1745 hscale_pixel_rate_l, 1746 scaler_rec_in_width_l, 1747 req_per_swath_ub_l); /* per req */ 1748 refcyc_per_req_delivery_l = get_refcyc_per_delivery( 1749 mode_lib, 1750 refclk_freq_in_mhz, 1751 pclk_freq_in_mhz, 1752 full_recout_width, 1753 vratio_l, 1754 hscale_pixel_rate_l, 1755 scaler_rec_in_width_l, 1756 req_per_swath_ub_l); /* per req */ 1757 1758 DTRACE( 1759 "DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f", 1760 __func__, 1761 refcyc_per_req_delivery_pre_l); 1762 DTRACE( 1763 "DLG: %s: refcyc_per_req_delivery_l = %3.2f", 1764 __func__, 1765 refcyc_per_req_delivery_l); 1766 1767 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l 1768 * dml_pow(2, 10)); 1769 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l 1770 * dml_pow(2, 10)); 1771 1772 ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); 1773 ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); 1774 1775 if (dual_plane) { 1776 refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery( 1777 mode_lib, 1778 refclk_freq_in_mhz, 1779 pclk_freq_in_mhz, 1780 full_recout_width, 1781 vratio_pre_c, 1782 hscale_pixel_rate_c, 1783 scaler_rec_in_width_c, 1784 req_per_swath_ub_c); /* per req */ 1785 refcyc_per_req_delivery_c = get_refcyc_per_delivery( 1786 mode_lib, 1787 refclk_freq_in_mhz, 1788 pclk_freq_in_mhz, 1789 full_recout_width, 1790 vratio_c, 1791 hscale_pixel_rate_c, 1792 scaler_rec_in_width_c, 1793 req_per_swath_ub_c); /* per req */ 1794 1795 DTRACE( 1796 "DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f", 1797 __func__, 1798 refcyc_per_req_delivery_pre_c); 
1799 DTRACE( 1800 "DLG: %s: refcyc_per_req_delivery_c = %3.2f", 1801 __func__, 1802 refcyc_per_req_delivery_c); 1803 1804 disp_ttu_regs->refcyc_per_req_delivery_pre_c = 1805 (unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10)); 1806 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c 1807 * dml_pow(2, 10)); 1808 1809 ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); 1810 ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); 1811 } 1812 1813 /* TTU - Cursor */ 1814 hratios_cur0 = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio; 1815 cur0_src_width = e2e_pipe_param.pipe.src.cur0_src_width; /* cursor source width */ 1816 cur0_bpp = (enum cursor_bpp) e2e_pipe_param.pipe.src.cur0_bpp; 1817 cur0_req_size = 0; 1818 cur0_req_width = 0; 1819 cur0_width_ub = 0.0; 1820 cur0_req_per_width = 0.0; 1821 hactive_cur0 = 0.0; 1822 1823 ASSERT(cur0_src_width <= 256); 1824 1825 if (cur0_src_width > 0) { 1826 unsigned int cur0_bit_per_pixel = 0; 1827 1828 if (cur0_bpp == dm_cur_2bit) { 1829 cur0_req_size = 64; /* byte */ 1830 cur0_bit_per_pixel = 2; 1831 } else { /* 32bit */ 1832 cur0_bit_per_pixel = 32; 1833 if (cur0_src_width >= 1 && cur0_src_width <= 16) 1834 cur0_req_size = 64; 1835 else if (cur0_src_width >= 17 && cur0_src_width <= 31) 1836 cur0_req_size = 128; 1837 else 1838 cur0_req_size = 256; 1839 } 1840 1841 cur0_req_width = (double) cur0_req_size / ((double) cur0_bit_per_pixel / 8.0); 1842 cur0_width_ub = dml_ceil((double) cur0_src_width / (double) cur0_req_width, 1) 1843 * (double) cur0_req_width; 1844 cur0_req_per_width = cur0_width_ub / (double) cur0_req_width; 1845 hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* TODO: oswin to think about what to do for cursor */ 1846 1847 if (vratio_pre_l <= 1.0) { 1848 refcyc_per_req_delivery_pre_cur0 = hactive_cur0 * ref_freq_to_pix_freq 1849 / (double) cur0_req_per_width; 1850 } else { 1851 refcyc_per_req_delivery_pre_cur0 = (double) refclk_freq_in_mhz 1852 * (double) 
cur0_src_width / hscale_pixel_rate_l 1853 / (double) cur0_req_per_width; 1854 } 1855 1856 disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = 1857 (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); 1858 ASSERT(refcyc_per_req_delivery_pre_cur0 < dml_pow(2, 13)); 1859 1860 if (vratio_l <= 1.0) { 1861 refcyc_per_req_delivery_cur0 = hactive_cur0 * ref_freq_to_pix_freq 1862 / (double) cur0_req_per_width; 1863 } else { 1864 refcyc_per_req_delivery_cur0 = (double) refclk_freq_in_mhz 1865 * (double) cur0_src_width / hscale_pixel_rate_l 1866 / (double) cur0_req_per_width; 1867 } 1868 1869 DTRACE("DLG: %s: cur0_req_width = %d", __func__, cur0_req_width); 1870 DTRACE( 1871 "DLG: %s: cur0_width_ub = %3.2f", 1872 __func__, 1873 cur0_width_ub); 1874 DTRACE( 1875 "DLG: %s: cur0_req_per_width = %3.2f", 1876 __func__, 1877 cur0_req_per_width); 1878 DTRACE( 1879 "DLG: %s: hactive_cur0 = %3.2f", 1880 __func__, 1881 hactive_cur0); 1882 DTRACE( 1883 "DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f", 1884 __func__, 1885 refcyc_per_req_delivery_pre_cur0); 1886 DTRACE( 1887 "DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f", 1888 __func__, 1889 refcyc_per_req_delivery_cur0); 1890 1891 disp_ttu_regs->refcyc_per_req_delivery_cur0 = 1892 (unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10)); 1893 ASSERT(refcyc_per_req_delivery_cur0 < dml_pow(2, 13)); 1894 } else { 1895 disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = 0; 1896 disp_ttu_regs->refcyc_per_req_delivery_cur0 = 0; 1897 } 1898 1899 /* TTU - Misc */ 1900 disp_ttu_regs->qos_level_low_wm = 0; 1901 ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); 1902 disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal 1903 * ref_freq_to_pix_freq); 1904 ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14)); 1905 1906 disp_ttu_regs->qos_level_flip = 14; 1907 disp_ttu_regs->qos_level_fixed_l = 8; 1908 disp_ttu_regs->qos_level_fixed_c = 8; 1909 disp_ttu_regs->qos_level_fixed_cur0 = 8; 1910 
disp_ttu_regs->qos_ramp_disable_l = 0; 1911 disp_ttu_regs->qos_ramp_disable_c = 0; 1912 disp_ttu_regs->qos_ramp_disable_cur0 = 0; 1913 1914 disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; 1915 ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); 1916 1917 print__ttu_regs_st(mode_lib, *disp_ttu_regs); 1918 print__dlg_regs_st(mode_lib, *disp_dlg_regs); 1919} 1920