1/* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25#include "display_mode_vba_util_32.h" 26#include "../dml_inline_defs.h" 27#include "display_mode_vba_32.h" 28#include "../display_mode_lib.h" 29 30#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39{ 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101#ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109#endif 110 111 return pixels; 112} 113 114unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115{ 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184} 185 186 187bool IsVertical(enum dm_rotation_angle Scan) 188{ 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196} 197 198void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216{ 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250} 251 252void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269{ 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 *BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311#ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317#endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 *BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353#ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358#endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; 385 } 386 387#ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392#endif 393} // CalculateBytePerPixelAndBlockSizes 394 395void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459{ 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473#ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477#endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518#ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530#endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586#ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593#endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); 600#ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603#endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 *ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643#ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645#endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649#ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651#endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655#ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657#endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662#ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 ViewportSizeSupportPerSurface[k]); 676#endif 677 678 } 679} // CalculateSwathAndDETConfiguration 680 681void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718{ 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727#ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730#endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738#ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741#endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765#ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771#endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857#ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872#endif 873 874 } 875} // CalculateSwathWidth 876 877bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884{ 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898#ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902#endif 903 904 return (ret_val); 905} 906 907void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927{ 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940#ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950#endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983#ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991#endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996#ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999#endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014#ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019#endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027#ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033#endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048#ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053#endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061#ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072#endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = true; 1080 } 1081#ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086#endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134#ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150#endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156#ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158#endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171#ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178#endif 1179} // CalculateDETBufferSize 1180 1181void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203{ 1204 1205 double SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282} 1283 1284double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291{ 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 1323} 1324 1325double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1326{ 1327 if (Clock <= 0.0) 1328 return 0.0; 1329 1330 if (round_up) 1331 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1332 else 1333 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1334} 1335 1336void dml32_CalculateOutputLink( 1337 double PHYCLKPerState, 1338 double PHYCLKD18PerState, 1339 double PHYCLKD32PerState, 1340 double Downspreading, 1341 bool IsMainSurfaceUsingTheIndicatedTiming, 1342 enum output_encoder_class Output, 1343 enum output_format_class OutputFormat, 1344 unsigned int HTotal, 1345 unsigned int HActive, 1346 double PixelClockBackEnd, 1347 double ForcedOutputLinkBPP, 1348 unsigned int DSCInputBitPerComponent, 1349 unsigned int NumberOfDSCSlices, 1350 double AudioSampleRate, 1351 unsigned int AudioSampleLayout, 1352 enum odm_combine_mode ODMModeNoDSC, 1353 enum odm_combine_mode ODMModeDSC, 1354 bool DSCEnable, 1355 unsigned int OutputLinkDPLanes, 1356 enum dm_output_link_dp_rate OutputLinkDPRate, 1357 1358 /* Output */ 1359 bool *RequiresDSC, 1360 double *RequiresFEC, 1361 double *OutBpp, 1362 enum dm_output_type *OutputType, 1363 enum dm_output_rate *OutputRate, 1364 unsigned int *RequiredSlots) 1365{ 1366 bool LinkDSCEnable; 1367 unsigned int dummy; 1368 *RequiresDSC = false; 1369 *RequiresFEC = false; 1370 *OutBpp = 0; 1371 *OutputType = dm_output_type_unknown; 1372 *OutputRate = dm_output_rate_unknown; 1373 1374 if (IsMainSurfaceUsingTheIndicatedTiming) { 1375 if (Output == dm_hdmi) { 1376 *RequiresDSC = false; 1377 *RequiresFEC = false; 1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, 1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, 1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1381 ODMModeNoDSC, ODMModeDSC, &dummy); 1382 //OutputTypeAndRate = "HDMI"; 1383 *OutputType = dm_output_type_hdmi; 1384 1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1386 if (DSCEnable == true) { 1387 *RequiresDSC = true; 1388 LinkDSCEnable = true; 1389 if (Output == dm_dp || Output == dm_dp2p0) 1390 *RequiresFEC = true; 1391 else 1392 *RequiresFEC = false; 1393 } else { 1394 *RequiresDSC = false; 1395 LinkDSCEnable = false; 1396 if (Output == dm_dp2p0) 1397 *RequiresFEC = true; 1398 else 1399 *RequiresFEC = false; 1400 } 1401 if (Output == dm_dp2p0) { 1402 *OutBpp = 0; 1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1404 PHYCLKD32PerState >= 10000 / 32) { 1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1411 ForcedOutputLinkBPP == 0) { 1412 *RequiresDSC = true; 1413 LinkDSCEnable = true; 1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1416 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1417 OutputFormat, DSCInputBitPerComponent, 1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1420 } 1421 //OutputTypeAndRate = Output & " UHBR10"; 1422 *OutputType = dm_output_type_dp2p0; 1423 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1424 } 1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1432 1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1434 ForcedOutputLinkBPP == 0) { 1435 *RequiresDSC = true; 1436 LinkDSCEnable = true; 1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1439 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1440 OutputFormat, DSCInputBitPerComponent, 1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1443 } 1444 //OutputTypeAndRate = Output & " UHBR13p5"; 1445 *OutputType = dm_output_type_dp2p0; 1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1447 } 1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1456 *RequiresDSC = true; 1457 LinkDSCEnable = true; 1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1460 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1461 OutputFormat, DSCInputBitPerComponent, 1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1464 } 1465 //OutputTypeAndRate = Output & " UHBR20"; 1466 *OutputType = dm_output_type_dp2p0; 1467 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1468 } 1469 } else { 1470 *OutBpp = 0; 1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1472 PHYCLKPerState >= 270) { 1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1479 ForcedOutputLinkBPP == 0) { 1480 *RequiresDSC = true; 1481 LinkDSCEnable = true; 1482 if (Output == dm_dp) 1483 *RequiresFEC = true; 1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1486 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1487 OutputFormat, DSCInputBitPerComponent, 1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1490 } 1491 //OutputTypeAndRate = Output & " HBR"; 1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1493 *OutputRate = dm_output_rate_dp_rate_hbr; 1494 } 1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1496 *OutBpp == 0 && PHYCLKPerState >= 540) { 1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1502 1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1504 ForcedOutputLinkBPP == 0) { 1505 *RequiresDSC = true; 1506 LinkDSCEnable = true; 1507 if (Output == dm_dp) 1508 *RequiresFEC = true; 1509 1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1512 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1513 OutputFormat, DSCInputBitPerComponent, 1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1516 } 1517 //OutputTypeAndRate = Output & " HBR2"; 1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1519 *OutputRate = dm_output_rate_dp_rate_hbr2; 1520 } 1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1524 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1527 RequiredSlots); 1528 1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1530 *RequiresDSC = true; 1531 LinkDSCEnable = true; 1532 if (Output == dm_dp) 1533 *RequiresFEC = true; 1534 1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1537 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1538 OutputFormat, DSCInputBitPerComponent, 1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1541 } 1542 //OutputTypeAndRate = Output & " HBR3"; 1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1544 *OutputRate = dm_output_rate_dp_rate_hbr3; 1545 } 1546 } 1547 } 1548 } 1549} 1550 1551void dml32_CalculateDPPCLK( 1552 unsigned int NumberOfActiveSurfaces, 1553 double DISPCLKDPPCLKDSCCLKDownSpreading, 1554 double DISPCLKDPPCLKVCOSpeed, 1555 double DPPCLKUsingSingleDPP[], 1556 unsigned int DPPPerSurface[], 1557 1558 /* output */ 1559 double *GlobalDPPCLK, 1560 double Dppclk[]) 1561{ 1562 unsigned int k; 1563 *GlobalDPPCLK = 0; 1564 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1565 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1566 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1567 } 1568 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1569 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1570 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1571} 1572 1573double dml32_TruncToValidBPP( 1574 double LinkBitRate, 1575 unsigned int Lanes, 1576 unsigned int HTotal, 1577 unsigned int HActive, 1578 double PixelClock, 1579 double DesiredBPP, 1580 bool DSCEnable, 1581 enum output_encoder_class Output, 1582 enum output_format_class Format, 1583 unsigned int DSCInputBitPerComponent, 1584 unsigned int DSCSlices, 1585 unsigned int AudioRate, 1586 unsigned int AudioLayout, 1587 enum odm_combine_mode ODMModeNoDSC, 1588 enum odm_combine_mode ODMModeDSC, 1589 /* Output */ 1590 unsigned int *RequiredSlots) 1591{ 1592 double MaxLinkBPP; 1593 unsigned int MinDSCBPP; 1594 double MaxDSCBPP; 1595 unsigned int NonDSCBPP0; 1596 unsigned int NonDSCBPP1; 1597 unsigned int NonDSCBPP2; 1598 1599 if (Format == dm_420) { 1600 NonDSCBPP0 = 12; 1601 NonDSCBPP1 = 15; 1602 NonDSCBPP2 = 18; 1603 MinDSCBPP = 6; 1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1605 } else if (Format == dm_444) { 1606 NonDSCBPP0 = 24; 1607 NonDSCBPP1 = 30; 1608 NonDSCBPP2 = 36; 1609 MinDSCBPP = 8; 1610 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1611 } else { 1612 if (Output == dm_hdmi) { 1613 NonDSCBPP0 = 24; 1614 NonDSCBPP1 = 24; 1615 NonDSCBPP2 = 24; 1616 } else { 1617 NonDSCBPP0 = 16; 1618 NonDSCBPP1 = 20; 1619 NonDSCBPP2 = 24; 1620 } 1621 if (Format == dm_n422) { 1622 MinDSCBPP = 7; 1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1624 } else { 1625 MinDSCBPP = 8; 1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1627 } 1628 } 1629 if (Output == dm_dp2p0) { 1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1631 } else if (DSCEnable && Output == dm_dp) { 1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1633 } else { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1635 } 1636 1637 if (DSCEnable) { 1638 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1639 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1642 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1643 MaxLinkBPP = 2 * MaxLinkBPP; 1644 } else { 1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1646 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1650 MaxLinkBPP = 2 * MaxLinkBPP; 1651 } 1652 1653 if (DesiredBPP == 0) { 1654 if (DSCEnable) { 1655 if (MaxLinkBPP < MinDSCBPP) 1656 return BPP_INVALID; 1657 else if (MaxLinkBPP >= MaxDSCBPP) 1658 return MaxDSCBPP; 1659 else 1660 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1661 } else { 1662 if (MaxLinkBPP >= NonDSCBPP2) 1663 return NonDSCBPP2; 1664 else if (MaxLinkBPP >= NonDSCBPP1) 1665 return NonDSCBPP1; 1666 else if (MaxLinkBPP >= NonDSCBPP0) 1667 return 16.0; 1668 else 1669 return BPP_INVALID; 1670 } 1671 } else { 1672 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1673 DesiredBPP <= NonDSCBPP0)) || 1674 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1675 return BPP_INVALID; 1676 else 1677 return DesiredBPP; 1678 } 1679 1680 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1681 1682 return BPP_INVALID; 1683} // TruncToValidBPP 1684 1685double dml32_RequiredDTBCLK( 1686 bool DSCEnable, 1687 double PixelClock, 1688 enum output_format_class OutputFormat, 1689 double OutputBpp, 1690 unsigned int DSCSlices, 1691 unsigned int HTotal, 1692 unsigned int HActive, 1693 unsigned int AudioRate, 1694 unsigned int AudioLayout) 1695{ 1696 double PixelWordRate; 1697 double HCActive; 1698 double HCBlank; 1699 double AverageTribyteRate; 1700 double HActiveTribyteRate; 1701 1702 if (DSCEnable != true) 1703 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1704 1705 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1706 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1707 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1708 HCBlank = 64 + 32 * 1709 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1710 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1711 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1712 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1713} 1714 1715unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1716 enum odm_combine_mode ODMMode, 1717 unsigned int DSCInputBitPerComponent, 1718 double OutputBpp, 1719 unsigned int HActive, 1720 unsigned int HTotal, 1721 unsigned int NumberOfDSCSlices, 1722 enum output_format_class OutputFormat, 1723 enum output_encoder_class Output, 1724 double PixelClock, 1725 double PixelClockBackEnd, 1726 double dsc_delay_factor_wa) 1727{ 1728 unsigned int DSCDelayRequirement_val; 1729 1730 if (DSCEnabled == true && OutputBpp != 0) { 1731 if (ODMMode == dm_odm_combine_mode_4to1) { 1732 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1733 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1734 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1735 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1736 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1739 } else { 1740 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1743 } 1744 1745 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1746 dml_ceil((double)DSCDelayRequirement_val / HActive, 1); 1747 1748 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1749 1750 } else { 1751 DSCDelayRequirement_val = 0; 1752 } 1753 1754#ifdef __DML_VBA_DEBUG__ 1755 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); 1756 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1757 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1758 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1759 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1760 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1761 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1762#endif 1763 1764 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); 1765} 1766 1767void dml32_CalculateSurfaceSizeInMall( 1768 unsigned int NumberOfActiveSurfaces, 1769 unsigned int MALLAllocatedForDCN, 1770 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1771 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 1772 bool DCCEnable[], 1773 bool ViewportStationary[], 1774 unsigned int ViewportXStartY[], 1775 unsigned int ViewportYStartY[], 1776 unsigned int ViewportXStartC[], 1777 unsigned int ViewportYStartC[], 1778 unsigned int ViewportWidthY[], 1779 unsigned int ViewportHeightY[], 1780 unsigned int BytesPerPixelY[], 1781 unsigned int ViewportWidthC[], 1782 unsigned int ViewportHeightC[], 1783 unsigned int BytesPerPixelC[], 1784 unsigned int SurfaceWidthY[], 1785 unsigned int SurfaceWidthC[], 1786 unsigned int SurfaceHeightY[], 1787 unsigned int SurfaceHeightC[], 1788 unsigned int Read256BytesBlockWidthY[], 1789 unsigned int Read256BytesBlockWidthC[], 1790 unsigned int Read256BytesBlockHeightY[], 1791 unsigned int Read256BytesBlockHeightC[], 1792 unsigned int ReadBlockWidthY[], 1793 unsigned int ReadBlockWidthC[], 1794 unsigned int ReadBlockHeightY[], 1795 unsigned int ReadBlockHeightC[], 1796 unsigned int DCCMetaPitchY[], 1797 unsigned int DCCMetaPitchC[], 1798 1799 /* Output */ 1800 unsigned int SurfaceSizeInMALL[], 1801 bool *ExceededMALLSize) 1802{ 1803 unsigned int k; 1804 unsigned int TotalSurfaceSizeInMALLForSS = 0; 1805 unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 1806 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; 1807 1808 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1809 if (ViewportStationary[k]) { 1810 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 1811 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1812 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1813 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1814 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1815 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1816 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1817 1818 if (ReadBlockWidthC[k] > 0) { 1819 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1820 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1821 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1822 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1823 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1824 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1825 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1826 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1827 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1828 BytesPerPixelC[k]; 1829 } 1830 if (DCCEnable[k] == true) { 1831 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1832 (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]), 1833 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1834 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1835 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1836 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1837 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1838 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1839 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * 1840 Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024); 1841 if (Read256BytesBlockWidthC[k] > 0) { 1842 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1843 dml_min(dml_ceil(DCCMetaPitchC[k], 8 * 1844 Read256BytesBlockWidthC[k]), 1845 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1846 * Read256BytesBlockWidthC[k] - 1, 8 * 1847 Read256BytesBlockWidthC[k]) - 1848 dml_floor(ViewportXStartC[k], 8 * 1849 Read256BytesBlockWidthC[k])) * 1850 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1851 Read256BytesBlockHeightC[k]), 1852 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1853 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1854 Read256BytesBlockHeightC[k]) - 1855 dml_floor(ViewportYStartC[k], 8 * 1856 Read256BytesBlockHeightC[k])) * 1857 BytesPerPixelC[k] / 256; 1858 } 1859 } 1860 } else { 1861 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1862 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1863 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1864 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1865 BytesPerPixelY[k]; 1866 if (ReadBlockWidthC[k] > 0) { 1867 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1868 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1869 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1870 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1871 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1872 BytesPerPixelC[k]; 1873 } 1874 if (DCCEnable[k] == true) { 1875 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1876 (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 * 1877 Read256BytesBlockWidthY[k] - 1), 8 * 1878 Read256BytesBlockWidthY[k]) * 1879 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1880 Read256BytesBlockHeightY[k] - 1), 8 * 1881 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); 1882 1883 if (Read256BytesBlockWidthC[k] > 0) { 1884 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1885 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 * 1886 Read256BytesBlockWidthC[k] - 1), 8 * 1887 Read256BytesBlockWidthC[k]) * 1888 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1889 Read256BytesBlockHeightC[k] - 1), 8 * 1890 Read256BytesBlockHeightC[k]) * 1891 BytesPerPixelC[k] / 256; 1892 } 1893 } 1894 } 1895 } 1896 1897 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1898 /* SS and Subvp counted separate as they are never used at the same time */ 1899 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) 1900 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k]; 1901 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) 1902 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k]; 1903 } 1904 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || 1905 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); 1906} // CalculateSurfaceSizeInMall 1907 1908void dml32_CalculateVMRowAndSwath( 1909 unsigned int NumberOfActiveSurfaces, 1910 DmlPipe myPipe[], 1911 unsigned int SurfaceSizeInMALL[], 1912 unsigned int PTEBufferSizeInRequestsLuma, 1913 unsigned int PTEBufferSizeInRequestsChroma, 1914 unsigned int DCCMetaBufferSizeBytes, 1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1917 unsigned int MALLAllocatedForDCN, 1918 double SwathWidthY[], 1919 double SwathWidthC[], 1920 bool GPUVMEnable, 1921 bool HostVMEnable, 1922 unsigned int HostVMMaxNonCachedPageTableLevels, 1923 unsigned int GPUVMMaxPageTableLevels, 1924 unsigned int GPUVMMinPageSizeKBytes[], 1925 unsigned int HostVMMinPageSize, 1926 1927 /* Output */ 1928 bool PTEBufferSizeNotExceeded[], 1929 bool DCCMetaBufferSizeNotExceeded[], 1930 unsigned int dpte_row_width_luma_ub[], 1931 unsigned int dpte_row_width_chroma_ub[], 1932 unsigned int dpte_row_height_luma[], 1933 unsigned int dpte_row_height_chroma[], 1934 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1936 unsigned int meta_req_width[], 1937 unsigned int meta_req_width_chroma[], 1938 unsigned int meta_req_height[], 1939 unsigned int meta_req_height_chroma[], 1940 unsigned int meta_row_width[], 1941 unsigned int meta_row_width_chroma[], 1942 unsigned int meta_row_height[], 1943 unsigned int meta_row_height_chroma[], 1944 unsigned int vm_group_bytes[], 1945 unsigned int dpte_group_bytes[], 1946 unsigned int PixelPTEReqWidthY[], 1947 unsigned int PixelPTEReqHeightY[], 1948 unsigned int PTERequestSizeY[], 1949 unsigned int PixelPTEReqWidthC[], 1950 unsigned int PixelPTEReqHeightC[], 1951 unsigned int PTERequestSizeC[], 1952 unsigned int dpde0_bytes_per_frame_ub_l[], 1953 unsigned int meta_pte_bytes_per_frame_ub_l[], 1954 unsigned int dpde0_bytes_per_frame_ub_c[], 1955 unsigned int meta_pte_bytes_per_frame_ub_c[], 1956 double PrefetchSourceLinesY[], 1957 double PrefetchSourceLinesC[], 1958 double VInitPreFillY[], 1959 double VInitPreFillC[], 1960 unsigned int MaxNumSwathY[], 1961 unsigned int MaxNumSwathC[], 1962 double meta_row_bw[], 1963 double dpte_row_bw[], 1964 double PixelPTEBytesPerRow[], 1965 double PDEAndMetaPTEBytesFrame[], 1966 double MetaRowByte[], 1967 bool use_one_row_for_frame[], 1968 bool use_one_row_for_frame_flip[], 1969 bool UsesMALLForStaticScreen[], 1970 bool PTE_BUFFER_MODE[], 1971 unsigned int BIGK_FRAGMENT_SIZE[]) 1972{ 1973 unsigned int k; 1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1976 unsigned int PDEAndMetaPTEBytesFrameY; 1977 unsigned int PDEAndMetaPTEBytesFrameC; 1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1982 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1989 1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1991 if (HostVMEnable == true) { 1992 vm_group_bytes[k] = 512; 1993 dpte_group_bytes[k] = 512; 1994 } else if (GPUVMEnable == true) { 1995 vm_group_bytes[k] = 2048; 1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1997 dpte_group_bytes[k] = 512; 1998 else 1999 dpte_group_bytes[k] = 2048; 2000 } else { 2001 vm_group_bytes[k] = 0; 2002 dpte_group_bytes[k] = 0; 2003 } 2004 2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2006 myPipe[k].SourcePixelFormat == dm_420_12 || 2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2009 !IsVertical(myPipe[k].SourceRotation)) { 2010 PTEBufferSizeInRequestsForLuma[k] = 2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2013 } else { 2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2016 } 2017 2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2019 myPipe[k].ViewportStationary, 2020 myPipe[k].DCCEnable, 2021 myPipe[k].DPPPerSurface, 2022 myPipe[k].BlockHeight256BytesC, 2023 myPipe[k].BlockWidth256BytesC, 2024 myPipe[k].SourcePixelFormat, 2025 myPipe[k].SurfaceTiling, 2026 myPipe[k].BytePerPixelC, 2027 myPipe[k].SourceRotation, 2028 SwathWidthC[k], 2029 myPipe[k].ViewportHeightChroma, 2030 myPipe[k].ViewportXStartC, 2031 myPipe[k].ViewportYStartC, 2032 GPUVMEnable, 2033 HostVMEnable, 2034 HostVMMaxNonCachedPageTableLevels, 2035 GPUVMMaxPageTableLevels, 2036 GPUVMMinPageSizeKBytes[k], 2037 HostVMMinPageSize, 2038 PTEBufferSizeInRequestsForChroma[k], 2039 myPipe[k].PitchC, 2040 myPipe[k].DCCMetaPitchC, 2041 myPipe[k].BlockWidthC, 2042 myPipe[k].BlockHeightC, 2043 2044 /* Output */ 2045 &MetaRowByteC[k], 2046 &PixelPTEBytesPerRowC[k], 2047 &dpte_row_width_chroma_ub[k], 2048 &dpte_row_height_chroma[k], 2049 &dpte_row_height_linear_chroma[k], 2050 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2051 &dpte_row_width_chroma_ub_one_row_per_frame[k], 2052 &dpte_row_height_chroma_one_row_per_frame[k], 2053 &meta_req_width_chroma[k], 2054 &meta_req_height_chroma[k], 2055 &meta_row_width_chroma[k], 2056 &meta_row_height_chroma[k], 2057 &PixelPTEReqWidthC[k], 2058 &PixelPTEReqHeightC[k], 2059 &PTERequestSizeC[k], 2060 &dpde0_bytes_per_frame_ub_c[k], 2061 &meta_pte_bytes_per_frame_ub_c[k]); 2062 2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2064 myPipe[k].VRatioChroma, 2065 myPipe[k].VTapsChroma, 2066 myPipe[k].InterlaceEnable, 2067 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2068 myPipe[k].SwathHeightC, 2069 myPipe[k].SourceRotation, 2070 myPipe[k].ViewportStationary, 2071 SwathWidthC[k], 2072 myPipe[k].ViewportHeightChroma, 2073 myPipe[k].ViewportXStartC, 2074 myPipe[k].ViewportYStartC, 2075 2076 /* Output */ 2077 &VInitPreFillC[k], 2078 &MaxNumSwathC[k]); 2079 } else { 2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2081 PTEBufferSizeInRequestsForChroma[k] = 0; 2082 PixelPTEBytesPerRowC[k] = 0; 2083 PDEAndMetaPTEBytesFrameC = 0; 2084 MetaRowByteC[k] = 0; 2085 MaxNumSwathC[k] = 0; 2086 PrefetchSourceLinesC[k] = 0; 2087 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2088 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2090 } 2091 2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2093 myPipe[k].ViewportStationary, 2094 myPipe[k].DCCEnable, 2095 myPipe[k].DPPPerSurface, 2096 myPipe[k].BlockHeight256BytesY, 2097 myPipe[k].BlockWidth256BytesY, 2098 myPipe[k].SourcePixelFormat, 2099 myPipe[k].SurfaceTiling, 2100 myPipe[k].BytePerPixelY, 2101 myPipe[k].SourceRotation, 2102 SwathWidthY[k], 2103 myPipe[k].ViewportHeight, 2104 myPipe[k].ViewportXStart, 2105 myPipe[k].ViewportYStart, 2106 GPUVMEnable, 2107 HostVMEnable, 2108 HostVMMaxNonCachedPageTableLevels, 2109 GPUVMMaxPageTableLevels, 2110 GPUVMMinPageSizeKBytes[k], 2111 HostVMMinPageSize, 2112 PTEBufferSizeInRequestsForLuma[k], 2113 myPipe[k].PitchY, 2114 myPipe[k].DCCMetaPitchY, 2115 myPipe[k].BlockWidthY, 2116 myPipe[k].BlockHeightY, 2117 2118 /* Output */ 2119 &MetaRowByteY[k], 2120 &PixelPTEBytesPerRowY[k], 2121 &dpte_row_width_luma_ub[k], 2122 &dpte_row_height_luma[k], 2123 &dpte_row_height_linear_luma[k], 2124 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2125 &dpte_row_width_luma_ub_one_row_per_frame[k], 2126 &dpte_row_height_luma_one_row_per_frame[k], 2127 &meta_req_width[k], 2128 &meta_req_height[k], 2129 &meta_row_width[k], 2130 &meta_row_height[k], 2131 &PixelPTEReqWidthY[k], 2132 &PixelPTEReqHeightY[k], 2133 &PTERequestSizeY[k], 2134 &dpde0_bytes_per_frame_ub_l[k], 2135 &meta_pte_bytes_per_frame_ub_l[k]); 2136 2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2138 myPipe[k].VRatio, 2139 myPipe[k].VTaps, 2140 myPipe[k].InterlaceEnable, 2141 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2142 myPipe[k].SwathHeightY, 2143 myPipe[k].SourceRotation, 2144 myPipe[k].ViewportStationary, 2145 SwathWidthY[k], 2146 myPipe[k].ViewportHeight, 2147 myPipe[k].ViewportXStart, 2148 myPipe[k].ViewportYStart, 2149 2150 /* Output */ 2151 &VInitPreFillY[k], 2152 &MaxNumSwathY[k]); 2153 2154 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2156 2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2159 PTEBufferSizeNotExceeded[k] = true; 2160 } else { 2161 PTEBufferSizeNotExceeded[k] = false; 2162 } 2163 2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2165 PTEBufferSizeInRequestsForLuma[k] && 2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2167 } 2168 2169 dml32_CalculateMALLUseForStaticScreen( 2170 NumberOfActiveSurfaces, 2171 MALLAllocatedForDCN, 2172 UseMALLForStaticScreen, // mode 2173 SurfaceSizeInMALL, 2174 one_row_per_frame_fits_in_buffer, 2175 /* Output */ 2176 UsesMALLForStaticScreen); // boolen 2177 2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2182 (GPUVMMinPageSizeKBytes[k] > 64); 2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2184 } 2185 2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2187#ifdef __DML_VBA_DEBUG__ 2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2190#endif 2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2195 2196 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2198 2199 if (use_one_row_for_frame[k]) { 2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2207 } 2208 2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2210 DCCMetaBufferSizeNotExceeded[k] = true; 2211 else 2212 DCCMetaBufferSizeNotExceeded[k] = false; 2213 2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2215 if (use_one_row_for_frame[k]) 2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2217 2218 dml32_CalculateRowBandwidth( 2219 GPUVMEnable, 2220 myPipe[k].SourcePixelFormat, 2221 myPipe[k].VRatio, 2222 myPipe[k].VRatioChroma, 2223 myPipe[k].DCCEnable, 2224 myPipe[k].HTotal / myPipe[k].PixelClock, 2225 MetaRowByteY[k], MetaRowByteC[k], 2226 meta_row_height[k], 2227 meta_row_height_chroma[k], 2228 PixelPTEBytesPerRowY[k], 2229 PixelPTEBytesPerRowC[k], 2230 dpte_row_height_luma[k], 2231 dpte_row_height_chroma[k], 2232 2233 /* Output */ 2234 &meta_row_bw[k], 2235 &dpte_row_bw[k]); 2236#ifdef __DML_VBA_DEBUG__ 2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2239 __func__, k, use_one_row_for_frame_flip[k]); 2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2241 __func__, k, UseMALLForPStateChange[k]); 2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2243 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2244 __func__, k, dpte_row_width_luma_ub[k]); 2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2247 __func__, k, dpte_row_height_chroma[k]); 2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2249 __func__, k, dpte_row_width_chroma_ub[k]); 2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2253 __func__, k, PTEBufferSizeNotExceeded[k]); 2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2256#endif 2257 } 2258} // CalculateVMRowAndSwath 2259 2260unsigned int dml32_CalculateVMAndRowBytes( 2261 bool ViewportStationary, 2262 bool DCCEnable, 2263 unsigned int NumberOfDPPs, 2264 unsigned int BlockHeight256Bytes, 2265 unsigned int BlockWidth256Bytes, 2266 enum source_format_class SourcePixelFormat, 2267 unsigned int SurfaceTiling, 2268 unsigned int BytePerPixel, 2269 enum dm_rotation_angle SourceRotation, 2270 double SwathWidth, 2271 unsigned int ViewportHeight, 2272 unsigned int ViewportXStart, 2273 unsigned int ViewportYStart, 2274 bool GPUVMEnable, 2275 bool HostVMEnable, 2276 unsigned int HostVMMaxNonCachedPageTableLevels, 2277 unsigned int GPUVMMaxPageTableLevels, 2278 unsigned int GPUVMMinPageSizeKBytes, 2279 unsigned int HostVMMinPageSize, 2280 unsigned int PTEBufferSizeInRequests, 2281 unsigned int Pitch, 2282 unsigned int DCCMetaPitch, 2283 unsigned int MacroTileWidth, 2284 unsigned int MacroTileHeight, 2285 2286 /* Output */ 2287 unsigned int *MetaRowByte, 2288 unsigned int *PixelPTEBytesPerRow, 2289 unsigned int *dpte_row_width_ub, 2290 unsigned int *dpte_row_height, 2291 unsigned int *dpte_row_height_linear, 2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2293 unsigned int *dpte_row_width_ub_one_row_per_frame, 2294 unsigned int *dpte_row_height_one_row_per_frame, 2295 unsigned int *MetaRequestWidth, 2296 unsigned int *MetaRequestHeight, 2297 unsigned int *meta_row_width, 2298 unsigned int *meta_row_height, 2299 unsigned int *PixelPTEReqWidth, 2300 unsigned int *PixelPTEReqHeight, 2301 unsigned int *PTERequestSize, 2302 unsigned int *DPDE0BytesFrame, 2303 unsigned int *MetaPTEBytesFrame) 2304{ 2305 unsigned int MPDEBytesFrame; 2306 unsigned int DCCMetaSurfaceBytes; 2307 unsigned int ExtraDPDEBytesFrame; 2308 unsigned int PDEAndMetaPTEBytesFrame; 2309 unsigned int HostVMDynamicLevels = 0; 2310 unsigned int MacroTileSizeBytes; 2311 unsigned int vp_height_meta_ub; 2312 unsigned int vp_height_dpte_ub; 2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2314 2315 if (GPUVMEnable == true && HostVMEnable == true) { 2316 if (HostVMMinPageSize < 2048) 2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2320 else 2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2322 } 2323 2324 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2325 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2326 if (SurfaceTiling == dm_sw_linear) { 2327 *meta_row_height = 32; 2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2329 - dml_floor(ViewportXStart, *MetaRequestWidth); 2330 } else if (!IsVertical(SourceRotation)) { 2331 *meta_row_height = *MetaRequestHeight; 2332 if (ViewportStationary && NumberOfDPPs == 1) { 2333 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2334 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2335 } else { 2336 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2337 } 2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2339 } else { 2340 *meta_row_height = *MetaRequestWidth; 2341 if (ViewportStationary && NumberOfDPPs == 1) { 2342 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2343 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2344 } else { 2345 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2346 } 2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2348 } 2349 2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2351 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2352 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2353 } else if (!IsVertical(SourceRotation)) { 2354 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2355 } else { 2356 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2357 } 2358 2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2360 2361 if (GPUVMEnable == true) { 2362 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2363 (8 * 4.0 * 1024), 1) + 1) * 64; 2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2365 } else { 2366 *MetaPTEBytesFrame = 0; 2367 MPDEBytesFrame = 0; 2368 } 2369 2370 if (DCCEnable != true) { 2371 *MetaPTEBytesFrame = 0; 2372 MPDEBytesFrame = 0; 2373 *MetaRowByte = 0; 2374 } 2375 2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2377 2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2380 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2381 MacroTileHeight - 1, MacroTileHeight) - 2382 dml_floor(ViewportYStart, MacroTileHeight); 2383 } else if (!IsVertical(SourceRotation)) { 2384 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2385 } else { 2386 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2387 } 2388 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2389 (8 * 2097152), 1) + 1); 2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2391 } else { 2392 *DPDE0BytesFrame = 0; 2393 ExtraDPDEBytesFrame = 0; 2394 vp_height_dpte_ub = 0; 2395 } 2396 2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2398 2399#ifdef __DML_VBA_DEBUG__ 2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2414 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2417#endif 2418 2419 if (HostVMEnable == true) 2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2421 2422 if (SurfaceTiling == dm_sw_linear) { 2423 *PixelPTEReqHeight = 1; 2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2426 *PTERequestSize = 64; 2427 } else if (GPUVMMinPageSizeKBytes == 4) { 2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2430 *PTERequestSize = 128; 2431 } else { 2432 *PixelPTEReqHeight = MacroTileHeight; 2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2434 *PTERequestSize = 64; 2435 } 2436#ifdef __DML_VBA_DEBUG__ 2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2444#endif 2445 2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2449 (double) *PixelPTEReqWidth; 2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2451 *PTERequestSize; 2452 2453 if (SurfaceTiling == dm_sw_linear) { 2454 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2455 *PixelPTEReqWidth / Pitch), 1)); 2456#ifdef __DML_VBA_DEBUG__ 2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, 2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2465 *PixelPTEReqWidth / Pitch), 1)); 2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2467#endif 2468 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2469 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2471 2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2474 PixelPTEReqWidth_linear / Pitch), 1); 2475 if (*dpte_row_height_linear > 128) 2476 *dpte_row_height_linear = 128; 2477 2478 } else if (!IsVertical(SourceRotation)) { 2479 *dpte_row_height = *PixelPTEReqHeight; 2480 2481 if (GPUVMMinPageSizeKBytes > 64) { 2482 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2483 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2485 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2486 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2487 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2488 } else { 2489 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2490 *PixelPTEReqWidth; 2491 } 2492 2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2494 } else { 2495 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2496 2497 if (ViewportStationary && (NumberOfDPPs == 1)) { 2498 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2499 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2500 } else { 2501 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2502 * *PixelPTEReqHeight; 2503 } 2504 2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2506 } 2507 2508 if (GPUVMEnable != true) 2509 *PixelPTEBytesPerRow = 0; 2510 if (HostVMEnable == true) 2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2512 2513#ifdef __DML_VBA_DEBUG__ 2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2517 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2522 __func__, *dpte_row_width_ub_one_row_per_frame); 2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2526 *MetaPTEBytesFrame); 2527#endif 2528 2529 return PDEAndMetaPTEBytesFrame; 2530} // CalculateVMAndRowBytes 2531 2532double dml32_CalculatePrefetchSourceLines( 2533 double VRatio, 2534 unsigned int VTaps, 2535 bool Interlace, 2536 bool ProgressiveToInterlaceUnitInOPP, 2537 unsigned int SwathHeight, 2538 enum dm_rotation_angle SourceRotation, 2539 bool ViewportStationary, 2540 double SwathWidth, 2541 unsigned int ViewportHeight, 2542 unsigned int ViewportXStart, 2543 unsigned int ViewportYStart, 2544 2545 /* Output */ 2546 double *VInitPreFill, 2547 unsigned int *MaxNumSwath) 2548{ 2549 2550 unsigned int vp_start_rot; 2551 unsigned int sw0_tmp; 2552 unsigned int MaxPartialSwath; 2553 double numLines; 2554 2555#ifdef __DML_VBA_DEBUG__ 2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2562#endif 2563 if (ProgressiveToInterlaceUnitInOPP) 2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2565 else 2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2567 2568 if (ViewportStationary) { 2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2570 vp_start_rot = SwathHeight - 2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2573 vp_start_rot = ViewportXStart; 2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2575 vp_start_rot = SwathHeight - 2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2577 } else { 2578 vp_start_rot = ViewportYStart; 2579 } 2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2581 if (sw0_tmp < *VInitPreFill) 2582 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2583 else 2584 *MaxNumSwath = 1; 2585 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2586 } else { 2587 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2588 if (*VInitPreFill > 1) 2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2590 else 2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2592 } 2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2594 2595#ifdef __DML_VBA_DEBUG__ 2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2601#endif 2602 return numLines; 2603 2604} // CalculatePrefetchSourceLines 2605 2606void dml32_CalculateMALLUseForStaticScreen( 2607 unsigned int NumberOfActiveSurfaces, 2608 unsigned int MALLAllocatedForDCNFinal, 2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2610 unsigned int SurfaceSizeInMALL[], 2611 bool one_row_per_frame_fits_in_buffer[], 2612 2613 /* output */ 2614 bool UsesMALLForStaticScreen[]) 2615{ 2616 unsigned int k; 2617 unsigned int SurfaceToAddToMALL; 2618 bool CanAddAnotherSurfaceToMALL; 2619 unsigned int TotalSurfaceSizeInMALL; 2620 2621 TotalSurfaceSizeInMALL = 0; 2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2624 if (UsesMALLForStaticScreen[k]) 2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2626#ifdef __DML_VBA_DEBUG__ 2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2629#endif 2630 } 2631 2632 SurfaceToAddToMALL = 0; 2633 CanAddAnotherSurfaceToMALL = true; 2634 while (CanAddAnotherSurfaceToMALL) { 2635 CanAddAnotherSurfaceToMALL = false; 2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2638 !UsesMALLForStaticScreen[k] && 2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2640 one_row_per_frame_fits_in_buffer[k] && 2641 (!CanAddAnotherSurfaceToMALL || 2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2643 CanAddAnotherSurfaceToMALL = true; 2644 SurfaceToAddToMALL = k; 2645#ifdef __DML_VBA_DEBUG__ 2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2647 __func__, k, UseMALLForStaticScreen[k]); 2648#endif 2649 } 2650 } 2651 if (CanAddAnotherSurfaceToMALL) { 2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2654 2655#ifdef __DML_VBA_DEBUG__ 2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2658#endif 2659 2660 } 2661 } 2662} 2663 2664void dml32_CalculateRowBandwidth( 2665 bool GPUVMEnable, 2666 enum source_format_class SourcePixelFormat, 2667 double VRatio, 2668 double VRatioChroma, 2669 bool DCCEnable, 2670 double LineTime, 2671 unsigned int MetaRowByteLuma, 2672 unsigned int MetaRowByteChroma, 2673 unsigned int meta_row_height_luma, 2674 unsigned int meta_row_height_chroma, 2675 unsigned int PixelPTEBytesPerRowLuma, 2676 unsigned int PixelPTEBytesPerRowChroma, 2677 unsigned int dpte_row_height_luma, 2678 unsigned int dpte_row_height_chroma, 2679 /* Output */ 2680 double *meta_row_bw, 2681 double *dpte_row_bw) 2682{ 2683 if (DCCEnable != true) { 2684 *meta_row_bw = 0; 2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2686 SourcePixelFormat == dm_rgbe_alpha) { 2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2689 } else { 2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2691 } 2692 2693 if (GPUVMEnable != true) { 2694 *dpte_row_bw = 0; 2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2696 SourcePixelFormat == dm_rgbe_alpha) { 2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2699 } else { 2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2701 } 2702} 2703 2704double dml32_CalculateUrgentLatency( 2705 double UrgentLatencyPixelDataOnly, 2706 double UrgentLatencyPixelMixedWithVMData, 2707 double UrgentLatencyVMDataOnly, 2708 bool DoUrgentLatencyAdjustment, 2709 double UrgentLatencyAdjustmentFabricClockComponent, 2710 double UrgentLatencyAdjustmentFabricClockReference, 2711 double FabricClock) 2712{ 2713 double ret; 2714 2715 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2716 if (DoUrgentLatencyAdjustment == true) { 2717 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2718 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2719 } 2720 return ret; 2721} 2722 2723void dml32_CalculateUrgentBurstFactor( 2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2725 unsigned int swath_width_luma_ub, 2726 unsigned int swath_width_chroma_ub, 2727 unsigned int SwathHeightY, 2728 unsigned int SwathHeightC, 2729 double LineTime, 2730 double UrgentLatency, 2731 double CursorBufferSize, 2732 unsigned int CursorWidth, 2733 unsigned int CursorBPP, 2734 double VRatio, 2735 double VRatioC, 2736 double BytePerPixelInDETY, 2737 double BytePerPixelInDETC, 2738 unsigned int DETBufferSizeY, 2739 unsigned int DETBufferSizeC, 2740 /* Output */ 2741 double *UrgentBurstFactorCursor, 2742 double *UrgentBurstFactorLuma, 2743 double *UrgentBurstFactorChroma, 2744 bool *NotEnoughUrgentLatencyHiding) 2745{ 2746 double LinesInDETLuma; 2747 double LinesInDETChroma; 2748 unsigned int LinesInCursorBuffer; 2749 double CursorBufferSizeInTime; 2750 double DETBufferSizeInTimeLuma; 2751 double DETBufferSizeInTimeChroma; 2752 2753 *NotEnoughUrgentLatencyHiding = 0; 2754 2755 if (CursorWidth > 0) { 2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2757 (CursorWidth * CursorBPP / 8.0)), 1.0); 2758 if (VRatio > 0) { 2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2761 *NotEnoughUrgentLatencyHiding = 1; 2762 *UrgentBurstFactorCursor = 0; 2763 } else { 2764 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2765 (CursorBufferSizeInTime - UrgentLatency); 2766 } 2767 } else { 2768 *UrgentBurstFactorCursor = 1; 2769 } 2770 } 2771 2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2774 2775 if (VRatio > 0) { 2776 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2778 *NotEnoughUrgentLatencyHiding = 1; 2779 *UrgentBurstFactorLuma = 0; 2780 } else { 2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2782 } 2783 } else { 2784 *UrgentBurstFactorLuma = 1; 2785 } 2786 2787 if (BytePerPixelInDETC > 0) { 2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2790 / swath_width_chroma_ub; 2791 2792 if (VRatio > 0) { 2793 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2795 *NotEnoughUrgentLatencyHiding = 1; 2796 *UrgentBurstFactorChroma = 0; 2797 } else { 2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2799 / (DETBufferSizeInTimeChroma - UrgentLatency); 2800 } 2801 } else { 2802 *UrgentBurstFactorChroma = 1; 2803 } 2804 } 2805} // CalculateUrgentBurstFactor 2806 2807void dml32_CalculateDCFCLKDeepSleep( 2808 unsigned int NumberOfActiveSurfaces, 2809 unsigned int BytePerPixelY[], 2810 unsigned int BytePerPixelC[], 2811 double VRatio[], 2812 double VRatioChroma[], 2813 double SwathWidthY[], 2814 double SwathWidthC[], 2815 unsigned int DPPPerSurface[], 2816 double HRatio[], 2817 double HRatioChroma[], 2818 double PixelClock[], 2819 double PSCL_THROUGHPUT[], 2820 double PSCL_THROUGHPUT_CHROMA[], 2821 double Dppclk[], 2822 double ReadBandwidthLuma[], 2823 double ReadBandwidthChroma[], 2824 unsigned int ReturnBusWidth, 2825 2826 /* Output */ 2827 double *DCFClkDeepSleep) 2828{ 2829 unsigned int k; 2830 double DisplayPipeLineDeliveryTimeLuma; 2831 double DisplayPipeLineDeliveryTimeChroma; 2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2833 double ReadBandwidth = 0.0; 2834 2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2836 2837 if (VRatio[k] <= 1) { 2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2839 / PixelClock[k]; 2840 } else { 2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2842 } 2843 if (BytePerPixelC[k] == 0) { 2844 DisplayPipeLineDeliveryTimeChroma = 0; 2845 } else { 2846 if (VRatioChroma[k] <= 1) { 2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2849 } else { 2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2851 / Dppclk[k]; 2852 } 2853 } 2854 2855 if (BytePerPixelC[k] > 0) { 2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2859 32.0 / DisplayPipeLineDeliveryTimeChroma); 2860 } else { 2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2862 64.0 / DisplayPipeLineDeliveryTimeLuma; 2863 } 2864 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2865 2866#ifdef __DML_VBA_DEBUG__ 2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2869#endif 2870 } 2871 2872 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2874 2875 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2876 2877#ifdef __DML_VBA_DEBUG__ 2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2882#endif 2883 2884 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2885 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2886#ifdef __DML_VBA_DEBUG__ 2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2888#endif 2889} // CalculateDCFCLKDeepSleep 2890 2891double dml32_CalculateWriteBackDelay( 2892 enum source_format_class WritebackPixelFormat, 2893 double WritebackHRatio, 2894 double WritebackVRatio, 2895 unsigned int WritebackVTaps, 2896 unsigned int WritebackDestinationWidth, 2897 unsigned int WritebackDestinationHeight, 2898 unsigned int WritebackSourceHeight, 2899 unsigned int HTotal) 2900{ 2901 double CalculateWriteBackDelay; 2902 double Line_length; 2903 double Output_lines_last_notclamped; 2904 double WritebackVInit; 2905 2906 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2907 Line_length = dml_max((double) WritebackDestinationWidth, 2908 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2909 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2910 dml_ceil(((double)WritebackSourceHeight - 2911 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2912 if (Output_lines_last_notclamped < 0) { 2913 CalculateWriteBackDelay = 0; 2914 } else { 2915 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2916 (HTotal - WritebackDestinationWidth) + 80; 2917 } 2918 return CalculateWriteBackDelay; 2919} 2920 2921void dml32_UseMinimumDCFCLK( 2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2923 bool DRRDisplay[], 2924 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2925 unsigned int MaxInterDCNTileRepeaters, 2926 unsigned int MaxPrefetchMode, 2927 double DRAMClockChangeLatencyFinal, 2928 double FCLKChangeLatency, 2929 double SREnterPlusExitTime, 2930 unsigned int ReturnBusWidth, 2931 unsigned int RoundTripPingLatencyCycles, 2932 unsigned int ReorderingBytes, 2933 unsigned int PixelChunkSizeInKByte, 2934 unsigned int MetaChunkSize, 2935 bool GPUVMEnable, 2936 unsigned int GPUVMMaxPageTableLevels, 2937 bool HostVMEnable, 2938 unsigned int NumberOfActiveSurfaces, 2939 double HostVMMinPageSize, 2940 unsigned int HostVMMaxNonCachedPageTableLevels, 2941 bool DynamicMetadataVMEnabled, 2942 bool ImmediateFlipRequirement, 2943 bool ProgressiveToInterlaceUnitInOPP, 2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2946 unsigned int VTotal[], 2947 unsigned int VActive[], 2948 unsigned int DynamicMetadataTransmittedBytes[], 2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2950 bool Interlace[], 2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2952 double RequiredDISPCLK[][2], 2953 double UrgLatency[], 2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2955 double ProjectedDCFClkDeepSleep[][2], 2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2957 unsigned int TotalNumberOfActiveDPP[][2], 2958 unsigned int TotalNumberOfDCCActiveDPP[][2], 2959 unsigned int dpte_group_bytes[], 2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2964 unsigned int BytePerPixelY[], 2965 unsigned int BytePerPixelC[], 2966 unsigned int HTotal[], 2967 double PixelClock[], 2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2970 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2971 bool DynamicMetadataEnable[], 2972 double ReadBandwidthLuma[], 2973 double ReadBandwidthChroma[], 2974 double DCFCLKPerState[], 2975 /* Output */ 2976 double DCFCLKState[][2]) 2977{ 2978 unsigned int i, j, k; 2979 unsigned int dummy1; 2980 double dummy2, dummy3; 2981 double NormalEfficiency; 2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2983 2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2986 for (j = 0; j <= 1; ++j) { 2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2991 double MinimumTWait = 0.0; 2992 double DPTEBandwidth; 2993 double DCFCLKRequiredForAverageBandwidth; 2994 unsigned int ExtraLatencyBytes; 2995 double ExtraLatencyCycles; 2996 double DCFCLKRequiredForPeakBandwidth; 2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2998 double MinimumTvmPlus2Tr0; 2999 3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 3004 / (15.75 * HTotal[k] / PixelClock[k]); 3005 } 3006 3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3008 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3009 3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3011 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3012 3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3014 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3015 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3016 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 3017 HostVMMaxNonCachedPageTableLevels); 3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3021 double DCFCLKCyclesRequiredInPrefetch; 3022 double PrefetchTime; 3023 3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3027 * BytePerPixelC[k]) / NormalEfficiency 3028 / ReturnBusWidth; 3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3031 / NormalEfficiency / ReturnBusWidth 3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) 3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3034 / ReturnBusWidth 3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3036 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3037 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3038 * HTotal[k] / PixelClock[k]; 3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3041 UrgLatency[i] * GPUVMMaxPageTableLevels * 3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3043 3044 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3045 UseMALLForPStateChange[k], 3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3047 DRRDisplay[k], 3048 DRAMClockChangeLatencyFinal, 3049 FCLKChangeLatency, 3050 UrgLatency[i], 3051 SREnterPlusExitTime); 3052 3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3054 MinimumTWait - UrgLatency[i] * 3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3058 DynamicMetadataVMExtraLatency[k]; 3059 3060 if (PrefetchTime > 0) { 3061 double ExpectedVRatioPrefetch; 3062 3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3064 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3065 DCFCLKCyclesRequiredInPrefetch); 3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3067 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3068 PrefetchPixelLinesTime[k] * 3069 dml_max(1.0, ExpectedVRatioPrefetch) * 3070 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3075 NormalEfficiency / ReturnBusWidth; 3076 } 3077 } else { 3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3079 } 3080 if (DynamicMetadataEnable[k] == true) { 3081 double TSetupPipe; 3082 double TdmbfPipe; 3083 double TdmsksPipe; 3084 double TdmecPipe; 3085 double AllowedTimeForUrgentExtraLatency; 3086 3087 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3088 MaxInterDCNTileRepeaters, 3089 RequiredDPPCLKPerSurface[i][j][k], 3090 RequiredDISPCLK[i][j], 3091 ProjectedDCFClkDeepSleep[i][j], 3092 PixelClock[k], 3093 HTotal[k], 3094 VTotal[k] - VActive[k], 3095 DynamicMetadataTransmittedBytes[k], 3096 DynamicMetadataLinesBeforeActiveRequired[k], 3097 Interlace[k], 3098 ProgressiveToInterlaceUnitInOPP, 3099 3100 /* output */ 3101 &TSetupPipe, 3102 &TdmbfPipe, 3103 &TdmecPipe, 3104 &TdmsksPipe, 3105 &dummy1, 3106 &dummy2, 3107 &dummy3); 3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3111 if (AllowedTimeForUrgentExtraLatency > 0) 3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3113 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3114 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3115 else 3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3117 } 3118 } 3119 DCFCLKRequiredForPeakBandwidth = 0; 3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3122 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3123 } 3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3128 double MaximumTvmPlus2Tr0PlusTsw; 3129 3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3134 } else { 3135 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3136 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3137 MinimumTvmPlus2Tr0 - 3138 PrefetchPixelLinesTime[k] / 4), 3139 (2 * ExtraLatencyCycles + 3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3142 } 3143 } 3144 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3145 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3146 } 3147 } 3148} 3149 3150unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3151 unsigned int TotalNumberOfActiveDPP, 3152 unsigned int PixelChunkSizeInKByte, 3153 unsigned int TotalNumberOfDCCActiveDPP, 3154 unsigned int MetaChunkSize, 3155 bool GPUVMEnable, 3156 bool HostVMEnable, 3157 unsigned int NumberOfActiveSurfaces, 3158 unsigned int NumberOfDPP[], 3159 unsigned int dpte_group_bytes[], 3160 double HostVMInefficiencyFactor, 3161 double HostVMMinPageSize, 3162 unsigned int HostVMMaxNonCachedPageTableLevels) 3163{ 3164 unsigned int k; 3165 double ret; 3166 unsigned int HostVMDynamicLevels; 3167 3168 if (GPUVMEnable == true && HostVMEnable == true) { 3169 if (HostVMMinPageSize < 2048) 3170 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 3171 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 3172 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 3173 else 3174 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 3175 } else { 3176 HostVMDynamicLevels = 0; 3177 } 3178 3179 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + 3180 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 3181 3182 if (GPUVMEnable == true) { 3183 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3184 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * 3185 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 3186 } 3187 } 3188 return ret; 3189} 3190 3191void dml32_CalculateVUpdateAndDynamicMetadataParameters( 3192 unsigned int MaxInterDCNTileRepeaters, 3193 double Dppclk, 3194 double Dispclk, 3195 double DCFClkDeepSleep, 3196 double PixelClock, 3197 unsigned int HTotal, 3198 unsigned int VBlank, 3199 unsigned int DynamicMetadataTransmittedBytes, 3200 unsigned int DynamicMetadataLinesBeforeActiveRequired, 3201 unsigned int InterlaceEnable, 3202 bool ProgressiveToInterlaceUnitInOPP, 3203 3204 /* output */ 3205 double *TSetup, 3206 double *Tdmbf, 3207 double *Tdmec, 3208 double *Tdmsks, 3209 unsigned int *VUpdateOffsetPix, 3210 double *VUpdateWidthPix, 3211 double *VReadyOffsetPix) 3212{ 3213 double TotalRepeaterDelayTime; 3214 3215 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 3216 *VUpdateWidthPix = 3217 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); 3218 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk, 3219 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); 3220 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); 3221 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3222 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 3223 *Tdmec = HTotal / PixelClock; 3224 3225 if (DynamicMetadataLinesBeforeActiveRequired == 0) 3226 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3227 else 3228 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3229 3230 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) 3231 *Tdmsks = *Tdmsks / 2; 3232#ifdef __DML_VBA_DEBUG__ 3233 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3234 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3235 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3236 3237 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", 3238 __func__, DynamicMetadataLinesBeforeActiveRequired); 3239 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); 3240 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); 3241 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); 3242 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 3243#endif 3244} 3245 3246double dml32_CalculateTWait( 3247 unsigned int PrefetchMode, 3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3250 bool DRRDisplay, 3251 double DRAMClockChangeLatency, 3252 double FCLKChangeLatency, 3253 double UrgentLatency, 3254 double SREnterPlusExitTime) 3255{ 3256 double TWait = 0.0; 3257 3258 if (PrefetchMode == 0 && 3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && 3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && 3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && 3262 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3263 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3265 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3267 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3268 } else { 3269 TWait = UrgentLatency; 3270 } 3271 3272#ifdef __DML_VBA_DEBUG__ 3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3275#endif 3276 return TWait; 3277} // CalculateTWait 3278 3279// Function: get_return_bw_mbps 3280// Megabyte per second 3281double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3282 const int VoltageLevel, 3283 const bool HostVMEnable, 3284 const double DCFCLK, 3285 const double FabricClock, 3286 const double DRAMSpeed) 3287{ 3288 double ReturnBW = 0.; 3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3292 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3293 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3294 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3296 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3297 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3298 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3300 3301 if (HostVMEnable != true) 3302 ReturnBW = PixelDataOnlyReturnBW; 3303 else 3304 ReturnBW = PixelMixedWithVMDataReturnBW; 3305 3306#ifdef __DML_VBA_DEBUG__ 3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3318#endif 3319 return ReturnBW; 3320} 3321 3322// Function: get_return_bw_mbps_vm_only 3323// Megabyte per second 3324double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3325 const int VoltageLevel, 3326 const double DCFCLK, 3327 const double FabricClock, 3328 const double DRAMSpeed) 3329{ 3330 double VMDataOnlyReturnBW = dml_min3( 3331 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3332 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3334 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3335 * (VoltageLevel < 2 ? 3336 soc->pct_ideal_dram_bw_after_urgent_strobe : 3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3338#ifdef __DML_VBA_DEBUG__ 3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3344#endif 3345 return VMDataOnlyReturnBW; 3346} 3347 3348double dml32_CalculateExtraLatency( 3349 unsigned int RoundTripPingLatencyCycles, 3350 unsigned int ReorderingBytes, 3351 double DCFCLK, 3352 unsigned int TotalNumberOfActiveDPP, 3353 unsigned int PixelChunkSizeInKByte, 3354 unsigned int TotalNumberOfDCCActiveDPP, 3355 unsigned int MetaChunkSize, 3356 double ReturnBW, 3357 bool GPUVMEnable, 3358 bool HostVMEnable, 3359 unsigned int NumberOfActiveSurfaces, 3360 unsigned int NumberOfDPP[], 3361 unsigned int dpte_group_bytes[], 3362 double HostVMInefficiencyFactor, 3363 double HostVMMinPageSize, 3364 unsigned int HostVMMaxNonCachedPageTableLevels) 3365{ 3366 double ExtraLatencyBytes; 3367 double ExtraLatency; 3368 3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3370 ReorderingBytes, 3371 TotalNumberOfActiveDPP, 3372 PixelChunkSizeInKByte, 3373 TotalNumberOfDCCActiveDPP, 3374 MetaChunkSize, 3375 GPUVMEnable, 3376 HostVMEnable, 3377 NumberOfActiveSurfaces, 3378 NumberOfDPP, 3379 dpte_group_bytes, 3380 HostVMInefficiencyFactor, 3381 HostVMMinPageSize, 3382 HostVMMaxNonCachedPageTableLevels); 3383 3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3385 3386#ifdef __DML_VBA_DEBUG__ 3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3392#endif 3393 3394 return ExtraLatency; 3395} // CalculateExtraLatency 3396 3397bool dml32_CalculatePrefetchSchedule( 3398 struct vba_vars_st *v, 3399 unsigned int k, 3400 double HostVMInefficiencyFactor, 3401 DmlPipe *myPipe, 3402 unsigned int DSCDelay, 3403 unsigned int DPP_RECOUT_WIDTH, 3404 unsigned int VStartup, 3405 unsigned int MaxVStartup, 3406 double UrgentLatency, 3407 double UrgentExtraLatency, 3408 double TCalc, 3409 unsigned int PDEAndMetaPTEBytesFrame, 3410 unsigned int MetaRowByte, 3411 unsigned int PixelPTEBytesPerRow, 3412 double PrefetchSourceLinesY, 3413 unsigned int SwathWidthY, 3414 unsigned int VInitPreFillY, 3415 unsigned int MaxNumSwathY, 3416 double PrefetchSourceLinesC, 3417 unsigned int SwathWidthC, 3418 unsigned int VInitPreFillC, 3419 unsigned int MaxNumSwathC, 3420 unsigned int swath_width_luma_ub, 3421 unsigned int swath_width_chroma_ub, 3422 unsigned int SwathHeightY, 3423 unsigned int SwathHeightC, 3424 double TWait, 3425 double TPreReq, 3426 bool ExtendPrefetchIfPossible, 3427 /* Output */ 3428 double *DSTXAfterScaler, 3429 double *DSTYAfterScaler, 3430 double *DestinationLinesForPrefetch, 3431 double *PrefetchBandwidth, 3432 double *DestinationLinesToRequestVMInVBlank, 3433 double *DestinationLinesToRequestRowInVBlank, 3434 double *VRatioPrefetchY, 3435 double *VRatioPrefetchC, 3436 double *RequiredPrefetchPixDataBWLuma, 3437 double *RequiredPrefetchPixDataBWChroma, 3438 bool *NotEnoughTimeForDynamicMetadata, 3439 double *Tno_bw, 3440 double *prefetch_vmrow_bw, 3441 double *Tdmdl_vm, 3442 double *Tdmdl, 3443 double *TSetup, 3444 unsigned int *VUpdateOffsetPix, 3445 double *VUpdateWidthPix, 3446 double *VReadyOffsetPix) 3447{ 3448 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3449 bool MyError = false; 3450 unsigned int DPPCycles, DISPCLKCycles; 3451 double DSTTotalPixelsAfterScaler; 3452 double LineTime; 3453 double dst_y_prefetch_equ; 3454 double prefetch_bw_oto; 3455 double Tvm_oto; 3456 double Tr0_oto; 3457 double Tvm_oto_lines; 3458 double Tr0_oto_lines; 3459 double dst_y_prefetch_oto; 3460 double TimeForFetchingMetaPTE = 0; 3461 double TimeForFetchingRowInVBlank = 0; 3462 double LinesToRequestPrefetchPixelData = 0; 3463 double LinesForPrefetchBandwidth = 0; 3464 unsigned int HostVMDynamicLevelsTrips; 3465 double trip_to_mem; 3466 double Tvm_trips; 3467 double Tr0_trips; 3468 double Tvm_trips_rounded; 3469 double Tr0_trips_rounded; 3470 double Lsw_oto; 3471 double Tpre_rounded; 3472 double prefetch_bw_equ; 3473 double Tvm_equ; 3474 double Tr0_equ; 3475 double Tdmbf; 3476 double Tdmec; 3477 double Tdmsks; 3478 double prefetch_sw_bytes; 3479 double bytes_pp; 3480 double dep_bytes; 3481 unsigned int max_vratio_pre = v->MaxVRatioPre; 3482 double min_Lsw; 3483 double Tsw_est1 = 0; 3484 double Tsw_est3 = 0; 3485 3486 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3487 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3488 else 3489 HostVMDynamicLevelsTrips = 0; 3490#ifdef __DML_VBA_DEBUG__ 3491 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3492 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3493 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3494 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3495 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3496#endif 3497 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3498 v->MaxInterDCNTileRepeaters, 3499 myPipe->Dppclk, 3500 myPipe->Dispclk, 3501 myPipe->DCFClkDeepSleep, 3502 myPipe->PixelClock, 3503 myPipe->HTotal, 3504 myPipe->VBlank, 3505 v->DynamicMetadataTransmittedBytes[k], 3506 v->DynamicMetadataLinesBeforeActiveRequired[k], 3507 myPipe->InterlaceEnable, 3508 myPipe->ProgressiveToInterlaceUnitInOPP, 3509 TSetup, 3510 3511 /* output */ 3512 &Tdmbf, 3513 &Tdmec, 3514 &Tdmsks, 3515 VUpdateOffsetPix, 3516 VUpdateWidthPix, 3517 VReadyOffsetPix); 3518 3519 LineTime = myPipe->HTotal / myPipe->PixelClock; 3520 trip_to_mem = UrgentLatency; 3521 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3522 3523 if (v->DynamicMetadataVMEnabled == true) 3524 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3525 else 3526 *Tdmdl = TWait + UrgentExtraLatency; 3527 3528#ifdef __DML_VBA_ALLOW_DELTA__ 3529 if (v->DynamicMetadataEnable[k] == false) 3530 *Tdmdl = 0.0; 3531#endif 3532 3533 if (v->DynamicMetadataEnable[k] == true) { 3534 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3535 *NotEnoughTimeForDynamicMetadata = true; 3536#ifdef __DML_VBA_DEBUG__ 3537 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3538 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3539 __func__, Tdmbf); 3540 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3541 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3542 __func__, Tdmsks); 3543 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3544 __func__, *Tdmdl); 3545#endif 3546 } else { 3547 *NotEnoughTimeForDynamicMetadata = false; 3548 } 3549 } else { 3550 *NotEnoughTimeForDynamicMetadata = false; 3551 } 3552 3553 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3554 v->GPUVMEnable == true ? TWait + Tvm_trips : 0); 3555 3556 if (myPipe->ScalerEnabled) 3557 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3558 else 3559 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3560 3561 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3562 3563 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3564 3565 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3566 return true; 3567 3568 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3569 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3570 3571 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3572 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3573 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3574 myPipe->HActive / 2 : 0) 3575 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3576 3577#ifdef __DML_VBA_DEBUG__ 3578 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3579 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3580 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3581 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3582 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3583 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3584 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3585 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3586 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3587#endif 3588 3589 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3590 *DSTYAfterScaler = 1; 3591 else 3592 *DSTYAfterScaler = 0; 3593 3594 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3595 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3596 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3597#ifdef __DML_VBA_DEBUG__ 3598 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3599 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3600#endif 3601 3602 MyError = false; 3603 3604 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3605 3606 if (v->GPUVMEnable == true) { 3607 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3608 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3609 if (v->GPUVMMaxPageTableLevels >= 3) { 3610 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3611 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3612 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3613 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3614 4.0 * LineTime; // VBA_ERROR 3615 *Tno_bw = UrgentExtraLatency; 3616 } else { 3617 *Tno_bw = 0; 3618 } 3619 } else if (myPipe->DCCEnable == true) { 3620 Tvm_trips_rounded = LineTime / 4.0; 3621 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3622 *Tno_bw = 0; 3623 } else { 3624 Tvm_trips_rounded = LineTime / 4.0; 3625 Tr0_trips_rounded = LineTime / 2.0; 3626 *Tno_bw = 0; 3627 } 3628 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3629 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3630 3631 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3632 || myPipe->SourcePixelFormat == dm_420_12) { 3633 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3634 } else { 3635 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3636 } 3637 3638 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3639 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3640 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3641 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3642 3643 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3644 min_Lsw = dml_max(min_Lsw, 1.0); 3645 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3646 3647 if (v->GPUVMEnable == true) { 3648 Tvm_oto = dml_max3( 3649 Tvm_trips, 3650 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3651 LineTime / 4.0); 3652 } else 3653 Tvm_oto = LineTime / 4.0; 3654 3655 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3656 Tr0_oto = dml_max4( 3657 Tr0_trips, 3658 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3659 (LineTime - Tvm_oto)/2.0, 3660 LineTime / 4.0); 3661#ifdef __DML_VBA_DEBUG__ 3662 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3663 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3664 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3665 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3666 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3667#endif 3668 } else 3669 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3670 3671 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3672 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3673 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3674 3675 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3676 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3677 3678 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); 3679#ifdef __DML_VBA_DEBUG__ 3680 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3681 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3682 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3683 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3684 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3685 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3686 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3687 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3688 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3689 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3690 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3691 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3692 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3693 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3694 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3695 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3696 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3697 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3698 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3699 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3700 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3701 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3702 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3703 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3704 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3705 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3706 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3707#endif 3708 3709 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 3710 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3711#ifdef __DML_VBA_DEBUG__ 3712 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3713 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3714 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3715 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3716 __func__, VStartup * LineTime); 3717 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3718 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3719 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3720 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3721 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3722 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3723 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3724 __func__, *DSTYAfterScaler); 3725#endif 3726 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3727 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3728 3729 if (prefetch_sw_bytes < dep_bytes) 3730 prefetch_sw_bytes = 2 * dep_bytes; 3731 3732 *PrefetchBandwidth = 0; 3733 *DestinationLinesToRequestVMInVBlank = 0; 3734 *DestinationLinesToRequestRowInVBlank = 0; 3735 *VRatioPrefetchY = 0; 3736 *VRatioPrefetchC = 0; 3737 *RequiredPrefetchPixDataBWLuma = 0; 3738 if (dst_y_prefetch_equ > 1 && 3739 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { 3740 double PrefetchBandwidth1; 3741 double PrefetchBandwidth2; 3742 double PrefetchBandwidth3; 3743 double PrefetchBandwidth4; 3744 3745 if (Tpre_rounded - *Tno_bw > 0) { 3746 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3747 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3748 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3749 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3750 } else 3751 PrefetchBandwidth1 = 0; 3752 3753 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3754 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3755 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3756 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3757 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3758 } 3759 3760 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3761 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3762 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3763 else 3764 PrefetchBandwidth2 = 0; 3765 3766 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3767 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3768 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3769 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3770 } else 3771 PrefetchBandwidth3 = 0; 3772 3773 3774 if (VStartup == MaxVStartup && 3775 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3776 LineTime - Tvm_trips_rounded > 0) { 3777 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3778 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3779 } 3780 3781 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3782 PrefetchBandwidth4 = prefetch_sw_bytes / 3783 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3784 } else { 3785 PrefetchBandwidth4 = 0; 3786 } 3787 3788#ifdef __DML_VBA_DEBUG__ 3789 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3790 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3791 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3792 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3793 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3794 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3795 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3796 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3797 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3798#endif 3799 { 3800 bool Case1OK; 3801 bool Case2OK; 3802 bool Case3OK; 3803 3804 if (PrefetchBandwidth1 > 0) { 3805 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3806 >= Tvm_trips_rounded 3807 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3808 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3809 Case1OK = true; 3810 } else { 3811 Case1OK = false; 3812 } 3813 } else { 3814 Case1OK = false; 3815 } 3816 3817 if (PrefetchBandwidth2 > 0) { 3818 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3819 >= Tvm_trips_rounded 3820 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3821 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3822 Case2OK = true; 3823 } else { 3824 Case2OK = false; 3825 } 3826 } else { 3827 Case2OK = false; 3828 } 3829 3830 if (PrefetchBandwidth3 > 0) { 3831 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3832 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3833 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3834 Tr0_trips_rounded) { 3835 Case3OK = true; 3836 } else { 3837 Case3OK = false; 3838 } 3839 } else { 3840 Case3OK = false; 3841 } 3842 3843 if (Case1OK) 3844 prefetch_bw_equ = PrefetchBandwidth1; 3845 else if (Case2OK) 3846 prefetch_bw_equ = PrefetchBandwidth2; 3847 else if (Case3OK) 3848 prefetch_bw_equ = PrefetchBandwidth3; 3849 else 3850 prefetch_bw_equ = PrefetchBandwidth4; 3851 3852#ifdef __DML_VBA_DEBUG__ 3853 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3854 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3855 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3856 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3857#endif 3858 3859 if (prefetch_bw_equ > 0) { 3860 if (v->GPUVMEnable == true) { 3861 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3862 HostVMInefficiencyFactor / prefetch_bw_equ, 3863 Tvm_trips, LineTime / 4); 3864 } else { 3865 Tvm_equ = LineTime / 4; 3866 } 3867 3868 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3869 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3870 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3871 (LineTime - Tvm_equ) / 2, LineTime / 4); 3872 } else { 3873 Tr0_equ = (LineTime - Tvm_equ) / 2; 3874 } 3875 } else { 3876 Tvm_equ = 0; 3877 Tr0_equ = 0; 3878#ifdef __DML_VBA_DEBUG__ 3879 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 3880#endif 3881 } 3882 } 3883 3884 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3885 if (dst_y_prefetch_oto * LineTime < TPreReq) { 3886 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3887 } else { 3888 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3889 } 3890 TimeForFetchingMetaPTE = Tvm_oto; 3891 TimeForFetchingRowInVBlank = Tr0_oto; 3892 *PrefetchBandwidth = prefetch_bw_oto; 3893 /* Clamp to oto for bandwidth calculation */ 3894 LinesForPrefetchBandwidth = dst_y_prefetch_oto; 3895 } else { 3896 /* For mode programming we want to extend the prefetch as much as possible 3897 * (up to oto, or as long as we can for equ) if we're not already applying 3898 * the 60us prefetch requirement. This is to avoid intermittent underflow 3899 * issues during prefetch. 3900 * 3901 * The prefetch extension is applied under the following scenarios: 3902 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) 3903 * 2. We're using subvp or drr methods of p-state switch, in which case we 3904 * we don't care if prefetch takes up more of the blanking time 3905 * 3906 * Mode programming typically chooses the smallest prefetch time possible 3907 * (i.e. highest bandwidth during prefetch) presumably to create margin between 3908 * p-states / c-states that happen in vblank and prefetch. Therefore we only 3909 * apply this prefetch extension when p-state in vblank is not required (UCLK 3910 * p-states take up the most vblank time). 3911 */ 3912 if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { 3913 MyError = true; 3914 } else { 3915 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3916 TimeForFetchingMetaPTE = Tvm_equ; 3917 TimeForFetchingRowInVBlank = Tr0_equ; 3918 *PrefetchBandwidth = prefetch_bw_equ; 3919 /* Clamp to equ for bandwidth calculation */ 3920 LinesForPrefetchBandwidth = dst_y_prefetch_equ; 3921 } 3922 } 3923 3924 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3925 3926 *DestinationLinesToRequestRowInVBlank = 3927 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3928 3929 LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth - 3930 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3931 3932#ifdef __DML_VBA_DEBUG__ 3933 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3934 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3935 __func__, *DestinationLinesToRequestVMInVBlank); 3936 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3937 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3938 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3939 __func__, *DestinationLinesToRequestRowInVBlank); 3940 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3941 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3942#endif 3943 3944 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3945 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3946 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3947#ifdef __DML_VBA_DEBUG__ 3948 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3949 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3950 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3951#endif 3952 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3953 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3954 *VRatioPrefetchY = 3955 dml_max((double) PrefetchSourceLinesY / 3956 LinesToRequestPrefetchPixelData, 3957 (double) MaxNumSwathY * SwathHeightY / 3958 (LinesToRequestPrefetchPixelData - 3959 (VInitPreFillY - 3.0) / 2.0)); 3960 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3961 } else { 3962 MyError = true; 3963 *VRatioPrefetchY = 0; 3964 } 3965#ifdef __DML_VBA_DEBUG__ 3966 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3967 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3968 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3969#endif 3970 } 3971 3972 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3973 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3974 3975#ifdef __DML_VBA_DEBUG__ 3976 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3977 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3978 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3979#endif 3980 if ((SwathHeightC > 4)) { 3981 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3982 *VRatioPrefetchC = 3983 dml_max(*VRatioPrefetchC, 3984 (double) MaxNumSwathC * SwathHeightC / 3985 (LinesToRequestPrefetchPixelData - 3986 (VInitPreFillC - 3.0) / 2.0)); 3987 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3988 } else { 3989 MyError = true; 3990 *VRatioPrefetchC = 0; 3991 } 3992#ifdef __DML_VBA_DEBUG__ 3993 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3994 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3995 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3996#endif 3997 } 3998 3999 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 4000 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 4001 / LineTime; 4002 4003#ifdef __DML_VBA_DEBUG__ 4004 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 4005 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 4006 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4007 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 4008 __func__, *RequiredPrefetchPixDataBWLuma); 4009#endif 4010 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 4011 LinesToRequestPrefetchPixelData 4012 * myPipe->BytePerPixelC 4013 * swath_width_chroma_ub / LineTime; 4014 } else { 4015 MyError = true; 4016#ifdef __DML_VBA_DEBUG__ 4017 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", 4018 __func__, LinesToRequestPrefetchPixelData); 4019#endif 4020 *VRatioPrefetchY = 0; 4021 *VRatioPrefetchC = 0; 4022 *RequiredPrefetchPixDataBWLuma = 0; 4023 *RequiredPrefetchPixDataBWChroma = 0; 4024 } 4025#ifdef __DML_VBA_DEBUG__ 4026 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 4027 (double)LinesToRequestPrefetchPixelData * LineTime + 4028 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 4029 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 4030 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 4031 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 4032 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 4033 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 4034 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 4035 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 4036 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 4037 PixelPTEBytesPerRow); 4038#endif 4039 } else { 4040 MyError = true; 4041#ifdef __DML_VBA_DEBUG__ 4042 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4043 __func__, dst_y_prefetch_equ); 4044#endif 4045 } 4046 4047 { 4048 double prefetch_vm_bw; 4049 double prefetch_row_bw; 4050 4051 if (PDEAndMetaPTEBytesFrame == 0) { 4052 prefetch_vm_bw = 0; 4053 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4054#ifdef __DML_VBA_DEBUG__ 4055 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4056 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4057 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4058 __func__, *DestinationLinesToRequestVMInVBlank); 4059 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4060#endif 4061 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4062 (*DestinationLinesToRequestVMInVBlank * LineTime); 4063#ifdef __DML_VBA_DEBUG__ 4064 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4065#endif 4066 } else { 4067 prefetch_vm_bw = 0; 4068 MyError = true; 4069#ifdef __DML_VBA_DEBUG__ 4070 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4071 __func__, *DestinationLinesToRequestVMInVBlank); 4072#endif 4073 } 4074 4075 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4076 prefetch_row_bw = 0; 4077 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4078 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4079 (*DestinationLinesToRequestRowInVBlank * LineTime); 4080 4081#ifdef __DML_VBA_DEBUG__ 4082 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4083 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4084 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4085 __func__, *DestinationLinesToRequestRowInVBlank); 4086 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4087#endif 4088 } else { 4089 prefetch_row_bw = 0; 4090 MyError = true; 4091#ifdef __DML_VBA_DEBUG__ 4092 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4093 __func__, *DestinationLinesToRequestRowInVBlank); 4094#endif 4095 } 4096 4097 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4098 } 4099 4100 if (MyError) { 4101 *PrefetchBandwidth = 0; 4102 TimeForFetchingMetaPTE = 0; 4103 TimeForFetchingRowInVBlank = 0; 4104 *DestinationLinesToRequestVMInVBlank = 0; 4105 *DestinationLinesToRequestRowInVBlank = 0; 4106 *DestinationLinesForPrefetch = 0; 4107 LinesToRequestPrefetchPixelData = 0; 4108 *VRatioPrefetchY = 0; 4109 *VRatioPrefetchC = 0; 4110 *RequiredPrefetchPixDataBWLuma = 0; 4111 *RequiredPrefetchPixDataBWChroma = 0; 4112 } 4113 4114 return MyError; 4115} // CalculatePrefetchSchedule 4116 4117void dml32_CalculateFlipSchedule( 4118 double HostVMInefficiencyFactor, 4119 double UrgentExtraLatency, 4120 double UrgentLatency, 4121 unsigned int GPUVMMaxPageTableLevels, 4122 bool HostVMEnable, 4123 unsigned int HostVMMaxNonCachedPageTableLevels, 4124 bool GPUVMEnable, 4125 double HostVMMinPageSize, 4126 double PDEAndMetaPTEBytesPerFrame, 4127 double MetaRowBytes, 4128 double DPTEBytesPerRow, 4129 double BandwidthAvailableForImmediateFlip, 4130 unsigned int TotImmediateFlipBytes, 4131 enum source_format_class SourcePixelFormat, 4132 double LineTime, 4133 double VRatio, 4134 double VRatioChroma, 4135 double Tno_bw, 4136 bool DCCEnable, 4137 unsigned int dpte_row_height, 4138 unsigned int meta_row_height, 4139 unsigned int dpte_row_height_chroma, 4140 unsigned int meta_row_height_chroma, 4141 bool use_one_row_for_frame_flip, 4142 4143 /* Output */ 4144 double *DestinationLinesToRequestVMInImmediateFlip, 4145 double *DestinationLinesToRequestRowInImmediateFlip, 4146 double *final_flip_bw, 4147 bool *ImmediateFlipSupportedForPipe) 4148{ 4149 double min_row_time = 0.0; 4150 unsigned int HostVMDynamicLevelsTrips; 4151 double TimeForFetchingMetaPTEImmediateFlip; 4152 double TimeForFetchingRowInVBlankImmediateFlip; 4153 double ImmediateFlipBW = 1.0; 4154 4155 if (GPUVMEnable == true && HostVMEnable == true) 4156 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4157 else 4158 HostVMDynamicLevelsTrips = 0; 4159 4160#ifdef __DML_VBA_DEBUG__ 4161 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4162 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4163#endif 4164 4165 if (TotImmediateFlipBytes > 0) { 4166 if (use_one_row_for_frame_flip) { 4167 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4168 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4169 } else { 4170 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4171 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4172 } 4173 if (GPUVMEnable == true) { 4174 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4175 HostVMInefficiencyFactor / ImmediateFlipBW, 4176 UrgentExtraLatency + UrgentLatency * 4177 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4178 LineTime / 4.0); 4179 } else { 4180 TimeForFetchingMetaPTEImmediateFlip = 0; 4181 } 4182 if ((GPUVMEnable == true || DCCEnable == true)) { 4183 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4184 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4185 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4186 } else { 4187 TimeForFetchingRowInVBlankImmediateFlip = 0; 4188 } 4189 4190 *DestinationLinesToRequestVMInImmediateFlip = 4191 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4192 *DestinationLinesToRequestRowInImmediateFlip = 4193 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4194 4195 if (GPUVMEnable == true) { 4196 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4197 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 4198 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4199 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4200 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4201 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4202 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4203 } else { 4204 *final_flip_bw = 0; 4205 } 4206 } else { 4207 TimeForFetchingMetaPTEImmediateFlip = 0; 4208 TimeForFetchingRowInVBlankImmediateFlip = 0; 4209 *DestinationLinesToRequestVMInImmediateFlip = 0; 4210 *DestinationLinesToRequestRowInImmediateFlip = 0; 4211 *final_flip_bw = 0; 4212 } 4213 4214 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4215 if (GPUVMEnable == true && DCCEnable != true) { 4216 min_row_time = dml_min(dpte_row_height * 4217 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4218 } else if (GPUVMEnable != true && DCCEnable == true) { 4219 min_row_time = dml_min(meta_row_height * 4220 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4221 } else { 4222 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4223 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4224 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4225 } 4226 } else { 4227 if (GPUVMEnable == true && DCCEnable != true) { 4228 min_row_time = dpte_row_height * LineTime / VRatio; 4229 } else if (GPUVMEnable != true && DCCEnable == true) { 4230 min_row_time = meta_row_height * LineTime / VRatio; 4231 } else { 4232 min_row_time = 4233 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4234 } 4235 } 4236 4237 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4238 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4239 > min_row_time) { 4240 *ImmediateFlipSupportedForPipe = false; 4241 } else { 4242 *ImmediateFlipSupportedForPipe = true; 4243 } 4244 4245#ifdef __DML_VBA_DEBUG__ 4246 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4247 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4248 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4249 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4250 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4251 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4252 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4253 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4254 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4255 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4256 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4257#endif 4258} // CalculateFlipSchedule 4259 4260void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4261 struct vba_vars_st *v, 4262 unsigned int PrefetchMode, 4263 double DCFCLK, 4264 double ReturnBW, 4265 SOCParametersList mmSOCParameters, 4266 double SOCCLK, 4267 double DCFClkDeepSleep, 4268 unsigned int DETBufferSizeY[], 4269 unsigned int DETBufferSizeC[], 4270 unsigned int SwathHeightY[], 4271 unsigned int SwathHeightC[], 4272 double SwathWidthY[], 4273 double SwathWidthC[], 4274 unsigned int DPPPerSurface[], 4275 double BytePerPixelDETY[], 4276 double BytePerPixelDETC[], 4277 double DSTXAfterScaler[], 4278 double DSTYAfterScaler[], 4279 bool UnboundedRequestEnabled, 4280 unsigned int CompressedBufferSizeInkByte, 4281 4282 /* Output */ 4283 enum clock_change_support *DRAMClockChangeSupport, 4284 double MaxActiveDRAMClockChangeLatencySupported[], 4285 unsigned int SubViewportLinesNeededInMALL[], 4286 enum dm_fclock_change_support *FCLKChangeSupport, 4287 double *MinActiveFCLKChangeLatencySupported, 4288 bool *USRRetrainingSupport, 4289 double ActiveDRAMClockChangeLatencyMargin[]) 4290{ 4291 unsigned int i, j, k; 4292 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4293 unsigned int DRAMClockChangeSupportNumber = 0; 4294 unsigned int LastSurfaceWithoutMargin; 4295 unsigned int DRAMClockChangeMethod = 0; 4296 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4297 double MinActiveFCLKChangeMargin = 0.; 4298 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4299 double ActiveClockChangeLatencyHidingY; 4300 double ActiveClockChangeLatencyHidingC; 4301 double ActiveClockChangeLatencyHiding; 4302 double EffectiveDETBufferSizeY; 4303 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4304 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4305 double TotalPixelBW = 0.0; 4306 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4307 double EffectiveLBLatencyHidingY; 4308 double EffectiveLBLatencyHidingC; 4309 double LinesInDETY[DC__NUM_DPP__MAX]; 4310 double LinesInDETC[DC__NUM_DPP__MAX]; 4311 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4312 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4313 double FullDETBufferingTimeY; 4314 double FullDETBufferingTimeC; 4315 double WritebackDRAMClockChangeLatencyMargin; 4316 double WritebackFCLKChangeLatencyMargin; 4317 double WritebackLatencyHiding; 4318 bool SameTimingForFCLKChange; 4319 4320 unsigned int TotalActiveWriteback = 0; 4321 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4322 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4323 4324 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4325 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4326 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4327 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4328 v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4329 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4330 + 10 / DCFClkDeepSleep; 4331 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4332 + 10 / DCFClkDeepSleep; 4333 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4334 + 10 / DCFClkDeepSleep; 4335 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4336 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4337 4338#ifdef __DML_VBA_DEBUG__ 4339 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4340 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4341 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4342 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4343 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4344 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4345 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4346 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4347 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4348 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4349 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4350 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4351#endif 4352 4353 4354 TotalActiveWriteback = 0; 4355 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4356 if (v->WritebackEnable[k] == true) 4357 TotalActiveWriteback = TotalActiveWriteback + 1; 4358 } 4359 4360 if (TotalActiveWriteback <= 1) { 4361 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4362 } else { 4363 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4364 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4365 } 4366 if (v->USRRetrainingRequiredFinal) 4367 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4368 + mmSOCParameters.USRRetrainingLatency; 4369 4370 if (TotalActiveWriteback <= 1) { 4371 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4372 + mmSOCParameters.WritebackLatency; 4373 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4374 + mmSOCParameters.WritebackLatency; 4375 } else { 4376 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4377 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4378 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4379 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4380 } 4381 4382 if (v->USRRetrainingRequiredFinal) 4383 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4384 + mmSOCParameters.USRRetrainingLatency; 4385 4386 if (v->USRRetrainingRequiredFinal) 4387 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4388 + mmSOCParameters.USRRetrainingLatency; 4389 4390#ifdef __DML_VBA_DEBUG__ 4391 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4392 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4393 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4394 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); 4395 dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); 4396 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4397#endif 4398 4399 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4400 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4401 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4402 } 4403 4404 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4405 4406 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4407 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4408 4409 4410#ifdef __DML_VBA_DEBUG__ 4411 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4412 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4413 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4414 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4415 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4416#endif 4417 4418 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4419 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4420 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4421 4422 if (UnboundedRequestEnabled) { 4423 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4424 + CompressedBufferSizeInkByte * 1024 4425 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) 4426 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4427 } 4428 4429 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4430 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4431 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4432 4433 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4434 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4435 4436 if (v->NumberOfActiveSurfaces > 1) { 4437 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4438 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4439 / v->PixelClock[k] / v->VRatio[k]; 4440 } 4441 4442 if (BytePerPixelDETC[k] > 0) { 4443 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4444 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4445 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4446 / v->VRatioChroma[k]; 4447 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4448 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4449 / v->PixelClock[k]; 4450 if (v->NumberOfActiveSurfaces > 1) { 4451 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4452 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4453 / v->PixelClock[k] / v->VRatioChroma[k]; 4454 } 4455 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4456 ActiveClockChangeLatencyHidingC); 4457 } else { 4458 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4459 } 4460 4461 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4462 - v->Watermark.DRAMClockChangeWatermark; 4463 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4464 - v->Watermark.FCLKChangeWatermark; 4465 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4466 4467 if (v->WritebackEnable[k]) { 4468 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4469 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4470 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4471 if (v->WritebackPixelFormat[k] == dm_444_64) 4472 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4473 4474 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4475 - v->Watermark.WritebackDRAMClockChangeWatermark; 4476 4477 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4478 - v->Watermark.WritebackFCLKChangeWatermark; 4479 4480 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4481 WritebackFCLKChangeLatencyMargin); 4482 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4483 WritebackDRAMClockChangeLatencyMargin); 4484 } 4485 MaxActiveDRAMClockChangeLatencySupported[k] = 4486 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 4487 0 : 4488 (ActiveDRAMClockChangeLatencyMargin[k] 4489 + mmSOCParameters.DRAMClockChangeLatency); 4490 } 4491 4492 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4493 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4494 if (i == j || 4495 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4496 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4497 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4498 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4499 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4500 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4501 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4502 SynchronizedSurfaces[i][j] = true; 4503 } else { 4504 SynchronizedSurfaces[i][j] = false; 4505 } 4506 } 4507 } 4508 4509 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4510 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4511 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4512 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4513 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4514 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4515 SurfaceWithMinActiveFCLKChangeMargin = k; 4516 } 4517 } 4518 4519 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4520 4521 SameTimingForFCLKChange = true; 4522 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4523 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4524 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4525 (SameTimingForFCLKChange || 4526 ActiveFCLKChangeLatencyMargin[k] < 4527 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4528 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4529 } 4530 SameTimingForFCLKChange = false; 4531 } 4532 } 4533 4534 if (MinActiveFCLKChangeMargin > 0) { 4535 *FCLKChangeSupport = dm_fclock_change_vactive; 4536 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4537 (PrefetchMode <= 1)) { 4538 *FCLKChangeSupport = dm_fclock_change_vblank; 4539 } else { 4540 *FCLKChangeSupport = dm_fclock_change_unsupported; 4541 } 4542 4543 *USRRetrainingSupport = true; 4544 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4545 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4546 (USRRetrainingLatencyMargin[k] < 0)) { 4547 *USRRetrainingSupport = false; 4548 } 4549 } 4550 4551 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4552 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4553 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4554 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4555 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4556 if (PrefetchMode > 0) { 4557 DRAMClockChangeSupportNumber = 2; 4558 } else if (DRAMClockChangeSupportNumber == 0) { 4559 DRAMClockChangeSupportNumber = 1; 4560 LastSurfaceWithoutMargin = k; 4561 } else if (DRAMClockChangeSupportNumber == 1 && 4562 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4563 DRAMClockChangeSupportNumber = 2; 4564 } 4565 } 4566 } 4567 4568 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4569 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4570 DRAMClockChangeMethod = 1; 4571 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4572 DRAMClockChangeMethod = 2; 4573 } 4574 4575 if (DRAMClockChangeMethod == 0) { 4576 if (DRAMClockChangeSupportNumber == 0) 4577 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4578 else if (DRAMClockChangeSupportNumber == 1) 4579 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4580 else 4581 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4582 } else if (DRAMClockChangeMethod == 1) { 4583 if (DRAMClockChangeSupportNumber == 0) 4584 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4585 else if (DRAMClockChangeSupportNumber == 1) 4586 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4587 else 4588 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4589 } else { 4590 if (DRAMClockChangeSupportNumber == 0) 4591 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4592 else if (DRAMClockChangeSupportNumber == 1) 4593 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4594 else 4595 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4596 } 4597 4598 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4599 unsigned int dst_y_pstate; 4600 unsigned int src_y_pstate_l; 4601 unsigned int src_y_pstate_c; 4602 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4603 4604 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4605 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4606 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4607 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4608 4609#ifdef __DML_VBA_DEBUG__ 4610dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4611dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4612dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4613dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4614dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4615dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4616dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 4617dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4618dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4619dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4620#endif 4621 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4622 4623 if (BytePerPixelDETC[k] > 0) { 4624 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4625 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4626 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4627 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4628 4629#ifdef __DML_VBA_DEBUG__ 4630dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4631dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4632dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4633dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4634#endif 4635 } 4636 } 4637#ifdef __DML_VBA_DEBUG__ 4638 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4639 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4640 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4641 __func__, *MinActiveFCLKChangeLatencySupported); 4642 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4643#endif 4644} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4645 4646double dml32_CalculateWriteBackDISPCLK( 4647 enum source_format_class WritebackPixelFormat, 4648 double PixelClock, 4649 double WritebackHRatio, 4650 double WritebackVRatio, 4651 unsigned int WritebackHTaps, 4652 unsigned int WritebackVTaps, 4653 unsigned int WritebackSourceWidth, 4654 unsigned int WritebackDestinationWidth, 4655 unsigned int HTotal, 4656 unsigned int WritebackLineBufferSize, 4657 double DISPCLKDPPCLKVCOSpeed) 4658{ 4659 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4660 4661 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4662 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4663 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4664 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4665 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4666} 4667 4668void dml32_CalculateMinAndMaxPrefetchMode( 4669 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4670 unsigned int *MinPrefetchMode, 4671 unsigned int *MaxPrefetchMode) 4672{ 4673 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4674 *MinPrefetchMode = 3; 4675 *MaxPrefetchMode = 3; 4676 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4677 *MinPrefetchMode = 2; 4678 *MaxPrefetchMode = 2; 4679 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4680 *MinPrefetchMode = 1; 4681 *MaxPrefetchMode = 1; 4682 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4683 *MinPrefetchMode = 0; 4684 *MaxPrefetchMode = 0; 4685 } else if (AllowForPStateChangeOrStutterInVBlankFinal == 4686 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) { 4687 *MinPrefetchMode = 0; 4688 *MaxPrefetchMode = 3; 4689 } else { 4690 *MinPrefetchMode = 0; 4691 *MaxPrefetchMode = 3; 4692 } 4693} // CalculateMinAndMaxPrefetchMode 4694 4695void dml32_CalculatePixelDeliveryTimes( 4696 unsigned int NumberOfActiveSurfaces, 4697 double VRatio[], 4698 double VRatioChroma[], 4699 double VRatioPrefetchY[], 4700 double VRatioPrefetchC[], 4701 unsigned int swath_width_luma_ub[], 4702 unsigned int swath_width_chroma_ub[], 4703 unsigned int DPPPerSurface[], 4704 double HRatio[], 4705 double HRatioChroma[], 4706 double PixelClock[], 4707 double PSCL_THROUGHPUT[], 4708 double PSCL_THROUGHPUT_CHROMA[], 4709 double Dppclk[], 4710 unsigned int BytePerPixelC[], 4711 enum dm_rotation_angle SourceRotation[], 4712 unsigned int NumberOfCursors[], 4713 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4714 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4715 unsigned int BlockWidth256BytesY[], 4716 unsigned int BlockHeight256BytesY[], 4717 unsigned int BlockWidth256BytesC[], 4718 unsigned int BlockHeight256BytesC[], 4719 4720 /* Output */ 4721 double DisplayPipeLineDeliveryTimeLuma[], 4722 double DisplayPipeLineDeliveryTimeChroma[], 4723 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4724 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4725 double DisplayPipeRequestDeliveryTimeLuma[], 4726 double DisplayPipeRequestDeliveryTimeChroma[], 4727 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4728 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4729 double CursorRequestDeliveryTime[], 4730 double CursorRequestDeliveryTimePrefetch[]) 4731{ 4732 double req_per_swath_ub; 4733 unsigned int k; 4734 4735 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4736 4737#ifdef __DML_VBA_DEBUG__ 4738 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4739 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4740 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4741 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4742 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4743 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4744 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4745 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4746 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4747 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4748 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4749#endif 4750 4751 if (VRatio[k] <= 1) { 4752 DisplayPipeLineDeliveryTimeLuma[k] = 4753 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4754 } else { 4755 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4756 } 4757 4758 if (BytePerPixelC[k] == 0) { 4759 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4760 } else { 4761 if (VRatioChroma[k] <= 1) { 4762 DisplayPipeLineDeliveryTimeChroma[k] = 4763 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4764 } else { 4765 DisplayPipeLineDeliveryTimeChroma[k] = 4766 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4767 } 4768 } 4769 4770 if (VRatioPrefetchY[k] <= 1) { 4771 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4772 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4773 } else { 4774 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4775 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4776 } 4777 4778 if (BytePerPixelC[k] == 0) { 4779 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4780 } else { 4781 if (VRatioPrefetchC[k] <= 1) { 4782 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4783 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4784 } else { 4785 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4786 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4787 } 4788 } 4789#ifdef __DML_VBA_DEBUG__ 4790 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4791 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4792 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4793 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4794 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4795 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 4796 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4797 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4798#endif 4799 } 4800 4801 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4802 if (!IsVertical(SourceRotation[k])) 4803 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4804 else 4805 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4806#ifdef __DML_VBA_DEBUG__ 4807 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4808#endif 4809 4810 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4811 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4812 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4813 if (BytePerPixelC[k] == 0) { 4814 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4815 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4816 } else { 4817 if (!IsVertical(SourceRotation[k])) 4818 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4819 else 4820 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4821#ifdef __DML_VBA_DEBUG__ 4822 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4823#endif 4824 DisplayPipeRequestDeliveryTimeChroma[k] = 4825 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4826 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4827 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4828 } 4829#ifdef __DML_VBA_DEBUG__ 4830 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4831 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4832 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4833 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4834 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4835 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4836 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4837 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4838#endif 4839 } 4840 4841 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4842 unsigned int cursor_req_per_width; 4843 4844 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4845 256.0 / 8.0, 1.0); 4846 if (NumberOfCursors[k] > 0) { 4847 if (VRatio[k] <= 1) { 4848 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4849 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4850 } else { 4851 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4852 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4853 } 4854 if (VRatioPrefetchY[k] <= 1) { 4855 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4856 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4857 } else { 4858 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4859 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4860 } 4861 } else { 4862 CursorRequestDeliveryTime[k] = 0; 4863 CursorRequestDeliveryTimePrefetch[k] = 0; 4864 } 4865#ifdef __DML_VBA_DEBUG__ 4866 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4867 __func__, k, NumberOfCursors[k]); 4868 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4869 __func__, k, CursorRequestDeliveryTime[k]); 4870 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4871 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4872#endif 4873 } 4874} // CalculatePixelDeliveryTimes 4875 4876void dml32_CalculateMetaAndPTETimes( 4877 bool use_one_row_for_frame[], 4878 unsigned int NumberOfActiveSurfaces, 4879 bool GPUVMEnable, 4880 unsigned int MetaChunkSize, 4881 unsigned int MinMetaChunkSizeBytes, 4882 unsigned int HTotal[], 4883 double VRatio[], 4884 double VRatioChroma[], 4885 double DestinationLinesToRequestRowInVBlank[], 4886 double DestinationLinesToRequestRowInImmediateFlip[], 4887 bool DCCEnable[], 4888 double PixelClock[], 4889 unsigned int BytePerPixelY[], 4890 unsigned int BytePerPixelC[], 4891 enum dm_rotation_angle SourceRotation[], 4892 unsigned int dpte_row_height[], 4893 unsigned int dpte_row_height_chroma[], 4894 unsigned int meta_row_width[], 4895 unsigned int meta_row_width_chroma[], 4896 unsigned int meta_row_height[], 4897 unsigned int meta_row_height_chroma[], 4898 unsigned int meta_req_width[], 4899 unsigned int meta_req_width_chroma[], 4900 unsigned int meta_req_height[], 4901 unsigned int meta_req_height_chroma[], 4902 unsigned int dpte_group_bytes[], 4903 unsigned int PTERequestSizeY[], 4904 unsigned int PTERequestSizeC[], 4905 unsigned int PixelPTEReqWidthY[], 4906 unsigned int PixelPTEReqHeightY[], 4907 unsigned int PixelPTEReqWidthC[], 4908 unsigned int PixelPTEReqHeightC[], 4909 unsigned int dpte_row_width_luma_ub[], 4910 unsigned int dpte_row_width_chroma_ub[], 4911 4912 /* Output */ 4913 double DST_Y_PER_PTE_ROW_NOM_L[], 4914 double DST_Y_PER_PTE_ROW_NOM_C[], 4915 double DST_Y_PER_META_ROW_NOM_L[], 4916 double DST_Y_PER_META_ROW_NOM_C[], 4917 double TimePerMetaChunkNominal[], 4918 double TimePerChromaMetaChunkNominal[], 4919 double TimePerMetaChunkVBlank[], 4920 double TimePerChromaMetaChunkVBlank[], 4921 double TimePerMetaChunkFlip[], 4922 double TimePerChromaMetaChunkFlip[], 4923 double time_per_pte_group_nom_luma[], 4924 double time_per_pte_group_vblank_luma[], 4925 double time_per_pte_group_flip_luma[], 4926 double time_per_pte_group_nom_chroma[], 4927 double time_per_pte_group_vblank_chroma[], 4928 double time_per_pte_group_flip_chroma[]) 4929{ 4930 unsigned int meta_chunk_width; 4931 unsigned int min_meta_chunk_width; 4932 unsigned int meta_chunk_per_row_int; 4933 unsigned int meta_row_remainder; 4934 unsigned int meta_chunk_threshold; 4935 unsigned int meta_chunks_per_row_ub; 4936 unsigned int meta_chunk_width_chroma; 4937 unsigned int min_meta_chunk_width_chroma; 4938 unsigned int meta_chunk_per_row_int_chroma; 4939 unsigned int meta_row_remainder_chroma; 4940 unsigned int meta_chunk_threshold_chroma; 4941 unsigned int meta_chunks_per_row_ub_chroma; 4942 unsigned int dpte_group_width_luma; 4943 unsigned int dpte_groups_per_row_luma_ub; 4944 unsigned int dpte_group_width_chroma; 4945 unsigned int dpte_groups_per_row_chroma_ub; 4946 unsigned int k; 4947 4948 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4949 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4950 if (BytePerPixelC[k] == 0) 4951 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4952 else 4953 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4954 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4955 if (BytePerPixelC[k] == 0) 4956 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4957 else 4958 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4959 } 4960 4961 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4962 if (DCCEnable[k] == true) { 4963 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4964 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4965 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4966 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4967 if (!IsVertical(SourceRotation[k])) 4968 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4969 else 4970 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4971 4972 if (meta_row_remainder <= meta_chunk_threshold) 4973 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4974 else 4975 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4976 4977 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4978 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4979 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4980 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4981 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4982 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4983 if (BytePerPixelC[k] == 0) { 4984 TimePerChromaMetaChunkNominal[k] = 0; 4985 TimePerChromaMetaChunkVBlank[k] = 0; 4986 TimePerChromaMetaChunkFlip[k] = 0; 4987 } else { 4988 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4989 meta_row_height_chroma[k]; 4990 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4991 meta_row_height_chroma[k]; 4992 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4993 meta_chunk_width_chroma; 4994 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4995 if (!IsVertical(SourceRotation[k])) { 4996 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4997 meta_req_width_chroma[k]; 4998 } else { 4999 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 5000 meta_req_height_chroma[k]; 5001 } 5002 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 5003 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 5004 else 5005 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 5006 5007 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 5008 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5009 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 5010 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5011 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5012 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5013 } 5014 } else { 5015 TimePerMetaChunkNominal[k] = 0; 5016 TimePerMetaChunkVBlank[k] = 0; 5017 TimePerMetaChunkFlip[k] = 0; 5018 TimePerChromaMetaChunkNominal[k] = 0; 5019 TimePerChromaMetaChunkVBlank[k] = 0; 5020 TimePerChromaMetaChunkFlip[k] = 0; 5021 } 5022 } 5023 5024 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5025 if (GPUVMEnable == true) { 5026 if (!IsVertical(SourceRotation[k])) { 5027 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5028 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 5029 } else { 5030 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5031 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 5032 } 5033 5034 if (use_one_row_for_frame[k]) { 5035 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5036 (double) dpte_group_width_luma / 2.0, 1.0); 5037 } else { 5038 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5039 (double) dpte_group_width_luma, 1.0); 5040 } 5041#ifdef __DML_VBA_DEBUG__ 5042 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5043 __func__, k, use_one_row_for_frame[k]); 5044 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5045 __func__, k, dpte_group_bytes[k]); 5046 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5047 __func__, k, PTERequestSizeY[k]); 5048 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5049 __func__, k, PixelPTEReqWidthY[k]); 5050 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5051 __func__, k, PixelPTEReqHeightY[k]); 5052 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5053 __func__, k, dpte_row_width_luma_ub[k]); 5054 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5055 __func__, k, dpte_group_width_luma); 5056 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5057 __func__, k, dpte_groups_per_row_luma_ub); 5058#endif 5059 5060 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5061 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5062 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5063 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5064 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5065 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5066 if (BytePerPixelC[k] == 0) { 5067 time_per_pte_group_nom_chroma[k] = 0; 5068 time_per_pte_group_vblank_chroma[k] = 0; 5069 time_per_pte_group_flip_chroma[k] = 0; 5070 } else { 5071 if (!IsVertical(SourceRotation[k])) { 5072 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5073 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5074 } else { 5075 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5076 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5077 } 5078 5079 if (use_one_row_for_frame[k]) { 5080 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5081 (double) dpte_group_width_chroma / 2.0, 1.0); 5082 } else { 5083 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5084 (double) dpte_group_width_chroma, 1.0); 5085 } 5086#ifdef __DML_VBA_DEBUG__ 5087 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5088 __func__, k, dpte_row_width_chroma_ub[k]); 5089 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5090 __func__, k, dpte_group_width_chroma); 5091 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5092 __func__, k, dpte_groups_per_row_chroma_ub); 5093#endif 5094 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5095 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5096 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5097 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5098 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5099 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5100 } 5101 } else { 5102 time_per_pte_group_nom_luma[k] = 0; 5103 time_per_pte_group_vblank_luma[k] = 0; 5104 time_per_pte_group_flip_luma[k] = 0; 5105 time_per_pte_group_nom_chroma[k] = 0; 5106 time_per_pte_group_vblank_chroma[k] = 0; 5107 time_per_pte_group_flip_chroma[k] = 0; 5108 } 5109#ifdef __DML_VBA_DEBUG__ 5110 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5111 __func__, k, DestinationLinesToRequestRowInVBlank[k]); 5112 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n", 5113 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5114 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5115 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5116 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5117 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5118 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5119 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5120 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5121 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5122 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5123 __func__, k, TimePerMetaChunkNominal[k]); 5124 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5125 __func__, k, TimePerMetaChunkVBlank[k]); 5126 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5127 __func__, k, TimePerMetaChunkFlip[k]); 5128 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5129 __func__, k, TimePerChromaMetaChunkNominal[k]); 5130 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5131 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5132 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5133 __func__, k, TimePerChromaMetaChunkFlip[k]); 5134 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5135 __func__, k, time_per_pte_group_nom_luma[k]); 5136 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5137 __func__, k, time_per_pte_group_vblank_luma[k]); 5138 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5139 __func__, k, time_per_pte_group_flip_luma[k]); 5140 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5141 __func__, k, time_per_pte_group_nom_chroma[k]); 5142 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5143 __func__, k, time_per_pte_group_vblank_chroma[k]); 5144 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", 5145 __func__, k, time_per_pte_group_flip_chroma[k]); 5146#endif 5147 } 5148} // CalculateMetaAndPTETimes 5149 5150void dml32_CalculateVMGroupAndRequestTimes( 5151 unsigned int NumberOfActiveSurfaces, 5152 bool GPUVMEnable, 5153 unsigned int GPUVMMaxPageTableLevels, 5154 unsigned int HTotal[], 5155 unsigned int BytePerPixelC[], 5156 double DestinationLinesToRequestVMInVBlank[], 5157 double DestinationLinesToRequestVMInImmediateFlip[], 5158 bool DCCEnable[], 5159 double PixelClock[], 5160 unsigned int dpte_row_width_luma_ub[], 5161 unsigned int dpte_row_width_chroma_ub[], 5162 unsigned int vm_group_bytes[], 5163 unsigned int dpde0_bytes_per_frame_ub_l[], 5164 unsigned int dpde0_bytes_per_frame_ub_c[], 5165 unsigned int meta_pte_bytes_per_frame_ub_l[], 5166 unsigned int meta_pte_bytes_per_frame_ub_c[], 5167 5168 /* Output */ 5169 double TimePerVMGroupVBlank[], 5170 double TimePerVMGroupFlip[], 5171 double TimePerVMRequestVBlank[], 5172 double TimePerVMRequestFlip[]) 5173{ 5174 unsigned int k; 5175 unsigned int num_group_per_lower_vm_stage; 5176 unsigned int num_req_per_lower_vm_stage; 5177 5178#ifdef __DML_VBA_DEBUG__ 5179 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 5180 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 5181#endif 5182 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5183 5184#ifdef __DML_VBA_DEBUG__ 5185 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); 5186 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); 5187 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", 5188 __func__, k, dpde0_bytes_per_frame_ub_l[k]); 5189 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", 5190 __func__, k, dpde0_bytes_per_frame_ub_c[k]); 5191 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", 5192 __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 5193 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", 5194 __func__, k, meta_pte_bytes_per_frame_ub_c[k]); 5195#endif 5196 5197 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 5198 if (DCCEnable[k] == false) { 5199 if (BytePerPixelC[k] > 0) { 5200 num_group_per_lower_vm_stage = dml_ceil( 5201 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5202 (double) (vm_group_bytes[k]), 1.0) + 5203 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5204 (double) (vm_group_bytes[k]), 1.0); 5205 } else { 5206 num_group_per_lower_vm_stage = dml_ceil( 5207 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5208 (double) (vm_group_bytes[k]), 1.0); 5209 } 5210 } else { 5211 if (GPUVMMaxPageTableLevels == 1) { 5212 if (BytePerPixelC[k] > 0) { 5213 num_group_per_lower_vm_stage = dml_ceil( 5214 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5215 (double) (vm_group_bytes[k]), 1.0) + 5216 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5217 (double) (vm_group_bytes[k]), 1.0); 5218 } else { 5219 num_group_per_lower_vm_stage = dml_ceil( 5220 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5221 (double) (vm_group_bytes[k]), 1.0); 5222 } 5223 } else { 5224 if (BytePerPixelC[k] > 0) { 5225 num_group_per_lower_vm_stage = 2 + dml_ceil( 5226 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5227 (double) (vm_group_bytes[k]), 1) + 5228 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5229 (double) (vm_group_bytes[k]), 1) + 5230 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / 5231 (double) (vm_group_bytes[k]), 1) + 5232 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5233 (double) (vm_group_bytes[k]), 1); 5234 } else { 5235 num_group_per_lower_vm_stage = 1 + dml_ceil( 5236 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5237 (double) (vm_group_bytes[k]), 1) + dml_ceil( 5238 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5239 (double) (vm_group_bytes[k]), 1); 5240 } 5241 } 5242 } 5243 5244 if (DCCEnable[k] == false) { 5245 if (BytePerPixelC[k] > 0) { 5246 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + 5247 dpde0_bytes_per_frame_ub_c[k] / 64; 5248 } else { 5249 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 5250 } 5251 } else { 5252 if (GPUVMMaxPageTableLevels == 1) { 5253 if (BytePerPixelC[k] > 0) { 5254 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + 5255 meta_pte_bytes_per_frame_ub_c[k] / 64; 5256 } else { 5257 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 5258 } 5259 } else { 5260 if (BytePerPixelC[k] > 0) { 5261 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5262 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + 5263 meta_pte_bytes_per_frame_ub_l[k] / 64 + 5264 meta_pte_bytes_per_frame_ub_c[k] / 64; 5265 } else { 5266 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5267 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 5268 } 5269 } 5270 } 5271 5272 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5273 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5274 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5275 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5276 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5277 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5278 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5279 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5280 5281 if (GPUVMMaxPageTableLevels > 2) { 5282 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 5283 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 5284 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 5285 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 5286 } 5287 5288 } else { 5289 TimePerVMGroupVBlank[k] = 0; 5290 TimePerVMGroupFlip[k] = 0; 5291 TimePerVMRequestVBlank[k] = 0; 5292 TimePerVMRequestFlip[k] = 0; 5293 } 5294 5295#ifdef __DML_VBA_DEBUG__ 5296 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); 5297 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 5298 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 5299 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 5300#endif 5301 } 5302} // CalculateVMGroupAndRequestTimes 5303 5304void dml32_CalculateDCCConfiguration( 5305 bool DCCEnabled, 5306 bool DCCProgrammingAssumesScanDirectionUnknown, 5307 enum source_format_class SourcePixelFormat, 5308 unsigned int SurfaceWidthLuma, 5309 unsigned int SurfaceWidthChroma, 5310 unsigned int SurfaceHeightLuma, 5311 unsigned int SurfaceHeightChroma, 5312 unsigned int nomDETInKByte, 5313 unsigned int RequestHeight256ByteLuma, 5314 unsigned int RequestHeight256ByteChroma, 5315 enum dm_swizzle_mode TilingFormat, 5316 unsigned int BytePerPixelY, 5317 unsigned int BytePerPixelC, 5318 double BytePerPixelDETY, 5319 double BytePerPixelDETC, 5320 enum dm_rotation_angle SourceRotation, 5321 /* Output */ 5322 unsigned int *MaxUncompressedBlockLuma, 5323 unsigned int *MaxUncompressedBlockChroma, 5324 unsigned int *MaxCompressedBlockLuma, 5325 unsigned int *MaxCompressedBlockChroma, 5326 unsigned int *IndependentBlockLuma, 5327 unsigned int *IndependentBlockChroma) 5328{ 5329 typedef enum { 5330 REQ_256Bytes, 5331 REQ_128BytesNonContiguous, 5332 REQ_128BytesContiguous, 5333 REQ_NA 5334 } RequestType; 5335 5336 RequestType RequestLuma; 5337 RequestType RequestChroma; 5338 5339 unsigned int segment_order_horz_contiguous_luma; 5340 unsigned int segment_order_horz_contiguous_chroma; 5341 unsigned int segment_order_vert_contiguous_luma; 5342 unsigned int segment_order_vert_contiguous_chroma; 5343 unsigned int req128_horz_wc_l; 5344 unsigned int req128_horz_wc_c; 5345 unsigned int req128_vert_wc_l; 5346 unsigned int req128_vert_wc_c; 5347 unsigned int MAS_vp_horz_limit; 5348 unsigned int MAS_vp_vert_limit; 5349 unsigned int max_vp_horz_width; 5350 unsigned int max_vp_vert_height; 5351 unsigned int eff_surf_width_l; 5352 unsigned int eff_surf_width_c; 5353 unsigned int eff_surf_height_l; 5354 unsigned int eff_surf_height_c; 5355 unsigned int full_swath_bytes_horz_wc_l; 5356 unsigned int full_swath_bytes_horz_wc_c; 5357 unsigned int full_swath_bytes_vert_wc_l; 5358 unsigned int full_swath_bytes_vert_wc_c; 5359 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5360 5361 unsigned int yuv420; 5362 unsigned int horz_div_l; 5363 unsigned int horz_div_c; 5364 unsigned int vert_div_l; 5365 unsigned int vert_div_c; 5366 5367 unsigned int swath_buf_size; 5368 double detile_buf_vp_horz_limit; 5369 double detile_buf_vp_vert_limit; 5370 5371 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5372 SourcePixelFormat == dm_420_12) ? 1 : 0); 5373 horz_div_l = 1; 5374 horz_div_c = 1; 5375 vert_div_l = 1; 5376 vert_div_c = 1; 5377 5378 if (BytePerPixelY == 1) 5379 vert_div_l = 0; 5380 if (BytePerPixelC == 1) 5381 vert_div_c = 0; 5382 5383 if (BytePerPixelC == 0) { 5384 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5385 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5386 BytePerPixelY / (1 + horz_div_l)); 5387 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5388 (1 + vert_div_l)); 5389 } else { 5390 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5391 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5392 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5393 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5394 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5395 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5396 (1 + vert_div_c) / (1 + yuv420)); 5397 } 5398 5399 if (SourcePixelFormat == dm_420_10) { 5400 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5401 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 5402 } 5403 5404 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5405 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5406 5407 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5408 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5409 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5410 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5411 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5412 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5413 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5414 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5415 5416 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5417 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5418 if (BytePerPixelC > 0) { 5419 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5420 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5421 } else { 5422 full_swath_bytes_horz_wc_c = 0; 5423 full_swath_bytes_vert_wc_c = 0; 5424 } 5425 5426 if (SourcePixelFormat == dm_420_10) { 5427 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5428 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5429 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5430 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5431 } 5432 5433 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5434 req128_horz_wc_l = 0; 5435 req128_horz_wc_c = 0; 5436 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5437 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5438 req128_horz_wc_l = 0; 5439 req128_horz_wc_c = 1; 5440 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5441 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5442 req128_horz_wc_l = 1; 5443 req128_horz_wc_c = 0; 5444 } else { 5445 req128_horz_wc_l = 1; 5446 req128_horz_wc_c = 1; 5447 } 5448 5449 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5450 req128_vert_wc_l = 0; 5451 req128_vert_wc_c = 0; 5452 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5453 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5454 req128_vert_wc_l = 0; 5455 req128_vert_wc_c = 1; 5456 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5457 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5458 req128_vert_wc_l = 1; 5459 req128_vert_wc_c = 0; 5460 } else { 5461 req128_vert_wc_l = 1; 5462 req128_vert_wc_c = 1; 5463 } 5464 5465 if (BytePerPixelY == 2) { 5466 segment_order_horz_contiguous_luma = 0; 5467 segment_order_vert_contiguous_luma = 1; 5468 } else { 5469 segment_order_horz_contiguous_luma = 1; 5470 segment_order_vert_contiguous_luma = 0; 5471 } 5472 5473 if (BytePerPixelC == 2) { 5474 segment_order_horz_contiguous_chroma = 0; 5475 segment_order_vert_contiguous_chroma = 1; 5476 } else { 5477 segment_order_horz_contiguous_chroma = 1; 5478 segment_order_vert_contiguous_chroma = 0; 5479 } 5480#ifdef __DML_VBA_DEBUG__ 5481 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5482 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5483 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5484 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); 5485 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5486 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5487 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5488 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5489 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5490 __func__, segment_order_horz_contiguous_chroma); 5491#endif 5492 5493 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5494 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5495 RequestLuma = REQ_256Bytes; 5496 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5497 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5498 RequestLuma = REQ_128BytesNonContiguous; 5499 else 5500 RequestLuma = REQ_128BytesContiguous; 5501 5502 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5503 RequestChroma = REQ_256Bytes; 5504 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5505 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5506 RequestChroma = REQ_128BytesNonContiguous; 5507 else 5508 RequestChroma = REQ_128BytesContiguous; 5509 5510 } else if (!IsVertical(SourceRotation)) { 5511 if (req128_horz_wc_l == 0) 5512 RequestLuma = REQ_256Bytes; 5513 else if (segment_order_horz_contiguous_luma == 0) 5514 RequestLuma = REQ_128BytesNonContiguous; 5515 else 5516 RequestLuma = REQ_128BytesContiguous; 5517 5518 if (req128_horz_wc_c == 0) 5519 RequestChroma = REQ_256Bytes; 5520 else if (segment_order_horz_contiguous_chroma == 0) 5521 RequestChroma = REQ_128BytesNonContiguous; 5522 else 5523 RequestChroma = REQ_128BytesContiguous; 5524 5525 } else { 5526 if (req128_vert_wc_l == 0) 5527 RequestLuma = REQ_256Bytes; 5528 else if (segment_order_vert_contiguous_luma == 0) 5529 RequestLuma = REQ_128BytesNonContiguous; 5530 else 5531 RequestLuma = REQ_128BytesContiguous; 5532 5533 if (req128_vert_wc_c == 0) 5534 RequestChroma = REQ_256Bytes; 5535 else if (segment_order_vert_contiguous_chroma == 0) 5536 RequestChroma = REQ_128BytesNonContiguous; 5537 else 5538 RequestChroma = REQ_128BytesContiguous; 5539 } 5540 5541 if (RequestLuma == REQ_256Bytes) { 5542 *MaxUncompressedBlockLuma = 256; 5543 *MaxCompressedBlockLuma = 256; 5544 *IndependentBlockLuma = 0; 5545 } else if (RequestLuma == REQ_128BytesContiguous) { 5546 *MaxUncompressedBlockLuma = 256; 5547 *MaxCompressedBlockLuma = 128; 5548 *IndependentBlockLuma = 128; 5549 } else { 5550 *MaxUncompressedBlockLuma = 256; 5551 *MaxCompressedBlockLuma = 64; 5552 *IndependentBlockLuma = 64; 5553 } 5554 5555 if (RequestChroma == REQ_256Bytes) { 5556 *MaxUncompressedBlockChroma = 256; 5557 *MaxCompressedBlockChroma = 256; 5558 *IndependentBlockChroma = 0; 5559 } else if (RequestChroma == REQ_128BytesContiguous) { 5560 *MaxUncompressedBlockChroma = 256; 5561 *MaxCompressedBlockChroma = 128; 5562 *IndependentBlockChroma = 128; 5563 } else { 5564 *MaxUncompressedBlockChroma = 256; 5565 *MaxCompressedBlockChroma = 64; 5566 *IndependentBlockChroma = 64; 5567 } 5568 5569 if (DCCEnabled != true || BytePerPixelC == 0) { 5570 *MaxUncompressedBlockChroma = 0; 5571 *MaxCompressedBlockChroma = 0; 5572 *IndependentBlockChroma = 0; 5573 } 5574 5575 if (DCCEnabled != true) { 5576 *MaxUncompressedBlockLuma = 0; 5577 *MaxCompressedBlockLuma = 0; 5578 *IndependentBlockLuma = 0; 5579 } 5580 5581#ifdef __DML_VBA_DEBUG__ 5582 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5583 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5584 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5585 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5586 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5587 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5588#endif 5589 5590} // CalculateDCCConfiguration 5591 5592void dml32_CalculateStutterEfficiency( 5593 unsigned int CompressedBufferSizeInkByte, 5594 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5595 bool UnboundedRequestEnabled, 5596 unsigned int MetaFIFOSizeInKEntries, 5597 unsigned int ZeroSizeBufferEntries, 5598 unsigned int PixelChunkSizeInKByte, 5599 unsigned int NumberOfActiveSurfaces, 5600 unsigned int ROBBufferSizeInKByte, 5601 double TotalDataReadBandwidth, 5602 double DCFCLK, 5603 double ReturnBW, 5604 unsigned int CompbufReservedSpace64B, 5605 unsigned int CompbufReservedSpaceZs, 5606 double SRExitTime, 5607 double SRExitZ8Time, 5608 bool SynchronizeTimingsFinal, 5609 unsigned int BlendingAndTiming[], 5610 double StutterEnterPlusExitWatermark, 5611 double Z8StutterEnterPlusExitWatermark, 5612 bool ProgressiveToInterlaceUnitInOPP, 5613 bool Interlace[], 5614 double MinTTUVBlank[], 5615 unsigned int DPPPerSurface[], 5616 unsigned int DETBufferSizeY[], 5617 unsigned int BytePerPixelY[], 5618 double BytePerPixelDETY[], 5619 double SwathWidthY[], 5620 unsigned int SwathHeightY[], 5621 unsigned int SwathHeightC[], 5622 double NetDCCRateLuma[], 5623 double NetDCCRateChroma[], 5624 double DCCFractionOfZeroSizeRequestsLuma[], 5625 double DCCFractionOfZeroSizeRequestsChroma[], 5626 unsigned int HTotal[], 5627 unsigned int VTotal[], 5628 double PixelClock[], 5629 double VRatio[], 5630 enum dm_rotation_angle SourceRotation[], 5631 unsigned int BlockHeight256BytesY[], 5632 unsigned int BlockWidth256BytesY[], 5633 unsigned int BlockHeight256BytesC[], 5634 unsigned int BlockWidth256BytesC[], 5635 unsigned int DCCYMaxUncompressedBlock[], 5636 unsigned int DCCCMaxUncompressedBlock[], 5637 unsigned int VActive[], 5638 bool DCCEnable[], 5639 bool WritebackEnable[], 5640 double ReadBandwidthSurfaceLuma[], 5641 double ReadBandwidthSurfaceChroma[], 5642 double meta_row_bw[], 5643 double dpte_row_bw[], 5644 5645 /* Output */ 5646 double *StutterEfficiencyNotIncludingVBlank, 5647 double *StutterEfficiency, 5648 unsigned int *NumberOfStutterBurstsPerFrame, 5649 double *Z8StutterEfficiencyNotIncludingVBlank, 5650 double *Z8StutterEfficiency, 5651 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5652 double *StutterPeriod, 5653 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5654{ 5655 5656 bool FoundCriticalSurface = false; 5657 unsigned int SwathSizeCriticalSurface = 0; 5658 unsigned int LastChunkOfSwathSize; 5659 unsigned int MissingPartOfLastSwathOfDETSize; 5660 double LastZ8StutterPeriod = 0.0; 5661 double LastStutterPeriod = 0.0; 5662 unsigned int TotalNumberOfActiveOTG = 0; 5663 double doublePixelClock; 5664 unsigned int doubleHTotal; 5665 unsigned int doubleVTotal; 5666 bool SameTiming = true; 5667 double DETBufferingTimeY; 5668 double SwathWidthYCriticalSurface = 0.0; 5669 double SwathHeightYCriticalSurface = 0.0; 5670 double VActiveTimeCriticalSurface = 0.0; 5671 double FrameTimeCriticalSurface = 0.0; 5672 unsigned int BytePerPixelYCriticalSurface = 0; 5673 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5674 unsigned int DETBufferSizeYCriticalSurface = 0; 5675 double MinTTUVBlankCriticalSurface = 0.0; 5676 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5677 bool doublePlaneCriticalSurface = 0; 5678 bool doublePipeCriticalSurface = 0; 5679 double TotalCompressedReadBandwidth; 5680 double TotalRowReadBandwidth; 5681 double AverageDCCCompressionRate; 5682 double EffectiveCompressedBufferSize; 5683 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5684 double StutterBurstTime; 5685 unsigned int TotalActiveWriteback; 5686 double LinesInDETY; 5687 double LinesInDETYRoundedDownToSwath; 5688 double MaximumEffectiveCompressionLuma; 5689 double MaximumEffectiveCompressionChroma; 5690 double TotalZeroSizeRequestReadBandwidth; 5691 double TotalZeroSizeCompressedReadBandwidth; 5692 double AverageDCCZeroSizeFraction; 5693 double AverageZeroSizeCompressionRate; 5694 unsigned int k; 5695 5696 TotalZeroSizeRequestReadBandwidth = 0; 5697 TotalZeroSizeCompressedReadBandwidth = 0; 5698 TotalRowReadBandwidth = 0; 5699 TotalCompressedReadBandwidth = 0; 5700 5701 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5702 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5703 if (DCCEnable[k] == true) { 5704 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5705 || (!IsVertical(SourceRotation[k]) 5706 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5707 || DCCYMaxUncompressedBlock[k] < 256) { 5708 MaximumEffectiveCompressionLuma = 2; 5709 } else { 5710 MaximumEffectiveCompressionLuma = 4; 5711 } 5712 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5713 + ReadBandwidthSurfaceLuma[k] 5714 / dml_min(NetDCCRateLuma[k], 5715 MaximumEffectiveCompressionLuma); 5716#ifdef __DML_VBA_DEBUG__ 5717 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5718 __func__, k, ReadBandwidthSurfaceLuma[k]); 5719 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5720 __func__, k, NetDCCRateLuma[k]); 5721 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5722 __func__, k, MaximumEffectiveCompressionLuma); 5723#endif 5724 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5725 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5726 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5727 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5728 / MaximumEffectiveCompressionLuma; 5729 5730 if (ReadBandwidthSurfaceChroma[k] > 0) { 5731 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5732 || (!IsVertical(SourceRotation[k]) 5733 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5734 || DCCCMaxUncompressedBlock[k] < 256) { 5735 MaximumEffectiveCompressionChroma = 2; 5736 } else { 5737 MaximumEffectiveCompressionChroma = 4; 5738 } 5739 TotalCompressedReadBandwidth = 5740 TotalCompressedReadBandwidth 5741 + ReadBandwidthSurfaceChroma[k] 5742 / dml_min(NetDCCRateChroma[k], 5743 MaximumEffectiveCompressionChroma); 5744#ifdef __DML_VBA_DEBUG__ 5745 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5746 __func__, k, ReadBandwidthSurfaceChroma[k]); 5747 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5748 __func__, k, NetDCCRateChroma[k]); 5749 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5750 __func__, k, MaximumEffectiveCompressionChroma); 5751#endif 5752 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5753 + ReadBandwidthSurfaceChroma[k] 5754 * DCCFractionOfZeroSizeRequestsChroma[k]; 5755 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5756 + ReadBandwidthSurfaceChroma[k] 5757 * DCCFractionOfZeroSizeRequestsChroma[k] 5758 / MaximumEffectiveCompressionChroma; 5759 } 5760 } else { 5761 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5762 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5763 } 5764 TotalRowReadBandwidth = TotalRowReadBandwidth 5765 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5766 } 5767 } 5768 5769 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5770 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5771 5772#ifdef __DML_VBA_DEBUG__ 5773 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5774 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5775 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5776 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5777 __func__, TotalZeroSizeCompressedReadBandwidth); 5778 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5779 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5780 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5781 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5782 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5783 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5784 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5785#endif 5786 if (AverageDCCZeroSizeFraction == 1) { 5787 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5788 / TotalZeroSizeCompressedReadBandwidth; 5789 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5790 * AverageZeroSizeCompressionRate 5791 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5792 * AverageZeroSizeCompressionRate; 5793 } else if (AverageDCCZeroSizeFraction > 0) { 5794 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5795 / TotalZeroSizeCompressedReadBandwidth; 5796 EffectiveCompressedBufferSize = dml_min( 5797 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5798 (double) MetaFIFOSizeInKEntries * 1024 * 64 5799 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5800 + 1 / AverageDCCCompressionRate)) 5801 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5802 * AverageDCCCompressionRate, 5803 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5804 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5805 5806#ifdef __DML_VBA_DEBUG__ 5807 dml_print("DML::%s: min 1 = %f\n", __func__, 5808 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5809 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5810 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5811 AverageDCCCompressionRate)); 5812 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5813 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5814 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5815 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5816#endif 5817 } else { 5818 EffectiveCompressedBufferSize = dml_min( 5819 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5820 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5821 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5822 * AverageDCCCompressionRate; 5823 5824#ifdef __DML_VBA_DEBUG__ 5825 dml_print("DML::%s: min 1 = %f\n", __func__, 5826 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5827 dml_print("DML::%s: min 2 = %f\n", __func__, 5828 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5829#endif 5830 } 5831 5832#ifdef __DML_VBA_DEBUG__ 5833 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5834 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5835 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5836#endif 5837 5838 *StutterPeriod = 0; 5839 5840 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5841 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5842 LinesInDETY = ((double) DETBufferSizeY[k] 5843 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) 5844 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5845 / BytePerPixelDETY[k] / SwathWidthY[k]; 5846 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5847 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5848 / VRatio[k]; 5849#ifdef __DML_VBA_DEBUG__ 5850 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5851 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5852 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5853 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5854 __func__, k, ReadBandwidthSurfaceLuma[k]); 5855 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5856 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5857 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5858 __func__, k, LinesInDETYRoundedDownToSwath); 5859 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5860 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5861 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5862 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5863 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5864#endif 5865 5866 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5867 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5868 5869 FoundCriticalSurface = true; 5870 *StutterPeriod = DETBufferingTimeY; 5871 FrameTimeCriticalSurface = ( 5872 isInterlaceTiming ? 5873 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5874 * (double) HTotal[k] / PixelClock[k]; 5875 VActiveTimeCriticalSurface = ( 5876 isInterlaceTiming ? 5877 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5878 * (double) HTotal[k] / PixelClock[k]; 5879 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5880 SwathWidthYCriticalSurface = SwathWidthY[k]; 5881 SwathHeightYCriticalSurface = SwathHeightY[k]; 5882 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5883 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5884 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5885 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5886 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5887 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5888 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5889 5890#ifdef __DML_VBA_DEBUG__ 5891 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5892 __func__, k, FoundCriticalSurface); 5893 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5894 __func__, k, *StutterPeriod); 5895 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5896 __func__, k, MinTTUVBlankCriticalSurface); 5897 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5898 __func__, k, FrameTimeCriticalSurface); 5899 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5900 __func__, k, VActiveTimeCriticalSurface); 5901 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5902 __func__, k, BytePerPixelYCriticalSurface); 5903 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5904 __func__, k, SwathWidthYCriticalSurface); 5905 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5906 __func__, k, SwathHeightYCriticalSurface); 5907 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5908 __func__, k, BlockWidth256BytesYCriticalSurface); 5909 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5910 __func__, k, doublePlaneCriticalSurface); 5911 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5912 __func__, k, doublePipeCriticalSurface); 5913 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5914 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5915#endif 5916 } 5917 } 5918 } 5919 5920 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5921 EffectiveCompressedBufferSize); 5922#ifdef __DML_VBA_DEBUG__ 5923 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5924 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5925 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5926 __func__, *StutterPeriod * TotalDataReadBandwidth); 5927 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5928 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5929 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5930 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5931 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5932 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5933 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5934#endif 5935 5936 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5937 / ReturnBW 5938 + (*StutterPeriod * TotalDataReadBandwidth 5939 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5940 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5941#ifdef __DML_VBA_DEBUG__ 5942 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5943 AverageDCCCompressionRate / ReturnBW); 5944 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5945 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5946 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - 5947 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5948 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5949 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5950#endif 5951 StutterBurstTime = dml_max(StutterBurstTime, 5952 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5953 * SwathWidthYCriticalSurface / ReturnBW); 5954 5955#ifdef __DML_VBA_DEBUG__ 5956 dml_print("DML::%s: Time to finish residue swath=%f\n", 5957 __func__, 5958 LinesToFinishSwathTransferStutterCriticalSurface * 5959 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5960#endif 5961 5962 TotalActiveWriteback = 0; 5963 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5964 if (WritebackEnable[k]) 5965 TotalActiveWriteback = TotalActiveWriteback + 1; 5966 } 5967 5968 if (TotalActiveWriteback == 0) { 5969#ifdef __DML_VBA_DEBUG__ 5970 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5971 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5972 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5973 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5974#endif 5975 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5976 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5977 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5978 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5979 *NumberOfStutterBurstsPerFrame = ( 5980 *StutterEfficiencyNotIncludingVBlank > 0 ? 5981 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5982 *Z8NumberOfStutterBurstsPerFrame = ( 5983 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 5984 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5985 } else { 5986 *StutterEfficiencyNotIncludingVBlank = 0.; 5987 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5988 *NumberOfStutterBurstsPerFrame = 0; 5989 *Z8NumberOfStutterBurstsPerFrame = 0; 5990 } 5991#ifdef __DML_VBA_DEBUG__ 5992 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5993 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5994 __func__, *StutterEfficiencyNotIncludingVBlank); 5995 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5996 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5997 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5998 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5999#endif 6000 6001 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6002 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6003 if (BlendingAndTiming[k] == k) { 6004 if (TotalNumberOfActiveOTG == 0) { 6005 doublePixelClock = PixelClock[k]; 6006 doubleHTotal = HTotal[k]; 6007 doubleVTotal = VTotal[k]; 6008 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 6009 || doubleVTotal != VTotal[k]) { 6010 SameTiming = false; 6011 } 6012 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6013 } 6014 } 6015 } 6016 6017 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6018 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6019 6020 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 6021 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 6022 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 6023 + StutterBurstTime * VActiveTimeCriticalSurface 6024 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6025 } else { 6026 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6027 } 6028 } else { 6029 *StutterEfficiency = 0; 6030 } 6031 6032 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6033 LastZ8StutterPeriod = VActiveTimeCriticalSurface 6034 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6035 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6036 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6037 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6038 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6039 } else { 6040 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6041 } 6042 } else { 6043 *Z8StutterEfficiency = 0.; 6044 } 6045 6046#ifdef __DML_VBA_DEBUG__ 6047 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6048 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6049 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6050 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6051 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6052 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6053 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6054 __func__, *StutterEfficiencyNotIncludingVBlank); 6055 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6056#endif 6057 6058 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6059 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6060 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6061 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6062 - DETBufferSizeYCriticalSurface; 6063 6064 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6065 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6066 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6067 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6068 6069#ifdef __DML_VBA_DEBUG__ 6070 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6071 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6072 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6073 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6074#endif 6075} // CalculateStutterEfficiency 6076 6077void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6078 unsigned int ConfigReturnBufferSizeInKByte, 6079 unsigned int ROBBufferSizeInKByte, 6080 unsigned int MaxNumDPP, 6081 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6082 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6083 6084 /* Output */ 6085 unsigned int *MaxTotalDETInKByte, 6086 unsigned int *nomDETInKByte, 6087 unsigned int *MinCompressedBufferSizeInKByte) 6088{ 6089 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6090 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6091 6092 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6093 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6094 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6095 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6096 6097#ifdef __DML_VBA_DEBUG__ 6098 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6099 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte); 6100 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6101 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6102 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6103 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6104#endif 6105 6106 if (det_buff_size_override_en) { 6107 *nomDETInKByte = det_buff_size_override_val; 6108#ifdef __DML_VBA_DEBUG__ 6109 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6110#endif 6111 } 6112} // CalculateMaxDETAndMinCompressedBufferSize 6113 6114bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6115 double ReturnBW, 6116 bool NotUrgentLatencyHiding[], 6117 double ReadBandwidthLuma[], 6118 double ReadBandwidthChroma[], 6119 double cursor_bw[], 6120 double meta_row_bandwidth[], 6121 double dpte_row_bandwidth[], 6122 unsigned int NumberOfDPP[], 6123 double UrgentBurstFactorLuma[], 6124 double UrgentBurstFactorChroma[], 6125 double UrgentBurstFactorCursor[]) 6126{ 6127 unsigned int k; 6128 bool NotEnoughUrgentLatencyHiding = false; 6129 bool CalculateVActiveBandwithSupport_val = false; 6130 double VActiveBandwith = 0; 6131 6132 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6133 if (NotUrgentLatencyHiding[k]) { 6134 NotEnoughUrgentLatencyHiding = true; 6135 } 6136 } 6137 6138 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6139 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6140 } 6141 6142 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6143 6144#ifdef __DML_VBA_DEBUG__ 6145dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding); 6146dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6147dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6148dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6149#endif 6150 return CalculateVActiveBandwithSupport_val; 6151} 6152 6153void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6154 double ReturnBW, 6155 bool NotUrgentLatencyHiding[], 6156 double ReadBandwidthLuma[], 6157 double ReadBandwidthChroma[], 6158 double PrefetchBandwidthLuma[], 6159 double PrefetchBandwidthChroma[], 6160 double cursor_bw[], 6161 double meta_row_bandwidth[], 6162 double dpte_row_bandwidth[], 6163 double cursor_bw_pre[], 6164 double prefetch_vmrow_bw[], 6165 unsigned int NumberOfDPP[], 6166 double UrgentBurstFactorLuma[], 6167 double UrgentBurstFactorChroma[], 6168 double UrgentBurstFactorCursor[], 6169 double UrgentBurstFactorLumaPre[], 6170 double UrgentBurstFactorChromaPre[], 6171 double UrgentBurstFactorCursorPre[], 6172 double PrefetchBW[], 6173 double VRatio[], 6174 double MaxVRatioPre, 6175 6176 /* output */ 6177 double *MaxPrefetchBandwidth, 6178 double *FractionOfUrgentBandwidth, 6179 bool *PrefetchBandwidthSupport) 6180{ 6181 unsigned int k; 6182 double ActiveBandwidthPerSurface; 6183 bool NotEnoughUrgentLatencyHiding = false; 6184 double TotalActiveBandwidth = 0; 6185 double TotalPrefetchBandwidth = 0; 6186 6187 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6188 if (NotUrgentLatencyHiding[k]) { 6189 NotEnoughUrgentLatencyHiding = true; 6190 } 6191 } 6192 6193 *MaxPrefetchBandwidth = 0; 6194 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6195 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]); 6196 6197 TotalActiveBandwidth += ActiveBandwidthPerSurface; 6198 6199 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k]; 6200 6201 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6202 ActiveBandwidthPerSurface, 6203 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6204 } 6205 6206 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__) 6207 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding; 6208 else 6209 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6210 6211 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW; 6212} 6213 6214double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6215 double ReturnBW, 6216 double ReadBandwidthLuma[], 6217 double ReadBandwidthChroma[], 6218 double PrefetchBandwidthLuma[], 6219 double PrefetchBandwidthChroma[], 6220 double cursor_bw[], 6221 double cursor_bw_pre[], 6222 unsigned int NumberOfDPP[], 6223 double UrgentBurstFactorLuma[], 6224 double UrgentBurstFactorChroma[], 6225 double UrgentBurstFactorCursor[], 6226 double UrgentBurstFactorLumaPre[], 6227 double UrgentBurstFactorChromaPre[], 6228 double UrgentBurstFactorCursorPre[]) 6229{ 6230 unsigned int k; 6231 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6232 6233 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6234 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6235 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6236 } 6237 6238 return CalculateBandwidthAvailableForImmediateFlip_val; 6239} 6240 6241void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6242 double ReturnBW, 6243 enum immediate_flip_requirement ImmediateFlipRequirement[], 6244 double final_flip_bw[], 6245 double ReadBandwidthLuma[], 6246 double ReadBandwidthChroma[], 6247 double PrefetchBandwidthLuma[], 6248 double PrefetchBandwidthChroma[], 6249 double cursor_bw[], 6250 double meta_row_bandwidth[], 6251 double dpte_row_bandwidth[], 6252 double cursor_bw_pre[], 6253 double prefetch_vmrow_bw[], 6254 unsigned int NumberOfDPP[], 6255 double UrgentBurstFactorLuma[], 6256 double UrgentBurstFactorChroma[], 6257 double UrgentBurstFactorCursor[], 6258 double UrgentBurstFactorLumaPre[], 6259 double UrgentBurstFactorChromaPre[], 6260 double UrgentBurstFactorCursorPre[], 6261 6262 /* output */ 6263 double *TotalBandwidth, 6264 double *FractionOfUrgentBandwidth, 6265 bool *ImmediateFlipBandwidthSupport) 6266{ 6267 unsigned int k; 6268 *TotalBandwidth = 0; 6269 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6270 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6271 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6272 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6273 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6274 } else { 6275 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6276 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6277 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6278 } 6279 } 6280 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6281 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6282} 6283 6284bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, 6285 double ReturnBW, 6286 double UrgentLatency, 6287 unsigned int SwathHeightY[], 6288 unsigned int SwathHeightC[], 6289 unsigned int SwathWidthY[], 6290 unsigned int SwathWidthC[], 6291 double BytePerPixelInDETY[], 6292 double BytePerPixelInDETC[], 6293 unsigned int DETBufferSizeY[], 6294 unsigned int DETBufferSizeC[], 6295 unsigned int NumOfDPP[], 6296 unsigned int HTotal[], 6297 double PixelClock[], 6298 double VRatioY[], 6299 double VRatioC[], 6300 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 6301 enum unbounded_requesting_policy UseUnboundedRequesting) 6302{ 6303 int k; 6304 double SwathSizeAllSurfaces = 0; 6305 double SwathSizeAllSurfacesInFetchTimeUs; 6306 double DETSwathLatencyHidingUs; 6307 double DETSwathLatencyHidingYUs; 6308 double DETSwathLatencyHidingCUs; 6309 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; 6310 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; 6311 bool NotEnoughDETSwathFillLatencyHiding = false; 6312 6313 if (UseUnboundedRequesting == dm_unbounded_requesting) 6314 return false; 6315 6316 /* calculate sum of single swath size for all pipes in bytes */ 6317 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6318 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; 6319 6320 if (SwathHeightC[k] != 0) 6321 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; 6322 else 6323 SwathSizePerSurfaceC[k] = 0; 6324 6325 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; 6326 } 6327 6328 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; 6329 6330 /* ensure all DET - 1 swath can hide a fetch for all surfaces */ 6331 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6332 double LineTime = HTotal[k] / PixelClock[k]; 6333 6334 /* only care if surface is not phantom */ 6335 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6336 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; 6337 6338 if (SwathHeightC[k] != 0) { 6339 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; 6340 6341 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); 6342 } else { 6343 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; 6344 } 6345 6346 /* DET must be able to hide time to fetch 1 swath for each surface */ 6347 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { 6348 NotEnoughDETSwathFillLatencyHiding = true; 6349 break; 6350 } 6351 } 6352 } 6353 6354 return NotEnoughDETSwathFillLatencyHiding; 6355} 6356