1/*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25#include "display_mode_vba_util_32.h"
26#include "../dml_inline_defs.h"
27#include "display_mode_vba_32.h"
28#include "../display_mode_lib.h"
29
30#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
32unsigned int dml32_dscceComputeDelay(
33		unsigned int bpc,
34		double BPP,
35		unsigned int sliceWidth,
36		unsigned int numSlices,
37		enum output_format_class pixelFormat,
38		enum output_encoder_class Output)
39{
40	// valid bpc         = source bits per component in the set of {8, 10, 12}
41	// valid bpp         = increments of 1/16 of a bit
42	//                    min = 6/7/8 in N420/N422/444, respectively
43	//                    max = such that compression is 1:1
44	//valid sliceWidth  = number of pixels per slice line,
45	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49	// fixed value
50	unsigned int rcModelSize = 8192;
51
52	// N422/N420 operate at 2 pixels per clock
53	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54	Delay, pixels;
55
56	if (pixelFormat == dm_420)
57		pixelsPerClock = 2;
58	else if (pixelFormat == dm_n422)
59		pixelsPerClock = 2;
60	// #all other modes operate at 1 pixel per clock
61	else
62		pixelsPerClock = 1;
63
64	//initial transmit delay as per PPS
65	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67	//compute ssm delay
68	if (bpc == 8)
69		D = 81;
70	else if (bpc == 10)
71		D = 89;
72	else
73		D = 113;
74
75	//divide by pixel per cycle to compute slice width as seen by DSC
76	w = sliceWidth / pixelsPerClock;
77
78	//422 mode has an additional cycle of delay
79	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80		s = 0;
81	else
82		s = 1;
83
84	//main calculation for the dscce
85	ix = initalXmitDelay + 45;
86	wx = (w + 2) / 3;
87	p = 3 * wx - w;
88	l0 = ix / w;
89	a = ix + p * l0;
90	ax = (a + 2) / 3 + D + 6 + 1;
91	L = (ax + wx - 1) / wx;
92	if ((ix % w) == 0 && p != 0)
93		lstall = 1;
94	else
95		lstall = 0;
96	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99	pixels = Delay * 3 * pixelsPerClock;
100
101#ifdef __DML_VBA_DEBUG__
102	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107	dml_print("DML::%s: Output: %d\n", __func__, Output);
108	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109#endif
110
111	return pixels;
112}
113
114unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115{
116	unsigned int Delay = 0;
117
118	if (pixelFormat == dm_420) {
119		//   sfr
120		Delay = Delay + 2;
121		//   dsccif
122		Delay = Delay + 0;
123		//   dscc - input deserializer
124		Delay = Delay + 3;
125		//   dscc gets pixels every other cycle
126		Delay = Delay + 2;
127		//   dscc - input cdc fifo
128		Delay = Delay + 12;
129		//   dscc gets pixels every other cycle
130		Delay = Delay + 13;
131		//   dscc - cdc uncertainty
132		Delay = Delay + 2;
133		//   dscc - output cdc fifo
134		Delay = Delay + 7;
135		//   dscc gets pixels every other cycle
136		Delay = Delay + 3;
137		//   dscc - cdc uncertainty
138		Delay = Delay + 2;
139		//   dscc - output serializer
140		Delay = Delay + 1;
141		//   sft
142		Delay = Delay + 1;
143	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144		//   sfr
145		Delay = Delay + 2;
146		//   dsccif
147		Delay = Delay + 1;
148		//   dscc - input deserializer
149		Delay = Delay + 5;
150		//  dscc - input cdc fifo
151		Delay = Delay + 25;
152		//   dscc - cdc uncertainty
153		Delay = Delay + 2;
154		//   dscc - output cdc fifo
155		Delay = Delay + 10;
156		//   dscc - cdc uncertainty
157		Delay = Delay + 2;
158		//   dscc - output serializer
159		Delay = Delay + 1;
160		//   sft
161		Delay = Delay + 1;
162	} else {
163		//   sfr
164		Delay = Delay + 2;
165		//   dsccif
166		Delay = Delay + 0;
167		//   dscc - input deserializer
168		Delay = Delay + 3;
169		//   dscc - input cdc fifo
170		Delay = Delay + 12;
171		//   dscc - cdc uncertainty
172		Delay = Delay + 2;
173		//   dscc - output cdc fifo
174		Delay = Delay + 7;
175		//   dscc - output serializer
176		Delay = Delay + 1;
177		//   dscc - cdc uncertainty
178		Delay = Delay + 2;
179		//   sft
180		Delay = Delay + 1;
181	}
182
183	return Delay;
184}
185
186
187bool IsVertical(enum dm_rotation_angle Scan)
188{
189	bool is_vert = false;
190
191	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192		is_vert = true;
193	else
194		is_vert = false;
195	return is_vert;
196}
197
198void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199		double HRatio,
200		double HRatioChroma,
201		double VRatio,
202		double VRatioChroma,
203		double MaxDCHUBToPSCLThroughput,
204		double MaxPSCLToLBThroughput,
205		double PixelClock,
206		enum source_format_class SourcePixelFormat,
207		unsigned int HTaps,
208		unsigned int HTapsChroma,
209		unsigned int VTaps,
210		unsigned int VTapsChroma,
211
212		/* output */
213		double *PSCL_THROUGHPUT,
214		double *PSCL_THROUGHPUT_CHROMA,
215		double *DPPCLKUsingSingleDPP)
216{
217	double DPPCLKUsingSingleDPPLuma;
218	double DPPCLKUsingSingleDPPChroma;
219
220	if (HRatio > 1) {
221		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222				dml_ceil((double) HTaps / 6.0, 1.0));
223	} else {
224		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225	}
226
227	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228			*PSCL_THROUGHPUT, 1);
229
230	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234			SourcePixelFormat != dm_rgbe_alpha)) {
235		*PSCL_THROUGHPUT_CHROMA = 0;
236		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237	} else {
238		if (HRatioChroma > 1) {
239			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241		} else {
242			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243		}
244		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249	}
250}
251
252void dml32_CalculateBytePerPixelAndBlockSizes(
253		enum source_format_class SourcePixelFormat,
254		enum dm_swizzle_mode SurfaceTiling,
255
256		/* Output */
257		unsigned int *BytePerPixelY,
258		unsigned int *BytePerPixelC,
259		double  *BytePerPixelDETY,
260		double  *BytePerPixelDETC,
261		unsigned int *BlockHeight256BytesY,
262		unsigned int *BlockHeight256BytesC,
263		unsigned int *BlockWidth256BytesY,
264		unsigned int *BlockWidth256BytesC,
265		unsigned int *MacroTileHeightY,
266		unsigned int *MacroTileHeightC,
267		unsigned int *MacroTileWidthY,
268		unsigned int *MacroTileWidthC)
269{
270	if (SourcePixelFormat == dm_444_64) {
271		*BytePerPixelDETY = 8;
272		*BytePerPixelDETC = 0;
273		*BytePerPixelY = 8;
274		*BytePerPixelC = 0;
275	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276		*BytePerPixelDETY = 4;
277		*BytePerPixelDETC = 0;
278		*BytePerPixelY = 4;
279		*BytePerPixelC = 0;
280	} else if (SourcePixelFormat == dm_444_16) {
281		*BytePerPixelDETY = 2;
282		*BytePerPixelDETC = 0;
283		*BytePerPixelY = 2;
284		*BytePerPixelC = 0;
285	} else if (SourcePixelFormat == dm_444_8) {
286		*BytePerPixelDETY = 1;
287		*BytePerPixelDETC = 0;
288		*BytePerPixelY = 1;
289		*BytePerPixelC = 0;
290	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291		*BytePerPixelDETY = 4;
292		*BytePerPixelDETC = 1;
293		*BytePerPixelY = 4;
294		*BytePerPixelC = 1;
295	} else if (SourcePixelFormat == dm_420_8) {
296		*BytePerPixelDETY = 1;
297		*BytePerPixelDETC = 2;
298		*BytePerPixelY = 1;
299		*BytePerPixelC = 2;
300	} else if (SourcePixelFormat == dm_420_12) {
301		*BytePerPixelDETY = 2;
302		*BytePerPixelDETC = 4;
303		*BytePerPixelY = 2;
304		*BytePerPixelC = 4;
305	} else {
306		*BytePerPixelDETY = 4.0 / 3;
307		*BytePerPixelDETC = 8.0 / 3;
308		*BytePerPixelY = 2;
309		*BytePerPixelC = 4;
310	}
311#ifdef __DML_VBA_DEBUG__
312	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317#endif
318	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319			|| SourcePixelFormat == dm_444_16
320			|| SourcePixelFormat == dm_444_8
321			|| SourcePixelFormat == dm_mono_16
322			|| SourcePixelFormat == dm_mono_8
323			|| SourcePixelFormat == dm_rgbe)) {
324		if (SurfaceTiling == dm_sw_linear)
325			*BlockHeight256BytesY = 1;
326		else if (SourcePixelFormat == dm_444_64)
327			*BlockHeight256BytesY = 4;
328		else if (SourcePixelFormat == dm_444_8)
329			*BlockHeight256BytesY = 16;
330		else
331			*BlockHeight256BytesY = 8;
332
333		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334		*BlockHeight256BytesC = 0;
335		*BlockWidth256BytesC = 0;
336	} else {
337		if (SurfaceTiling == dm_sw_linear) {
338			*BlockHeight256BytesY = 1;
339			*BlockHeight256BytesC = 1;
340		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341			*BlockHeight256BytesY = 8;
342			*BlockHeight256BytesC = 16;
343		} else if (SourcePixelFormat == dm_420_8) {
344			*BlockHeight256BytesY = 16;
345			*BlockHeight256BytesC = 8;
346		} else {
347			*BlockHeight256BytesY = 8;
348			*BlockHeight256BytesC = 8;
349		}
350		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352	}
353#ifdef __DML_VBA_DEBUG__
354	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358#endif
359
360	if (SurfaceTiling == dm_sw_linear) {
361		*MacroTileHeightY = *BlockHeight256BytesY;
362		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363		*MacroTileHeightC = *BlockHeight256BytesC;
364		if (*MacroTileHeightC == 0)
365			*MacroTileWidthC = 0;
366		else
367			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373		if (*MacroTileHeightC == 0)
374			*MacroTileWidthC = 0;
375		else
376			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377	} else {
378		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381		if (*MacroTileHeightC == 0)
382			*MacroTileWidthC = 0;
383		else
384			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385	}
386
387#ifdef __DML_VBA_DEBUG__
388	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392#endif
393} // CalculateBytePerPixelAndBlockSizes
394
395void dml32_CalculateSwathAndDETConfiguration(
396		unsigned int DETSizeOverride[],
397		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398		unsigned int ConfigReturnBufferSizeInKByte,
399		unsigned int MaxTotalDETInKByte,
400		unsigned int MinCompressedBufferSizeInKByte,
401		double ForceSingleDPP,
402		unsigned int NumberOfActiveSurfaces,
403		unsigned int nomDETInKByte,
404		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406		unsigned int PixelChunkSizeKBytes,
407		unsigned int ROBSizeKBytes,
408		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409		enum output_encoder_class Output[],
410		double ReadBandwidthLuma[],
411		double ReadBandwidthChroma[],
412		double MaximumSwathWidthLuma[],
413		double MaximumSwathWidthChroma[],
414		enum dm_rotation_angle SourceRotation[],
415		bool ViewportStationary[],
416		enum source_format_class SourcePixelFormat[],
417		enum dm_swizzle_mode SurfaceTiling[],
418		unsigned int ViewportWidth[],
419		unsigned int ViewportHeight[],
420		unsigned int ViewportXStart[],
421		unsigned int ViewportYStart[],
422		unsigned int ViewportXStartC[],
423		unsigned int ViewportYStartC[],
424		unsigned int SurfaceWidthY[],
425		unsigned int SurfaceWidthC[],
426		unsigned int SurfaceHeightY[],
427		unsigned int SurfaceHeightC[],
428		unsigned int Read256BytesBlockHeightY[],
429		unsigned int Read256BytesBlockHeightC[],
430		unsigned int Read256BytesBlockWidthY[],
431		unsigned int Read256BytesBlockWidthC[],
432		enum odm_combine_mode ODMMode[],
433		unsigned int BlendingAndTiming[],
434		unsigned int BytePerPixY[],
435		unsigned int BytePerPixC[],
436		double BytePerPixDETY[],
437		double BytePerPixDETC[],
438		unsigned int HActive[],
439		double HRatio[],
440		double HRatioChroma[],
441		unsigned int DPPPerSurface[],
442
443		/* Output */
444		unsigned int swath_width_luma_ub[],
445		unsigned int swath_width_chroma_ub[],
446		double SwathWidth[],
447		double SwathWidthChroma[],
448		unsigned int SwathHeightY[],
449		unsigned int SwathHeightC[],
450		unsigned int DETBufferSizeInKByte[],
451		unsigned int DETBufferSizeY[],
452		unsigned int DETBufferSizeC[],
453		bool *UnboundedRequestEnabled,
454		unsigned int *CompressedBufferSizeInkByte,
455		unsigned int *CompBufReservedSpaceKBytes,
456		bool *CompBufReservedSpaceNeedAdjustment,
457		bool ViewportSizeSupportPerSurface[],
458		bool *ViewportSizeSupport)
459{
460	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464	unsigned int RoundedUpSwathSizeBytesY;
465	unsigned int RoundedUpSwathSizeBytesC;
466	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468	unsigned int k;
469	unsigned int TotalActiveDPP = 0;
470	bool NoChromaSurfaces = true;
471	unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473#ifdef __DML_VBA_DEBUG__
474	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477#endif
478	dml32_CalculateSwathWidth(ForceSingleDPP,
479			NumberOfActiveSurfaces,
480			SourcePixelFormat,
481			SourceRotation,
482			ViewportStationary,
483			ViewportWidth,
484			ViewportHeight,
485			ViewportXStart,
486			ViewportYStart,
487			ViewportXStartC,
488			ViewportYStartC,
489			SurfaceWidthY,
490			SurfaceWidthC,
491			SurfaceHeightY,
492			SurfaceHeightC,
493			ODMMode,
494			BytePerPixY,
495			BytePerPixC,
496			Read256BytesBlockHeightY,
497			Read256BytesBlockHeightC,
498			Read256BytesBlockWidthY,
499			Read256BytesBlockWidthC,
500			BlendingAndTiming,
501			HActive,
502			HRatio,
503			DPPPerSurface,
504
505			/* Output */
506			SwathWidthdoubleDPP,
507			SwathWidthdoubleDPPChroma,
508			SwathWidth,
509			SwathWidthChroma,
510			MaximumSwathHeightY,
511			MaximumSwathHeightC,
512			swath_width_luma_ub,
513			swath_width_chroma_ub);
514
515	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518#ifdef __DML_VBA_DEBUG__
519		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524				RoundedUpMaxSwathSizeBytesY[k]);
525		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529				RoundedUpMaxSwathSizeBytesC[k]);
530#endif
531
532		if (SourcePixelFormat[k] == dm_420_10) {
533			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535		}
536	}
537
538	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542			NoChromaSurfaces = false;
543		}
544	}
545
546	// By default, just set the reserved space to 2 pixel chunks size
547	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556	}
557
558	#ifdef __DML_VBA_DEBUG__
559		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561	#endif
562
563	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565	dml32_CalculateDETBufferSize(DETSizeOverride,
566			UseMALLForPStateChange,
567			ForceSingleDPP,
568			NumberOfActiveSurfaces,
569			*UnboundedRequestEnabled,
570			nomDETInKByte,
571			MaxTotalDETInKByte,
572			ConfigReturnBufferSizeInKByte,
573			MinCompressedBufferSizeInKByte,
574			CompressedBufferSegmentSizeInkByteFinal,
575			SourcePixelFormat,
576			ReadBandwidthLuma,
577			ReadBandwidthChroma,
578			RoundedUpMaxSwathSizeBytesY,
579			RoundedUpMaxSwathSizeBytesC,
580			DPPPerSurface,
581
582			/* Output */
583			DETBufferSizeInKByte,    // per hubp pipe
584			CompressedBufferSizeInkByte);
585
586#ifdef __DML_VBA_DEBUG__
587	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593#endif
594
595	*ViewportSizeSupport = true;
596	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600#ifdef __DML_VBA_DEBUG__
601		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602				DETBufferSizeInKByteForSwathCalculation);
603#endif
604
605		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607			SwathHeightY[k] = MaximumSwathHeightY[k];
608			SwathHeightC[k] = MaximumSwathHeightC[k];
609			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615			SwathHeightC[k] = MaximumSwathHeightC[k];
616			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621			SwathHeightY[k] = MaximumSwathHeightY[k];
622			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625		} else {
626			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630		}
631
632		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636			*ViewportSizeSupport = false;
637			ViewportSizeSupportPerSurface[k] = false;
638		} else {
639			ViewportSizeSupportPerSurface[k] = true;
640		}
641
642		if (SwathHeightC[k] == 0) {
643#ifdef __DML_VBA_DEBUG__
644			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645#endif
646			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647			DETBufferSizeC[k] = 0;
648		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649#ifdef __DML_VBA_DEBUG__
650			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651#endif
652			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654		} else {
655#ifdef __DML_VBA_DEBUG__
656			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657#endif
658			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660		}
661
662#ifdef __DML_VBA_DEBUG__
663		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666				k, RoundedUpMaxSwathSizeBytesY[k]);
667		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668				k, RoundedUpMaxSwathSizeBytesC[k]);
669		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675				ViewportSizeSupportPerSurface[k]);
676#endif
677
678	}
679} // CalculateSwathAndDETConfiguration
680
681void dml32_CalculateSwathWidth(
682		bool				ForceSingleDPP,
683		unsigned int			NumberOfActiveSurfaces,
684		enum source_format_class	SourcePixelFormat[],
685		enum dm_rotation_angle		SourceRotation[],
686		bool				ViewportStationary[],
687		unsigned int			ViewportWidth[],
688		unsigned int			ViewportHeight[],
689		unsigned int			ViewportXStart[],
690		unsigned int			ViewportYStart[],
691		unsigned int			ViewportXStartC[],
692		unsigned int			ViewportYStartC[],
693		unsigned int			SurfaceWidthY[],
694		unsigned int			SurfaceWidthC[],
695		unsigned int			SurfaceHeightY[],
696		unsigned int			SurfaceHeightC[],
697		enum odm_combine_mode		ODMMode[],
698		unsigned int			BytePerPixY[],
699		unsigned int			BytePerPixC[],
700		unsigned int			Read256BytesBlockHeightY[],
701		unsigned int			Read256BytesBlockHeightC[],
702		unsigned int			Read256BytesBlockWidthY[],
703		unsigned int			Read256BytesBlockWidthC[],
704		unsigned int			BlendingAndTiming[],
705		unsigned int			HActive[],
706		double				HRatio[],
707		unsigned int			DPPPerSurface[],
708
709		/* Output */
710		double				SwathWidthdoubleDPPY[],
711		double				SwathWidthdoubleDPPC[],
712		double				SwathWidthY[], // per-pipe
713		double				SwathWidthC[], // per-pipe
714		unsigned int			MaximumSwathHeightY[],
715		unsigned int			MaximumSwathHeightC[],
716		unsigned int			swath_width_luma_ub[], // per-pipe
717		unsigned int			swath_width_chroma_ub[]) // per-pipe
718{
719	unsigned int k, j;
720	enum odm_combine_mode MainSurfaceODMMode;
721
722	unsigned int surface_width_ub_l;
723	unsigned int surface_height_ub_l;
724	unsigned int surface_width_ub_c = 0;
725	unsigned int surface_height_ub_c = 0;
726
727#ifdef __DML_VBA_DEBUG__
728	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730#endif
731
732	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733		if (!IsVertical(SourceRotation[k]))
734			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735		else
736			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738#ifdef __DML_VBA_DEBUG__
739		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741#endif
742
743		MainSurfaceODMMode = ODMMode[k];
744		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745			if (BlendingAndTiming[k] == j)
746				MainSurfaceODMMode = ODMMode[j];
747		}
748
749		if (ForceSingleDPP) {
750			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751		} else {
752			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754						dml_round(HActive[k] / 4.0 * HRatio[k]));
755			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757						dml_round(HActive[k] / 2.0 * HRatio[k]));
758			} else if (DPPPerSurface[k] == 2) {
759				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760			} else {
761				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762			}
763		}
764
765#ifdef __DML_VBA_DEBUG__
766		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771#endif
772
773		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774				SourcePixelFormat[k] == dm_420_12) {
775			SwathWidthC[k] = SwathWidthY[k] / 2;
776			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777		} else {
778			SwathWidthC[k] = SwathWidthY[k];
779			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780		}
781
782		if (ForceSingleDPP == true) {
783			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785		}
786
787		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790		if (!IsVertical(SourceRotation[k])) {
791			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795						dml_floor(ViewportXStart[k] +
796								SwathWidthY[k] +
797								Read256BytesBlockWidthY[k] - 1,
798								Read256BytesBlockWidthY[k]) -
799								dml_floor(ViewportXStart[k],
800								Read256BytesBlockWidthY[k]));
801			} else {
802				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803						dml_ceil(SwathWidthY[k] - 1,
804								Read256BytesBlockWidthY[k]) +
805								Read256BytesBlockWidthY[k]);
806			}
807			if (BytePerPixC[k] > 0) {
808				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812									Read256BytesBlockWidthC[k] - 1,
813									Read256BytesBlockWidthC[k]) -
814									dml_floor(ViewportXStartC[k],
815									Read256BytesBlockWidthC[k]));
816				} else {
817					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818							dml_ceil(SwathWidthC[k] - 1,
819								Read256BytesBlockWidthC[k]) +
820								Read256BytesBlockWidthC[k]);
821				}
822			} else {
823				swath_width_chroma_ub[k] = 0;
824			}
825		} else {
826			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832						Read256BytesBlockHeightY[k]) -
833						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834			} else {
835				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837			}
838			if (BytePerPixC[k] > 0) {
839				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843									Read256BytesBlockHeightC[k] - 1,
844									Read256BytesBlockHeightC[k]) -
845									dml_floor(ViewportYStartC[k],
846											Read256BytesBlockHeightC[k]));
847				} else {
848					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850							Read256BytesBlockHeightC[k]);
851				}
852			} else {
853				swath_width_chroma_ub[k] = 0;
854			}
855		}
856
857#ifdef __DML_VBA_DEBUG__
858		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872#endif
873
874	}
875} // CalculateSwathWidth
876
877bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878			unsigned int TotalNumberOfActiveDPP,
879			bool NoChroma,
880			enum output_encoder_class Output,
881			enum dm_swizzle_mode SurfaceTiling,
882			bool CompBufReservedSpaceNeedAdjustment,
883			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884{
885	bool ret_val = false;
886
887	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888			TotalNumberOfActiveDPP == 1 && NoChroma);
889	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890		ret_val = false;
891
892	if (SurfaceTiling == dm_sw_linear)
893		ret_val = false;
894
895	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896		ret_val = false;
897
898#ifdef __DML_VBA_DEBUG__
899	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902#endif
903
904	return (ret_val);
905}
906
907void dml32_CalculateDETBufferSize(
908		unsigned int DETSizeOverride[],
909		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910		bool ForceSingleDPP,
911		unsigned int NumberOfActiveSurfaces,
912		bool UnboundedRequestEnabled,
913		unsigned int nomDETInKByte,
914		unsigned int MaxTotalDETInKByte,
915		unsigned int ConfigReturnBufferSizeInKByte,
916		unsigned int MinCompressedBufferSizeInKByte,
917		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918		enum source_format_class SourcePixelFormat[],
919		double ReadBandwidthLuma[],
920		double ReadBandwidthChroma[],
921		unsigned int RoundedUpMaxSwathSizeBytesY[],
922		unsigned int RoundedUpMaxSwathSizeBytesC[],
923		unsigned int DPPPerSurface[],
924		/* Output */
925		unsigned int DETBufferSizeInKByte[],
926		unsigned int *CompressedBufferSizeInkByte)
927{
928	unsigned int DETBufferSizePoolInKByte;
929	unsigned int NextDETBufferPieceInKByte;
930	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931	bool NextPotentialSurfaceToAssignDETPieceFound;
932	unsigned int NextSurfaceToAssignDETPiece;
933	double TotalBandwidth;
934	double BandwidthOfSurfacesNotAssignedDETPiece;
935	unsigned int max_minDET;
936	unsigned int minDET;
937	unsigned int minDET_pipe;
938	unsigned int j, k;
939
940#ifdef __DML_VBA_DEBUG__
941	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949			CompressedBufferSegmentSizeInkByteFinal);
950#endif
951
952	// Note: Will use default det size if that fits 2 swaths
953	if (UnboundedRequestEnabled) {
954		if (DETSizeOverride[0] > 0) {
955			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956		} else {
957			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958					((double) RoundedUpMaxSwathSizeBytesY[0] +
959							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960		}
961		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962	} else {
963		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965			DETBufferSizeInKByte[k] = nomDETInKByte;
966			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967					SourcePixelFormat[k] == dm_420_12) {
968				max_minDET = nomDETInKByte - 64;
969			} else {
970				max_minDET = nomDETInKByte;
971			}
972			minDET = 128;
973			minDET_pipe = 0;
974
975			// add DET resource until can hold 2 full swaths
976			while (minDET <= max_minDET && minDET_pipe == 0) {
977				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979					minDET_pipe = minDET;
980				minDET = minDET + 64;
981			}
982
983#ifdef __DML_VBA_DEBUG__
984			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988					RoundedUpMaxSwathSizeBytesY[k]);
989			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990					RoundedUpMaxSwathSizeBytesC[k]);
991#endif
992
993			if (minDET_pipe == 0) {
994				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996#ifdef __DML_VBA_DEBUG__
997				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998						__func__, k, minDET_pipe);
999#endif
1000			}
1001
1002			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003				DETBufferSizeInKByte[k] = 0;
1004			} else if (DETSizeOverride[k] > 0) {
1005				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009				DETBufferSizeInKByte[k] = minDET_pipe;
1010				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012			}
1013
1014#ifdef __DML_VBA_DEBUG__
1015			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019#endif
1020		}
1021
1022		TotalBandwidth = 0;
1023		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026		}
1027#ifdef __DML_VBA_DEBUG__
1028		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033#endif
1034		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045			} else {
1046				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047			}
1048#ifdef __DML_VBA_DEBUG__
1049			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050					DETPieceAssignedToThisSurfaceAlready[k]);
1051			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052					BandwidthOfSurfacesNotAssignedDETPiece);
1053#endif
1054		}
1055
1056		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057			NextPotentialSurfaceToAssignDETPieceFound = false;
1058			NextSurfaceToAssignDETPiece = 0;
1059
1060			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061#ifdef __DML_VBA_DEBUG__
1062				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063						ReadBandwidthLuma[k]);
1064				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065						ReadBandwidthChroma[k]);
1066				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071						NextSurfaceToAssignDETPiece);
1072#endif
1073				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078					NextSurfaceToAssignDETPiece = k;
1079					NextPotentialSurfaceToAssignDETPieceFound = true;
1080				}
1081#ifdef __DML_VBA_DEBUG__
1082				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086#endif
1087			}
1088
1089			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090				// Note: To show the banker's rounding behavior in VBA and also the fact
1091				// that the DET buffer size varies due to precision issue
1092				//
1093				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096				// BandwidthOfSurfacesNotAssignedDETPiece /
1097				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103				//
1104				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107				NextDETBufferPieceInKByte = dml_min(
1108					dml_round((double) DETBufferSizePoolInKByte *
1109						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111						BandwidthOfSurfacesNotAssignedDETPiece /
1112						((ForceSingleDPP ? 1 :
1113								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114						(ForceSingleDPP ? 1 :
1115								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116						dml_floor((double) DETBufferSizePoolInKByte,
1117						(ForceSingleDPP ? 1 :
1118								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127					} else {
1128						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129						// already has the max per-pipe value
1130						NextDETBufferPieceInKByte = 0;
1131					}
1132				}
1133
1134#ifdef __DML_VBA_DEBUG__
1135				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136					DETBufferSizePoolInKByte);
1137				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138					NextSurfaceToAssignDETPiece);
1139				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146					NextDETBufferPieceInKByte);
1147				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148					__func__, j, NextSurfaceToAssignDETPiece,
1149					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150#endif
1151
1152				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154						+ NextDETBufferPieceInKByte
1155						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156#ifdef __DML_VBA_DEBUG__
1157				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158#endif
1159
1160				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165			}
1166		}
1167		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168	}
1169	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171#ifdef __DML_VBA_DEBUG__
1172	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177	}
1178#endif
1179} // CalculateDETBufferSize
1180
1181void dml32_CalculateODMMode(
1182		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183		unsigned int HActive,
1184		enum output_format_class OutFormat,
1185		enum output_encoder_class Output,
1186		enum odm_combine_policy ODMUse,
1187		double StateDispclk,
1188		double MaxDispclk,
1189		bool DSCEnable,
1190		unsigned int TotalNumberOfActiveDPP,
1191		unsigned int MaxNumDPP,
1192		double PixelClock,
1193		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194		double DISPCLKRampingMargin,
1195		double DISPCLKDPPCLKVCOSpeed,
1196		unsigned int NumberOfDSCSlices,
1197
1198		/* Output */
1199		bool *TotalAvailablePipesSupport,
1200		unsigned int *NumberOfDPP,
1201		enum odm_combine_mode *ODMMode,
1202		double *RequiredDISPCLKPerSurface)
1203{
1204
1205	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211			MaxDispclk);
1212	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214			MaxDispclk);
1215	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217			MaxDispclk);
1218	*TotalAvailablePipesSupport = true;
1219	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221	if (ODMUse == dm_odm_combine_policy_none)
1222		*ODMMode = dm_odm_combine_mode_disabled;
1223
1224	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225	*NumberOfDPP = 0;
1226
1227	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233					|| NumberOfDSCSlices > 8)))) {
1234		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235			*ODMMode = dm_odm_combine_mode_4to1;
1236			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237			*NumberOfDPP = 4;
1238		} else {
1239			*TotalAvailablePipesSupport = false;
1240		}
1241	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247			*ODMMode = dm_odm_combine_mode_2to1;
1248			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249			*NumberOfDPP = 2;
1250		} else {
1251			*TotalAvailablePipesSupport = false;
1252		}
1253	} else {
1254		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255			*NumberOfDPP = 1;
1256		else
1257			*TotalAvailablePipesSupport = false;
1258	}
1259	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260			ODMUse != dm_odm_combine_policy_4to1) {
1261		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262			*ODMMode = dm_odm_combine_mode_disabled;
1263			*NumberOfDPP = 0;
1264			*TotalAvailablePipesSupport = false;
1265		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266				*ODMMode == dm_odm_combine_mode_4to1) {
1267			*ODMMode = dm_odm_combine_mode_4to1;
1268			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269			*NumberOfDPP = 4;
1270		} else {
1271			*ODMMode = dm_odm_combine_mode_2to1;
1272			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273			*NumberOfDPP = 2;
1274		}
1275	}
1276	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278		*ODMMode = dm_odm_combine_mode_disabled;
1279		*NumberOfDPP = 0;
1280		*TotalAvailablePipesSupport = false;
1281	}
1282}
1283
1284double dml32_CalculateRequiredDispclk(
1285		enum odm_combine_mode ODMMode,
1286		double PixelClock,
1287		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288		double DISPCLKRampingMargin,
1289		double DISPCLKDPPCLKVCOSpeed,
1290		double MaxDispclk)
1291{
1292	double RequiredDispclk = 0.;
1293	double PixelClockAfterODM;
1294	double DISPCLKWithRampingRoundedToDFSGranularity;
1295	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296	double MaxDispclkRoundedDownToDFSGranularity;
1297
1298	if (ODMMode == dm_odm_combine_mode_4to1)
1299		PixelClockAfterODM = PixelClock / 4;
1300	else if (ODMMode == dm_odm_combine_mode_2to1)
1301		PixelClockAfterODM = PixelClock / 2;
1302	else
1303		PixelClockAfterODM = PixelClock;
1304
1305
1306	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319	else
1320		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322	return RequiredDispclk;
1323}
1324
/*
 * Round a clock to a value of the form (4 * VCOSpeed) / N, N a whole number.
 *
 * N is 4 * VCOSpeed / Clock rounded down (round_up, result >= Clock) or
 * rounded up (!round_up, result <= Clock). A non-positive Clock returns 0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double ScaledVCO;
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	ScaledVCO = VCOSpeed * 4.0;

	/* A smaller divider gives a higher clock, so rounding up floors it. */
	if (round_up)
		Divider = dml_floor(ScaledVCO / Clock, 1.0);
	else
		Divider = dml_ceil(ScaledVCO / Clock, 1.0);

	return ScaledVCO / Divider;
}
1335
1336void dml32_CalculateOutputLink(
1337		double PHYCLKPerState,
1338		double PHYCLKD18PerState,
1339		double PHYCLKD32PerState,
1340		double Downspreading,
1341		bool IsMainSurfaceUsingTheIndicatedTiming,
1342		enum output_encoder_class Output,
1343		enum output_format_class OutputFormat,
1344		unsigned int HTotal,
1345		unsigned int HActive,
1346		double PixelClockBackEnd,
1347		double ForcedOutputLinkBPP,
1348		unsigned int DSCInputBitPerComponent,
1349		unsigned int NumberOfDSCSlices,
1350		double AudioSampleRate,
1351		unsigned int AudioSampleLayout,
1352		enum odm_combine_mode ODMModeNoDSC,
1353		enum odm_combine_mode ODMModeDSC,
1354		bool DSCEnable,
1355		unsigned int OutputLinkDPLanes,
1356		enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358		/* Output */
1359		bool *RequiresDSC,
1360		double *RequiresFEC,
1361		double  *OutBpp,
1362		enum dm_output_type *OutputType,
1363		enum dm_output_rate *OutputRate,
1364		unsigned int *RequiredSlots)
1365{
1366	bool LinkDSCEnable;
1367	unsigned int dummy;
1368	*RequiresDSC = false;
1369	*RequiresFEC = false;
1370	*OutBpp = 0;
1371	*OutputType = dm_output_type_unknown;
1372	*OutputRate = dm_output_rate_unknown;
1373
1374	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375		if (Output == dm_hdmi) {
1376			*RequiresDSC = false;
1377			*RequiresFEC = false;
1378			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381					ODMModeNoDSC, ODMModeDSC, &dummy);
1382			//OutputTypeAndRate = "HDMI";
1383			*OutputType = dm_output_type_hdmi;
1384
1385		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386			if (DSCEnable == true) {
1387				*RequiresDSC = true;
1388				LinkDSCEnable = true;
1389				if (Output == dm_dp || Output == dm_dp2p0)
1390					*RequiresFEC = true;
1391				else
1392					*RequiresFEC = false;
1393			} else {
1394				*RequiresDSC = false;
1395				LinkDSCEnable = false;
1396				if (Output == dm_dp2p0)
1397					*RequiresFEC = true;
1398				else
1399					*RequiresFEC = false;
1400			}
1401			if (Output == dm_dp2p0) {
1402				*OutBpp = 0;
1403				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404						PHYCLKD32PerState >= 10000 / 32) {
1405					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411							ForcedOutputLinkBPP == 0) {
1412						*RequiresDSC = true;
1413						LinkDSCEnable = true;
1414						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417								OutputFormat, DSCInputBitPerComponent,
1418								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420					}
1421					//OutputTypeAndRate = Output & " UHBR10";
1422					*OutputType = dm_output_type_dp2p0;
1423					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424				}
1425				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434							ForcedOutputLinkBPP == 0) {
1435						*RequiresDSC = true;
1436						LinkDSCEnable = true;
1437						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440								OutputFormat, DSCInputBitPerComponent,
1441								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443					}
1444					//OutputTypeAndRate = Output & " UHBR13p5";
1445					*OutputType = dm_output_type_dp2p0;
1446					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447				}
1448				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456						*RequiresDSC = true;
1457						LinkDSCEnable = true;
1458						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461								OutputFormat, DSCInputBitPerComponent,
1462								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464					}
1465					//OutputTypeAndRate = Output & " UHBR20";
1466					*OutputType = dm_output_type_dp2p0;
1467					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468				}
1469			} else {
1470				*OutBpp = 0;
1471				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472						PHYCLKPerState >= 270) {
1473					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479							ForcedOutputLinkBPP == 0) {
1480						*RequiresDSC = true;
1481						LinkDSCEnable = true;
1482						if (Output == dm_dp)
1483							*RequiresFEC = true;
1484						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487								OutputFormat, DSCInputBitPerComponent,
1488								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490					}
1491					//OutputTypeAndRate = Output & " HBR";
1492					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493					*OutputRate = dm_output_rate_dp_rate_hbr;
1494				}
1495				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504							ForcedOutputLinkBPP == 0) {
1505						*RequiresDSC = true;
1506						LinkDSCEnable = true;
1507						if (Output == dm_dp)
1508							*RequiresFEC = true;
1509
1510						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513								OutputFormat, DSCInputBitPerComponent,
1514								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516					}
1517					//OutputTypeAndRate = Output & " HBR2";
1518					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520				}
1521				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527							RequiredSlots);
1528
1529					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530						*RequiresDSC = true;
1531						LinkDSCEnable = true;
1532						if (Output == dm_dp)
1533							*RequiresFEC = true;
1534
1535						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538								OutputFormat, DSCInputBitPerComponent,
1539								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541					}
1542					//OutputTypeAndRate = Output & " HBR3";
1543					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545				}
1546			}
1547		}
1548	}
1549}
1550
/*
 * dml32_CalculateDPPCLK - derive the per-surface and global DPP clocks.
 *
 * For each active surface the single-DPP clock is divided by the number of
 * DPPs on that surface and scaled up by the down-spreading percentage.  The
 * largest of those values, rounded by dml32_RoundToDFSGranularity(), is
 * returned through GlobalDPPCLK.  Each Dppclk[] entry is then rounded up to a
 * whole multiple of GlobalDPPCLK / 255.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	unsigned int surf;
	double highest = 0;

	/* Pass 1: unquantized per-surface clocks and their maximum. */
	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] *
				(1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		highest = dml_max(highest, Dppclk[surf]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(highest, 1, DISPCLKDPPCLKVCOSpeed);

	/* Pass 2: snap every surface clock up to a 1/255 step of the global clock. */
	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
		double steps = dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[surf] = *GlobalDPPCLK / 255 * steps;
	}
}
1572
1573double dml32_TruncToValidBPP(
1574		double LinkBitRate,
1575		unsigned int Lanes,
1576		unsigned int HTotal,
1577		unsigned int HActive,
1578		double PixelClock,
1579		double DesiredBPP,
1580		bool DSCEnable,
1581		enum output_encoder_class Output,
1582		enum output_format_class Format,
1583		unsigned int DSCInputBitPerComponent,
1584		unsigned int DSCSlices,
1585		unsigned int AudioRate,
1586		unsigned int AudioLayout,
1587		enum odm_combine_mode ODMModeNoDSC,
1588		enum odm_combine_mode ODMModeDSC,
1589		/* Output */
1590		unsigned int *RequiredSlots)
1591{
1592	double    MaxLinkBPP;
1593	unsigned int   MinDSCBPP;
1594	double    MaxDSCBPP;
1595	unsigned int   NonDSCBPP0;
1596	unsigned int   NonDSCBPP1;
1597	unsigned int   NonDSCBPP2;
1598
1599	if (Format == dm_420) {
1600		NonDSCBPP0 = 12;
1601		NonDSCBPP1 = 15;
1602		NonDSCBPP2 = 18;
1603		MinDSCBPP = 6;
1604		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1605	} else if (Format == dm_444) {
1606		NonDSCBPP0 = 24;
1607		NonDSCBPP1 = 30;
1608		NonDSCBPP2 = 36;
1609		MinDSCBPP = 8;
1610		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611	} else {
1612		if (Output == dm_hdmi) {
1613			NonDSCBPP0 = 24;
1614			NonDSCBPP1 = 24;
1615			NonDSCBPP2 = 24;
1616		} else {
1617			NonDSCBPP0 = 16;
1618			NonDSCBPP1 = 20;
1619			NonDSCBPP2 = 24;
1620		}
1621		if (Format == dm_n422) {
1622			MinDSCBPP = 7;
1623			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624		} else {
1625			MinDSCBPP = 8;
1626			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627		}
1628	}
1629	if (Output == dm_dp2p0) {
1630		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631	} else if (DSCEnable && Output == dm_dp) {
1632		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633	} else {
1634		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635	}
1636
1637	if (DSCEnable) {
1638		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1640		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1642		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643			MaxLinkBPP = 2 * MaxLinkBPP;
1644	} else {
1645		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1647		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1649		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650			MaxLinkBPP = 2 * MaxLinkBPP;
1651	}
1652
1653	if (DesiredBPP == 0) {
1654		if (DSCEnable) {
1655			if (MaxLinkBPP < MinDSCBPP)
1656				return BPP_INVALID;
1657			else if (MaxLinkBPP >= MaxDSCBPP)
1658				return MaxDSCBPP;
1659			else
1660				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1661		} else {
1662			if (MaxLinkBPP >= NonDSCBPP2)
1663				return NonDSCBPP2;
1664			else if (MaxLinkBPP >= NonDSCBPP1)
1665				return NonDSCBPP1;
1666			else if (MaxLinkBPP >= NonDSCBPP0)
1667				return 16.0;
1668			else
1669				return BPP_INVALID;
1670		}
1671	} else {
1672		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1673				DesiredBPP <= NonDSCBPP0)) ||
1674				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1675			return BPP_INVALID;
1676		else
1677			return DesiredBPP;
1678	}
1679
1680	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1681
1682	return BPP_INVALID;
1683} // TruncToValidBPP
1684
1685double dml32_RequiredDTBCLK(
1686		bool              DSCEnable,
1687		double               PixelClock,
1688		enum output_format_class  OutputFormat,
1689		double               OutputBpp,
1690		unsigned int              DSCSlices,
1691		unsigned int                 HTotal,
1692		unsigned int                 HActive,
1693		unsigned int              AudioRate,
1694		unsigned int              AudioLayout)
1695{
1696	double PixelWordRate;
1697	double HCActive;
1698	double HCBlank;
1699	double AverageTribyteRate;
1700	double HActiveTribyteRate;
1701
1702	if (DSCEnable != true)
1703		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1704
1705	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1706	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1707			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1708	HCBlank = 64 + 32 *
1709			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1710	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1711	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1712	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1713}
1714
1715unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1716		enum odm_combine_mode ODMMode,
1717		unsigned int DSCInputBitPerComponent,
1718		double OutputBpp,
1719		unsigned int HActive,
1720		unsigned int HTotal,
1721		unsigned int NumberOfDSCSlices,
1722		enum output_format_class  OutputFormat,
1723		enum output_encoder_class Output,
1724		double PixelClock,
1725		double PixelClockBackEnd,
1726		double dsc_delay_factor_wa)
1727{
1728	unsigned int DSCDelayRequirement_val;
1729
1730	if (DSCEnabled == true && OutputBpp != 0) {
1731		if (ODMMode == dm_odm_combine_mode_4to1) {
1732			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1733					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1734					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1735		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1736			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1738					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739		} else {
1740			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1742					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1743		}
1744
1745		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1746				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1747
1748		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1749
1750	} else {
1751		DSCDelayRequirement_val = 0;
1752	}
1753
1754#ifdef __DML_VBA_DEBUG__
1755	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1756	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1757	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1758	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1759	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1760	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1761	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1762#endif
1763
1764	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1765}
1766
/*
 * dml32_CalculateSurfaceSizeInMall - per-surface MALL footprint and overflow check.
 *
 * For every active surface k, SurfaceSizeInMALL[k] is built up from:
 *   - the luma plane: block-aligned width * block-aligned height * BytesPerPixelY;
 *   - the chroma plane (same formula with the C arrays) when ReadBlockWidthC[k] > 0;
 *   - when DCCEnable[k]: a luma meta term aligned to 8 * Read256BytesBlock{Width,Height}Y
 *     and scaled by BytesPerPixelY / 256, plus a fixed 64 * 1024, and a chroma meta
 *     term when Read256BytesBlockWidthC[k] > 0.
 *
 * With a stationary viewport the aligned extent is derived from the viewport start
 * and size and clamped to the (aligned) surface size / meta pitch; otherwise the
 * extent is min(surface, viewport + block - 1) rounded up to the block size.
 *
 * The sizes are then summed separately for surfaces using phantom pipes (SubVP)
 * and surfaces with static-screen MALL enabled, and *ExceededMALLSize is set when
 * either sum is larger than MALLAllocatedForDCN * 1024 * 1024.
 *
 * Outputs: SurfaceSizeInMALL[0 .. NumberOfActiveSurfaces - 1], *ExceededMALLSize.
 */
void dml32_CalculateSurfaceSizeInMall(
		unsigned int NumberOfActiveSurfaces,
		unsigned int MALLAllocatedForDCN,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
		bool DCCEnable[],
		bool ViewportStationary[],
		unsigned int ViewportXStartY[],
		unsigned int ViewportYStartY[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int ViewportWidthY[],
		unsigned int ViewportHeightY[],
		unsigned int BytesPerPixelY[],
		unsigned int ViewportWidthC[],
		unsigned int ViewportHeightC[],
		unsigned int BytesPerPixelC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int ReadBlockWidthY[],
		unsigned int ReadBlockWidthC[],
		unsigned int ReadBlockHeightY[],
		unsigned int ReadBlockHeightC[],
		unsigned int DCCMetaPitchY[],
		unsigned int DCCMetaPitchC[],

		/* Output */
		unsigned int    SurfaceSizeInMALL[],
		bool *ExceededMALLSize)
{
	unsigned int k;
	unsigned int TotalSurfaceSizeInMALLForSS = 0;
	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
	/* The limit is compared in bytes; NOTE(review): implies MALLAllocatedForDCN is in MiB - confirm. */
	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (ViewportStationary[k]) {
			/* Luma: block-aligned viewport extent, clamped to the block-aligned surface. */
			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];

			/* Chroma plane, only when a chroma read block width is given. */
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
							BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				/* DCC meta for luma: 8x 256-byte-block alignment, / 256, plus a fixed 64 * 1024. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
				/* DCC meta for chroma (no extra fixed term). */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
								Read256BytesBlockWidthC[k]),
								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
								* Read256BytesBlockWidthC[k] - 1, 8 *
								Read256BytesBlockWidthC[k]) -
								dml_floor(ViewportXStartC[k], 8 *
								Read256BytesBlockWidthC[k])) *
								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
								Read256BytesBlockHeightC[k]),
								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
								8 * Read256BytesBlockHeightC[k] - 1, 8 *
								Read256BytesBlockHeightC[k]) -
								dml_floor(ViewportYStartC[k], 8 *
								Read256BytesBlockHeightC[k])) *
								BytesPerPixelC[k] / 256;
				}
			}
		} else {
			/*
			 * Non-stationary viewport: the start position is not used; the extent is
			 * min(surface, viewport + block - 1) rounded up to the block size.
			 */
			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
							BytesPerPixelY[k];
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
								BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				/* Same DCC meta terms as the stationary case, using the start-independent extent. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
								Read256BytesBlockWidthY[k] - 1), 8 *
								Read256BytesBlockWidthY[k]) *
						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
								Read256BytesBlockHeightY[k] - 1), 8 *
								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);

				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
									Read256BytesBlockWidthC[k] - 1), 8 *
									Read256BytesBlockWidthC[k]) *
							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
									Read256BytesBlockHeightC[k] - 1), 8 *
									Read256BytesBlockHeightC[k]) *
									BytesPerPixelC[k] / 256;
				}
			}
		}
	}

	/* Accumulate the two usage classes; a phantom-pipe surface is never counted as static screen. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* SS and Subvp counted separate as they are never used at the same time */
		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
	}
	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
} // CalculateSurfaceSizeInMall
1907
1908void dml32_CalculateVMRowAndSwath(
1909		unsigned int NumberOfActiveSurfaces,
1910		DmlPipe myPipe[],
1911		unsigned int SurfaceSizeInMALL[],
1912		unsigned int PTEBufferSizeInRequestsLuma,
1913		unsigned int PTEBufferSizeInRequestsChroma,
1914		unsigned int DCCMetaBufferSizeBytes,
1915		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1916		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1917		unsigned int MALLAllocatedForDCN,
1918		double SwathWidthY[],
1919		double SwathWidthC[],
1920		bool GPUVMEnable,
1921		bool HostVMEnable,
1922		unsigned int HostVMMaxNonCachedPageTableLevels,
1923		unsigned int GPUVMMaxPageTableLevels,
1924		unsigned int GPUVMMinPageSizeKBytes[],
1925		unsigned int HostVMMinPageSize,
1926
1927		/* Output */
1928		bool PTEBufferSizeNotExceeded[],
1929		bool DCCMetaBufferSizeNotExceeded[],
1930		unsigned int dpte_row_width_luma_ub[],
1931		unsigned int dpte_row_width_chroma_ub[],
1932		unsigned int dpte_row_height_luma[],
1933		unsigned int dpte_row_height_chroma[],
1934		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1935		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1936		unsigned int meta_req_width[],
1937		unsigned int meta_req_width_chroma[],
1938		unsigned int meta_req_height[],
1939		unsigned int meta_req_height_chroma[],
1940		unsigned int meta_row_width[],
1941		unsigned int meta_row_width_chroma[],
1942		unsigned int meta_row_height[],
1943		unsigned int meta_row_height_chroma[],
1944		unsigned int vm_group_bytes[],
1945		unsigned int dpte_group_bytes[],
1946		unsigned int PixelPTEReqWidthY[],
1947		unsigned int PixelPTEReqHeightY[],
1948		unsigned int PTERequestSizeY[],
1949		unsigned int PixelPTEReqWidthC[],
1950		unsigned int PixelPTEReqHeightC[],
1951		unsigned int PTERequestSizeC[],
1952		unsigned int dpde0_bytes_per_frame_ub_l[],
1953		unsigned int meta_pte_bytes_per_frame_ub_l[],
1954		unsigned int dpde0_bytes_per_frame_ub_c[],
1955		unsigned int meta_pte_bytes_per_frame_ub_c[],
1956		double PrefetchSourceLinesY[],
1957		double PrefetchSourceLinesC[],
1958		double VInitPreFillY[],
1959		double VInitPreFillC[],
1960		unsigned int MaxNumSwathY[],
1961		unsigned int MaxNumSwathC[],
1962		double meta_row_bw[],
1963		double dpte_row_bw[],
1964		double PixelPTEBytesPerRow[],
1965		double PDEAndMetaPTEBytesFrame[],
1966		double MetaRowByte[],
1967		bool use_one_row_for_frame[],
1968		bool use_one_row_for_frame_flip[],
1969		bool UsesMALLForStaticScreen[],
1970		bool PTE_BUFFER_MODE[],
1971		unsigned int BIGK_FRAGMENT_SIZE[])
1972{
1973	unsigned int k;
1974	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1975	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1976	unsigned int PDEAndMetaPTEBytesFrameY;
1977	unsigned int PDEAndMetaPTEBytesFrameC;
1978	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1979	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1980	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1981	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1982	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1983	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1984	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1985	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1986	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1987	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1988	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1989
1990	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1991		if (HostVMEnable == true) {
1992			vm_group_bytes[k] = 512;
1993			dpte_group_bytes[k] = 512;
1994		} else if (GPUVMEnable == true) {
1995			vm_group_bytes[k] = 2048;
1996			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1997				dpte_group_bytes[k] = 512;
1998			else
1999				dpte_group_bytes[k] = 2048;
2000		} else {
2001			vm_group_bytes[k] = 0;
2002			dpte_group_bytes[k] = 0;
2003		}
2004
2005		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2006				myPipe[k].SourcePixelFormat == dm_420_12 ||
2007				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2008			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2009					!IsVertical(myPipe[k].SourceRotation)) {
2010				PTEBufferSizeInRequestsForLuma[k] =
2011						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2012				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2013			} else {
2014				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2015				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2016			}
2017
2018			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2019					myPipe[k].ViewportStationary,
2020					myPipe[k].DCCEnable,
2021					myPipe[k].DPPPerSurface,
2022					myPipe[k].BlockHeight256BytesC,
2023					myPipe[k].BlockWidth256BytesC,
2024					myPipe[k].SourcePixelFormat,
2025					myPipe[k].SurfaceTiling,
2026					myPipe[k].BytePerPixelC,
2027					myPipe[k].SourceRotation,
2028					SwathWidthC[k],
2029					myPipe[k].ViewportHeightChroma,
2030					myPipe[k].ViewportXStartC,
2031					myPipe[k].ViewportYStartC,
2032					GPUVMEnable,
2033					HostVMEnable,
2034					HostVMMaxNonCachedPageTableLevels,
2035					GPUVMMaxPageTableLevels,
2036					GPUVMMinPageSizeKBytes[k],
2037					HostVMMinPageSize,
2038					PTEBufferSizeInRequestsForChroma[k],
2039					myPipe[k].PitchC,
2040					myPipe[k].DCCMetaPitchC,
2041					myPipe[k].BlockWidthC,
2042					myPipe[k].BlockHeightC,
2043
2044					/* Output */
2045					&MetaRowByteC[k],
2046					&PixelPTEBytesPerRowC[k],
2047					&dpte_row_width_chroma_ub[k],
2048					&dpte_row_height_chroma[k],
2049					&dpte_row_height_linear_chroma[k],
2050					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2051					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2052					&dpte_row_height_chroma_one_row_per_frame[k],
2053					&meta_req_width_chroma[k],
2054					&meta_req_height_chroma[k],
2055					&meta_row_width_chroma[k],
2056					&meta_row_height_chroma[k],
2057					&PixelPTEReqWidthC[k],
2058					&PixelPTEReqHeightC[k],
2059					&PTERequestSizeC[k],
2060					&dpde0_bytes_per_frame_ub_c[k],
2061					&meta_pte_bytes_per_frame_ub_c[k]);
2062
2063			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2064					myPipe[k].VRatioChroma,
2065					myPipe[k].VTapsChroma,
2066					myPipe[k].InterlaceEnable,
2067					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2068					myPipe[k].SwathHeightC,
2069					myPipe[k].SourceRotation,
2070					myPipe[k].ViewportStationary,
2071					SwathWidthC[k],
2072					myPipe[k].ViewportHeightChroma,
2073					myPipe[k].ViewportXStartC,
2074					myPipe[k].ViewportYStartC,
2075
2076					/* Output */
2077					&VInitPreFillC[k],
2078					&MaxNumSwathC[k]);
2079		} else {
2080			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2081			PTEBufferSizeInRequestsForChroma[k] = 0;
2082			PixelPTEBytesPerRowC[k] = 0;
2083			PDEAndMetaPTEBytesFrameC = 0;
2084			MetaRowByteC[k] = 0;
2085			MaxNumSwathC[k] = 0;
2086			PrefetchSourceLinesC[k] = 0;
2087			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2088			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2089			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2090		}
2091
2092		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2093				myPipe[k].ViewportStationary,
2094				myPipe[k].DCCEnable,
2095				myPipe[k].DPPPerSurface,
2096				myPipe[k].BlockHeight256BytesY,
2097				myPipe[k].BlockWidth256BytesY,
2098				myPipe[k].SourcePixelFormat,
2099				myPipe[k].SurfaceTiling,
2100				myPipe[k].BytePerPixelY,
2101				myPipe[k].SourceRotation,
2102				SwathWidthY[k],
2103				myPipe[k].ViewportHeight,
2104				myPipe[k].ViewportXStart,
2105				myPipe[k].ViewportYStart,
2106				GPUVMEnable,
2107				HostVMEnable,
2108				HostVMMaxNonCachedPageTableLevels,
2109				GPUVMMaxPageTableLevels,
2110				GPUVMMinPageSizeKBytes[k],
2111				HostVMMinPageSize,
2112				PTEBufferSizeInRequestsForLuma[k],
2113				myPipe[k].PitchY,
2114				myPipe[k].DCCMetaPitchY,
2115				myPipe[k].BlockWidthY,
2116				myPipe[k].BlockHeightY,
2117
2118				/* Output */
2119				&MetaRowByteY[k],
2120				&PixelPTEBytesPerRowY[k],
2121				&dpte_row_width_luma_ub[k],
2122				&dpte_row_height_luma[k],
2123				&dpte_row_height_linear_luma[k],
2124				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2125				&dpte_row_width_luma_ub_one_row_per_frame[k],
2126				&dpte_row_height_luma_one_row_per_frame[k],
2127				&meta_req_width[k],
2128				&meta_req_height[k],
2129				&meta_row_width[k],
2130				&meta_row_height[k],
2131				&PixelPTEReqWidthY[k],
2132				&PixelPTEReqHeightY[k],
2133				&PTERequestSizeY[k],
2134				&dpde0_bytes_per_frame_ub_l[k],
2135				&meta_pte_bytes_per_frame_ub_l[k]);
2136
2137		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2138				myPipe[k].VRatio,
2139				myPipe[k].VTaps,
2140				myPipe[k].InterlaceEnable,
2141				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2142				myPipe[k].SwathHeightY,
2143				myPipe[k].SourceRotation,
2144				myPipe[k].ViewportStationary,
2145				SwathWidthY[k],
2146				myPipe[k].ViewportHeight,
2147				myPipe[k].ViewportXStart,
2148				myPipe[k].ViewportYStart,
2149
2150				/* Output */
2151				&VInitPreFillY[k],
2152				&MaxNumSwathY[k]);
2153
2154		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2155		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2156
2157		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2158				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2159			PTEBufferSizeNotExceeded[k] = true;
2160		} else {
2161			PTEBufferSizeNotExceeded[k] = false;
2162		}
2163
2164		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2165			PTEBufferSizeInRequestsForLuma[k] &&
2166			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2167	}
2168
2169	dml32_CalculateMALLUseForStaticScreen(
2170			NumberOfActiveSurfaces,
2171			MALLAllocatedForDCN,
2172			UseMALLForStaticScreen,   // mode
2173			SurfaceSizeInMALL,
2174			one_row_per_frame_fits_in_buffer,
2175			/* Output */
2176			UsesMALLForStaticScreen); // boolen
2177
2178	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2179		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2180				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2181				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2182				(GPUVMMinPageSizeKBytes[k] > 64);
2183		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2184	}
2185
2186	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2187#ifdef __DML_VBA_DEBUG__
2188		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2189		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2190#endif
2191		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2192				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2193				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2194				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2195
2196		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2197				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2198
2199		if (use_one_row_for_frame[k]) {
2200			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2201			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2202			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2203			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2204			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2205			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2206			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2207		}
2208
2209		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2210			DCCMetaBufferSizeNotExceeded[k] = true;
2211		else
2212			DCCMetaBufferSizeNotExceeded[k] = false;
2213
2214		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2215		if (use_one_row_for_frame[k])
2216			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2217
2218		dml32_CalculateRowBandwidth(
2219				GPUVMEnable,
2220				myPipe[k].SourcePixelFormat,
2221				myPipe[k].VRatio,
2222				myPipe[k].VRatioChroma,
2223				myPipe[k].DCCEnable,
2224				myPipe[k].HTotal / myPipe[k].PixelClock,
2225				MetaRowByteY[k], MetaRowByteC[k],
2226				meta_row_height[k],
2227				meta_row_height_chroma[k],
2228				PixelPTEBytesPerRowY[k],
2229				PixelPTEBytesPerRowC[k],
2230				dpte_row_height_luma[k],
2231				dpte_row_height_chroma[k],
2232
2233				/* Output */
2234				&meta_row_bw[k],
2235				&dpte_row_bw[k]);
2236#ifdef __DML_VBA_DEBUG__
2237		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2238		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2239				__func__, k, use_one_row_for_frame_flip[k]);
2240		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2241				__func__, k, UseMALLForPStateChange[k]);
2242		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2243		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2244				__func__, k, dpte_row_width_luma_ub[k]);
2245		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2246		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2247				__func__, k, dpte_row_height_chroma[k]);
2248		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2249				__func__, k, dpte_row_width_chroma_ub[k]);
2250		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2251		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2252		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2253				__func__, k, PTEBufferSizeNotExceeded[k]);
2254		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2255		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2256#endif
2257	}
2258} // CalculateVMRowAndSwath
2259
2260unsigned int dml32_CalculateVMAndRowBytes(
2261		bool ViewportStationary,
2262		bool DCCEnable,
2263		unsigned int NumberOfDPPs,
2264		unsigned int BlockHeight256Bytes,
2265		unsigned int BlockWidth256Bytes,
2266		enum source_format_class SourcePixelFormat,
2267		unsigned int SurfaceTiling,
2268		unsigned int BytePerPixel,
2269		enum dm_rotation_angle SourceRotation,
2270		double SwathWidth,
2271		unsigned int ViewportHeight,
2272		unsigned int    ViewportXStart,
2273		unsigned int    ViewportYStart,
2274		bool GPUVMEnable,
2275		bool HostVMEnable,
2276		unsigned int HostVMMaxNonCachedPageTableLevels,
2277		unsigned int GPUVMMaxPageTableLevels,
2278		unsigned int GPUVMMinPageSizeKBytes,
2279		unsigned int HostVMMinPageSize,
2280		unsigned int PTEBufferSizeInRequests,
2281		unsigned int Pitch,
2282		unsigned int DCCMetaPitch,
2283		unsigned int MacroTileWidth,
2284		unsigned int MacroTileHeight,
2285
2286		/* Output */
2287		unsigned int *MetaRowByte,
2288		unsigned int *PixelPTEBytesPerRow,
2289		unsigned int    *dpte_row_width_ub,
2290		unsigned int *dpte_row_height,
2291		unsigned int *dpte_row_height_linear,
2292		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2293		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2294		unsigned int    *dpte_row_height_one_row_per_frame,
2295		unsigned int *MetaRequestWidth,
2296		unsigned int *MetaRequestHeight,
2297		unsigned int *meta_row_width,
2298		unsigned int *meta_row_height,
2299		unsigned int *PixelPTEReqWidth,
2300		unsigned int *PixelPTEReqHeight,
2301		unsigned int *PTERequestSize,
2302		unsigned int    *DPDE0BytesFrame,
2303		unsigned int    *MetaPTEBytesFrame)
2304{
2305	unsigned int MPDEBytesFrame;
2306	unsigned int DCCMetaSurfaceBytes;
2307	unsigned int ExtraDPDEBytesFrame;
2308	unsigned int PDEAndMetaPTEBytesFrame;
2309	unsigned int HostVMDynamicLevels = 0;
2310	unsigned int    MacroTileSizeBytes;
2311	unsigned int    vp_height_meta_ub;
2312	unsigned int    vp_height_dpte_ub;
2313	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2314
2315	if (GPUVMEnable == true && HostVMEnable == true) {
2316		if (HostVMMinPageSize < 2048)
2317			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2318		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2319			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2320		else
2321			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2322	}
2323
2324	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2325	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2326	if (SurfaceTiling == dm_sw_linear) {
2327		*meta_row_height = 32;
2328		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2329				- dml_floor(ViewportXStart, *MetaRequestWidth);
2330	} else if (!IsVertical(SourceRotation)) {
2331		*meta_row_height = *MetaRequestHeight;
2332		if (ViewportStationary && NumberOfDPPs == 1) {
2333			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2334					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2335		} else {
2336			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2337		}
2338		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2339	} else {
2340		*meta_row_height = *MetaRequestWidth;
2341		if (ViewportStationary && NumberOfDPPs == 1) {
2342			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2343					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2344		} else {
2345			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2346		}
2347		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2348	}
2349
2350	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2352				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2353	} else if (!IsVertical(SourceRotation)) {
2354		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355	} else {
2356		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357	}
2358
2359	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2360
2361	if (GPUVMEnable == true) {
2362		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2363				(8 * 4.0 * 1024), 1) + 1) * 64;
2364		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2365	} else {
2366		*MetaPTEBytesFrame = 0;
2367		MPDEBytesFrame = 0;
2368	}
2369
2370	if (DCCEnable != true) {
2371		*MetaPTEBytesFrame = 0;
2372		MPDEBytesFrame = 0;
2373		*MetaRowByte = 0;
2374	}
2375
2376	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2377
2378	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2379		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2380			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2381					MacroTileHeight - 1, MacroTileHeight) -
2382					dml_floor(ViewportYStart, MacroTileHeight);
2383		} else if (!IsVertical(SourceRotation)) {
2384			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2385		} else {
2386			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2387		}
2388		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2389				(8 * 2097152), 1) + 1);
2390		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2391	} else {
2392		*DPDE0BytesFrame = 0;
2393		ExtraDPDEBytesFrame = 0;
2394		vp_height_dpte_ub = 0;
2395	}
2396
2397	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2398
2399#ifdef __DML_VBA_DEBUG__
2400	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2401	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2402	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2403	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2404	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2405	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2406	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2407	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2408	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2409	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2410	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2411	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2412	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2413	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2414	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2415	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2416	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2417#endif
2418
2419	if (HostVMEnable == true)
2420		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2421
2422	if (SurfaceTiling == dm_sw_linear) {
2423		*PixelPTEReqHeight = 1;
2424		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2425		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2426		*PTERequestSize = 64;
2427	} else if (GPUVMMinPageSizeKBytes == 4) {
2428		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2429		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2430		*PTERequestSize = 128;
2431	} else {
2432		*PixelPTEReqHeight = MacroTileHeight;
2433		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2434		*PTERequestSize = 64;
2435	}
2436#ifdef __DML_VBA_DEBUG__
2437	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2438	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2439	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2440	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2441	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2442	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2443	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2444#endif
2445
2446	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2447	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2448			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2449					(double) *PixelPTEReqWidth;
2450	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2451			*PTERequestSize;
2452
2453	if (SurfaceTiling == dm_sw_linear) {
2454		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2455				*PixelPTEReqWidth / Pitch), 1));
2456#ifdef __DML_VBA_DEBUG__
2457		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2458				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2459		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2460				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2461		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2462				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2463		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2464				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2465						*PixelPTEReqWidth / Pitch), 1));
2466		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2467#endif
2468		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2469				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2470		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2471
2472		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2473		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2474				PixelPTEReqWidth_linear / Pitch), 1);
2475		if (*dpte_row_height_linear > 128)
2476			*dpte_row_height_linear = 128;
2477
2478	} else if (!IsVertical(SourceRotation)) {
2479		*dpte_row_height = *PixelPTEReqHeight;
2480
2481		if (GPUVMMinPageSizeKBytes > 64) {
2482			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2483					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2484		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2485			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2486					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2487					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2488		} else {
2489			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2490					*PixelPTEReqWidth;
2491		}
2492
2493		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2494	} else {
2495		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2496
2497		if (ViewportStationary && (NumberOfDPPs == 1)) {
2498			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2499					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2500		} else {
2501			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2502					* *PixelPTEReqHeight;
2503		}
2504
2505		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2506	}
2507
2508	if (GPUVMEnable != true)
2509		*PixelPTEBytesPerRow = 0;
2510	if (HostVMEnable == true)
2511		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2512
2513#ifdef __DML_VBA_DEBUG__
2514	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2515	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2516	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2517	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2518	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2519	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2520	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2521	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2522			__func__, *dpte_row_width_ub_one_row_per_frame);
2523	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2524			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2525	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2526			*MetaPTEBytesFrame);
2527#endif
2528
2529	return PDEAndMetaPTEBytesFrame;
2530} // CalculateVMAndRowBytes
2531
2532double dml32_CalculatePrefetchSourceLines(
2533		double VRatio,
2534		unsigned int VTaps,
2535		bool Interlace,
2536		bool ProgressiveToInterlaceUnitInOPP,
2537		unsigned int SwathHeight,
2538		enum dm_rotation_angle SourceRotation,
2539		bool ViewportStationary,
2540		double SwathWidth,
2541		unsigned int ViewportHeight,
2542		unsigned int ViewportXStart,
2543		unsigned int ViewportYStart,
2544
2545		/* Output */
2546		double *VInitPreFill,
2547		unsigned int *MaxNumSwath)
2548{
2549
2550	unsigned int vp_start_rot;
2551	unsigned int sw0_tmp;
2552	unsigned int MaxPartialSwath;
2553	double numLines;
2554
2555#ifdef __DML_VBA_DEBUG__
2556	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2557	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2558	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2559	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2560	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2561	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2562#endif
2563	if (ProgressiveToInterlaceUnitInOPP)
2564		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2565	else
2566		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2567
2568	if (ViewportStationary) {
2569		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2570			vp_start_rot = SwathHeight -
2571					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2572		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2573			vp_start_rot = ViewportXStart;
2574		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2575			vp_start_rot = SwathHeight -
2576					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2577		} else {
2578			vp_start_rot = ViewportYStart;
2579		}
2580		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2581		if (sw0_tmp < *VInitPreFill)
2582			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2583		else
2584			*MaxNumSwath = 1;
2585		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2586	} else {
2587		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2588		if (*VInitPreFill > 1)
2589			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2590		else
2591			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2592	}
2593	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2594
2595#ifdef __DML_VBA_DEBUG__
2596	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2597	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2598	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2599	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2600	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2601#endif
2602	return numLines;
2603
2604} // CalculatePrefetchSourceLines
2605
2606void dml32_CalculateMALLUseForStaticScreen(
2607		unsigned int NumberOfActiveSurfaces,
2608		unsigned int MALLAllocatedForDCNFinal,
2609		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2610		unsigned int SurfaceSizeInMALL[],
2611		bool one_row_per_frame_fits_in_buffer[],
2612
2613		/* output */
2614		bool UsesMALLForStaticScreen[])
2615{
2616	unsigned int k;
2617	unsigned int SurfaceToAddToMALL;
2618	bool CanAddAnotherSurfaceToMALL;
2619	unsigned int TotalSurfaceSizeInMALL;
2620
2621	TotalSurfaceSizeInMALL = 0;
2622	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2623		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2624		if (UsesMALLForStaticScreen[k])
2625			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2626#ifdef __DML_VBA_DEBUG__
2627		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2628		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2629#endif
2630	}
2631
2632	SurfaceToAddToMALL = 0;
2633	CanAddAnotherSurfaceToMALL = true;
2634	while (CanAddAnotherSurfaceToMALL) {
2635		CanAddAnotherSurfaceToMALL = false;
2636		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2637			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2638					!UsesMALLForStaticScreen[k] &&
2639					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2640					one_row_per_frame_fits_in_buffer[k] &&
2641					(!CanAddAnotherSurfaceToMALL ||
2642					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2643				CanAddAnotherSurfaceToMALL = true;
2644				SurfaceToAddToMALL = k;
2645#ifdef __DML_VBA_DEBUG__
2646				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2647						__func__, k, UseMALLForStaticScreen[k]);
2648#endif
2649			}
2650		}
2651		if (CanAddAnotherSurfaceToMALL) {
2652			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2653			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2654
2655#ifdef __DML_VBA_DEBUG__
2656			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2657			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2658#endif
2659
2660		}
2661	}
2662}
2663
2664void dml32_CalculateRowBandwidth(
2665		bool GPUVMEnable,
2666		enum source_format_class SourcePixelFormat,
2667		double VRatio,
2668		double VRatioChroma,
2669		bool DCCEnable,
2670		double LineTime,
2671		unsigned int MetaRowByteLuma,
2672		unsigned int MetaRowByteChroma,
2673		unsigned int meta_row_height_luma,
2674		unsigned int meta_row_height_chroma,
2675		unsigned int PixelPTEBytesPerRowLuma,
2676		unsigned int PixelPTEBytesPerRowChroma,
2677		unsigned int dpte_row_height_luma,
2678		unsigned int dpte_row_height_chroma,
2679		/* Output */
2680		double *meta_row_bw,
2681		double *dpte_row_bw)
2682{
2683	if (DCCEnable != true) {
2684		*meta_row_bw = 0;
2685	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2686			SourcePixelFormat == dm_rgbe_alpha) {
2687		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2688				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2689	} else {
2690		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2691	}
2692
2693	if (GPUVMEnable != true) {
2694		*dpte_row_bw = 0;
2695	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2696			SourcePixelFormat == dm_rgbe_alpha) {
2697		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2698				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2699	} else {
2700		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2701	}
2702}
2703
/*
 * dml32_CalculateUrgentLatency - urgent latency to use for bandwidth checks.
 *
 * Starts from dml_max3() of the three supplied urgent latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * FabricClock is used as a divisor on the adjustment path.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double base_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return base_latency;

	return base_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2722
2723void dml32_CalculateUrgentBurstFactor(
2724		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2725		unsigned int    swath_width_luma_ub,
2726		unsigned int    swath_width_chroma_ub,
2727		unsigned int SwathHeightY,
2728		unsigned int SwathHeightC,
2729		double  LineTime,
2730		double  UrgentLatency,
2731		double  CursorBufferSize,
2732		unsigned int CursorWidth,
2733		unsigned int CursorBPP,
2734		double  VRatio,
2735		double  VRatioC,
2736		double  BytePerPixelInDETY,
2737		double  BytePerPixelInDETC,
2738		unsigned int    DETBufferSizeY,
2739		unsigned int    DETBufferSizeC,
2740		/* Output */
2741		double *UrgentBurstFactorCursor,
2742		double *UrgentBurstFactorLuma,
2743		double *UrgentBurstFactorChroma,
2744		bool   *NotEnoughUrgentLatencyHiding)
2745{
2746	double       LinesInDETLuma;
2747	double       LinesInDETChroma;
2748	unsigned int LinesInCursorBuffer;
2749	double       CursorBufferSizeInTime;
2750	double       DETBufferSizeInTimeLuma;
2751	double       DETBufferSizeInTimeChroma;
2752
2753	*NotEnoughUrgentLatencyHiding = 0;
2754
2755	if (CursorWidth > 0) {
2756		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2757				(CursorWidth * CursorBPP / 8.0)), 1.0);
2758		if (VRatio > 0) {
2759			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2760			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2761				*NotEnoughUrgentLatencyHiding = 1;
2762				*UrgentBurstFactorCursor = 0;
2763			} else {
2764				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2765						(CursorBufferSizeInTime - UrgentLatency);
2766			}
2767		} else {
2768			*UrgentBurstFactorCursor = 1;
2769		}
2770	}
2771
2772	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2773			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2774
2775	if (VRatio > 0) {
2776		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2777		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2778			*NotEnoughUrgentLatencyHiding = 1;
2779			*UrgentBurstFactorLuma = 0;
2780		} else {
2781			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2782		}
2783	} else {
2784		*UrgentBurstFactorLuma = 1;
2785	}
2786
2787	if (BytePerPixelInDETC > 0) {
2788		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2789					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2790					/ swath_width_chroma_ub;
2791
2792		if (VRatio > 0) {
2793			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2794			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2795				*NotEnoughUrgentLatencyHiding = 1;
2796				*UrgentBurstFactorChroma = 0;
2797			} else {
2798				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2799						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2800			}
2801		} else {
2802			*UrgentBurstFactorChroma = 1;
2803		}
2804	}
2805} // CalculateUrgentBurstFactor
2806
2807void dml32_CalculateDCFCLKDeepSleep(
2808		unsigned int NumberOfActiveSurfaces,
2809		unsigned int BytePerPixelY[],
2810		unsigned int BytePerPixelC[],
2811		double VRatio[],
2812		double VRatioChroma[],
2813		double SwathWidthY[],
2814		double SwathWidthC[],
2815		unsigned int DPPPerSurface[],
2816		double HRatio[],
2817		double HRatioChroma[],
2818		double PixelClock[],
2819		double PSCL_THROUGHPUT[],
2820		double PSCL_THROUGHPUT_CHROMA[],
2821		double Dppclk[],
2822		double ReadBandwidthLuma[],
2823		double ReadBandwidthChroma[],
2824		unsigned int ReturnBusWidth,
2825
2826		/* Output */
2827		double *DCFClkDeepSleep)
2828{
2829	unsigned int k;
2830	double   DisplayPipeLineDeliveryTimeLuma;
2831	double   DisplayPipeLineDeliveryTimeChroma;
2832	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2833	double ReadBandwidth = 0.0;
2834
2835	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2836
2837		if (VRatio[k] <= 1) {
2838			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2839					/ PixelClock[k];
2840		} else {
2841			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2842		}
2843		if (BytePerPixelC[k] == 0) {
2844			DisplayPipeLineDeliveryTimeChroma = 0;
2845		} else {
2846			if (VRatioChroma[k] <= 1) {
2847				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2848						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2849			} else {
2850				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2851						/ Dppclk[k];
2852			}
2853		}
2854
2855		if (BytePerPixelC[k] > 0) {
2856			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2857					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2858					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2859					32.0 / DisplayPipeLineDeliveryTimeChroma);
2860		} else {
2861			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2862					64.0 / DisplayPipeLineDeliveryTimeLuma;
2863		}
2864		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2865
2866#ifdef __DML_VBA_DEBUG__
2867		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2868		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2869#endif
2870	}
2871
2872	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2873		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2874
2875	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2876
2877#ifdef __DML_VBA_DEBUG__
2878	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2879	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2880	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2881	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2882#endif
2883
2884	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2885		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2886#ifdef __DML_VBA_DEBUG__
2887	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2888#endif
2889} // CalculateDCFCLKDeepSleep
2890
2891double dml32_CalculateWriteBackDelay(
2892		enum source_format_class WritebackPixelFormat,
2893		double WritebackHRatio,
2894		double WritebackVRatio,
2895		unsigned int WritebackVTaps,
2896		unsigned int         WritebackDestinationWidth,
2897		unsigned int         WritebackDestinationHeight,
2898		unsigned int         WritebackSourceHeight,
2899		unsigned int HTotal)
2900{
2901	double CalculateWriteBackDelay;
2902	double Line_length;
2903	double Output_lines_last_notclamped;
2904	double WritebackVInit;
2905
2906	WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2907	Line_length = dml_max((double) WritebackDestinationWidth,
2908			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2909	Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2910			dml_ceil(((double)WritebackSourceHeight -
2911					(double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2912	if (Output_lines_last_notclamped < 0) {
2913		CalculateWriteBackDelay = 0;
2914	} else {
2915		CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2916				(HTotal - WritebackDestinationWidth) + 80;
2917	}
2918	return CalculateWriteBackDelay;
2919}
2920
2921void dml32_UseMinimumDCFCLK(
2922		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2923		bool DRRDisplay[],
2924		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2925		unsigned int MaxInterDCNTileRepeaters,
2926		unsigned int MaxPrefetchMode,
2927		double DRAMClockChangeLatencyFinal,
2928		double FCLKChangeLatency,
2929		double SREnterPlusExitTime,
2930		unsigned int ReturnBusWidth,
2931		unsigned int RoundTripPingLatencyCycles,
2932		unsigned int ReorderingBytes,
2933		unsigned int PixelChunkSizeInKByte,
2934		unsigned int MetaChunkSize,
2935		bool GPUVMEnable,
2936		unsigned int GPUVMMaxPageTableLevels,
2937		bool HostVMEnable,
2938		unsigned int NumberOfActiveSurfaces,
2939		double HostVMMinPageSize,
2940		unsigned int HostVMMaxNonCachedPageTableLevels,
2941		bool DynamicMetadataVMEnabled,
2942		bool ImmediateFlipRequirement,
2943		bool ProgressiveToInterlaceUnitInOPP,
2944		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2945		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2946		unsigned int VTotal[],
2947		unsigned int VActive[],
2948		unsigned int DynamicMetadataTransmittedBytes[],
2949		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2950		bool Interlace[],
2951		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2952		double RequiredDISPCLK[][2],
2953		double UrgLatency[],
2954		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2955		double ProjectedDCFClkDeepSleep[][2],
2956		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2957		unsigned int TotalNumberOfActiveDPP[][2],
2958		unsigned int TotalNumberOfDCCActiveDPP[][2],
2959		unsigned int dpte_group_bytes[],
2960		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2961		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2962		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2963		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2964		unsigned int BytePerPixelY[],
2965		unsigned int BytePerPixelC[],
2966		unsigned int HTotal[],
2967		double PixelClock[],
2968		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2969		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2970		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2971		bool DynamicMetadataEnable[],
2972		double ReadBandwidthLuma[],
2973		double ReadBandwidthChroma[],
2974		double DCFCLKPerState[],
2975		/* Output */
2976		double DCFCLKState[][2])
2977{
2978	unsigned int i, j, k;
2979	unsigned int     dummy1;
2980	double dummy2, dummy3;
2981	double   NormalEfficiency;
2982	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2983
2984	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2985	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2986		for  (j = 0; j <= 1; ++j) {
2987			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2988			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2989			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2990			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2991			double MinimumTWait = 0.0;
2992			double DPTEBandwidth;
2993			double DCFCLKRequiredForAverageBandwidth;
2994			unsigned int ExtraLatencyBytes;
2995			double ExtraLatencyCycles;
2996			double DCFCLKRequiredForPeakBandwidth;
2997			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2998			double MinimumTvmPlus2Tr0;
2999
3000			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3001			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3002				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3003						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3004								/ (15.75 * HTotal[k] / PixelClock[k]);
3005			}
3006
3007			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3008				NoOfDPPState[k] = NoOfDPP[i][j][k];
3009
3010			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3011			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3012
3013			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3014					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3015					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3016					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3017					HostVMMaxNonCachedPageTableLevels);
3018			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3019					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3020			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3021				double DCFCLKCyclesRequiredInPrefetch;
3022				double PrefetchTime;
3023
3024				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3025						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3026						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3027								* BytePerPixelC[k]) / NormalEfficiency
3028						/ ReturnBusWidth;
3029				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3030						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3031								/ NormalEfficiency / ReturnBusWidth
3032								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3033						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3034								/ ReturnBusWidth
3035						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3036						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3037				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3038						* HTotal[k] / PixelClock[k];
3039				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3040						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3041						UrgLatency[i] * GPUVMMaxPageTableLevels *
3042						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3043
3044				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3045						UseMALLForPStateChange[k],
3046						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3047						DRRDisplay[k],
3048						DRAMClockChangeLatencyFinal,
3049						FCLKChangeLatency,
3050						UrgLatency[i],
3051						SREnterPlusExitTime);
3052
3053				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3054						MinimumTWait - UrgLatency[i] *
3055						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3056						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3057						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3058						DynamicMetadataVMExtraLatency[k];
3059
3060				if (PrefetchTime > 0) {
3061					double ExpectedVRatioPrefetch;
3062
3063					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3064							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3065							DCFCLKCyclesRequiredInPrefetch);
3066					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3067							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3068							PrefetchPixelLinesTime[k] *
3069							dml_max(1.0, ExpectedVRatioPrefetch) *
3070							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3071					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3072						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3073								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3074								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3075								NormalEfficiency / ReturnBusWidth;
3076					}
3077				} else {
3078					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3079				}
3080				if (DynamicMetadataEnable[k] == true) {
3081					double TSetupPipe;
3082					double TdmbfPipe;
3083					double TdmsksPipe;
3084					double TdmecPipe;
3085					double AllowedTimeForUrgentExtraLatency;
3086
3087					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3088							MaxInterDCNTileRepeaters,
3089							RequiredDPPCLKPerSurface[i][j][k],
3090							RequiredDISPCLK[i][j],
3091							ProjectedDCFClkDeepSleep[i][j],
3092							PixelClock[k],
3093							HTotal[k],
3094							VTotal[k] - VActive[k],
3095							DynamicMetadataTransmittedBytes[k],
3096							DynamicMetadataLinesBeforeActiveRequired[k],
3097							Interlace[k],
3098							ProgressiveToInterlaceUnitInOPP,
3099
3100							/* output */
3101							&TSetupPipe,
3102							&TdmbfPipe,
3103							&TdmecPipe,
3104							&TdmsksPipe,
3105							&dummy1,
3106							&dummy2,
3107							&dummy3);
3108					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3109							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3110							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3111					if (AllowedTimeForUrgentExtraLatency > 0)
3112						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3113								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3114								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3115					else
3116						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3117				}
3118			}
3119			DCFCLKRequiredForPeakBandwidth = 0;
3120			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3121				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3122						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3123			}
3124			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3125					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3126					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3127			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3128				double MaximumTvmPlus2Tr0PlusTsw;
3129
3130				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3131						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3132				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3133					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3134				} else {
3135					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3136							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3137								MinimumTvmPlus2Tr0 -
3138								PrefetchPixelLinesTime[k] / 4),
3139							(2 * ExtraLatencyCycles +
3140								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3141								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3142				}
3143			}
3144			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3145					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3146		}
3147	}
3148}
3149
/*
 * Compute the number of "extra latency" bytes.
 *
 * The total is ReorderingBytes plus one pixel chunk per active DPP and one
 * meta chunk per DCC-active DPP (chunk sizes are multiplied by 1024), and,
 * when GPUVM is enabled, a per-surface term of
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * HostVMInefficiencyFactor
 * where "levels" is derived from HostVMMaxNonCachedPageTableLevels and
 * HostVMMinPageSize (and is 0 unless both GPUVM and HostVM are enabled).
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int surface;
	unsigned int host_vm_levels = 0;
	double total_bytes;

	/* Larger minimum page sizes remove one or two levels, clamped at zero. */
	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no page-table traffic to account for. */
	if (GPUVMEnable != true)
		return total_bytes;

	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface) {
		total_bytes += NumberOfDPP[surface] * dpte_group_bytes[surface] *
				(1 + 8 * host_vm_levels) * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
3190
/*
 * Compute the VUPDATE/VREADY pixel quantities and the dynamic-metadata
 * timing terms for one pipe.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150 / Dppclk,
 *                      repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - the three pixel quantities above, summed and divided by PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec            - HTotal / PixelClock (one line time)
 *   Tdmsks           - half of the vertical blank time when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0, otherwise that
 *                      many line times; halved again for interlaced timing when the
 *                      progressive-to-interlace unit is not in OPP
 *
 * "repeater delay" is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * The clocks are used as divisors and are not checked against zero here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they must be printed with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3245
3246double dml32_CalculateTWait(
3247		unsigned int PrefetchMode,
3248		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3249		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3250		bool DRRDisplay,
3251		double DRAMClockChangeLatency,
3252		double FCLKChangeLatency,
3253		double UrgentLatency,
3254		double SREnterPlusExitTime)
3255{
3256	double TWait = 0.0;
3257
3258	if (PrefetchMode == 0 &&
3259			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3260			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3261			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3262			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3263		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3268	} else {
3269		TWait = UrgentLatency;
3270	}
3271
3272#ifdef __DML_VBA_DEBUG__
3273	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3274	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3275#endif
3276	return TWait;
3277} // CalculateTWait
3278
3279// Function: get_return_bw_mbps
3280// Megabyte per second
3281double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3282		const int VoltageLevel,
3283		const bool HostVMEnable,
3284		const double DCFCLK,
3285		const double FabricClock,
3286		const double DRAMSpeed)
3287{
3288	double ReturnBW = 0.;
3289	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3290	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3291	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3292	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3293			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3294			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3295					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3296	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3299					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300
3301	if (HostVMEnable != true)
3302		ReturnBW = PixelDataOnlyReturnBW;
3303	else
3304		ReturnBW = PixelMixedWithVMDataReturnBW;
3305
3306#ifdef __DML_VBA_DEBUG__
3307	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3309	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3310	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3311	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3312	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3313	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3314	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3315	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3316	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3317	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3318#endif
3319	return ReturnBW;
3320}
3321
3322// Function: get_return_bw_mbps_vm_only
3323// Megabyte per second
3324double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3325		const int VoltageLevel,
3326		const double DCFCLK,
3327		const double FabricClock,
3328		const double DRAMSpeed)
3329{
3330	double VMDataOnlyReturnBW = dml_min3(
3331			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3333					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3335					* (VoltageLevel < 2 ?
3336							soc->pct_ideal_dram_bw_after_urgent_strobe :
3337							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3338#ifdef __DML_VBA_DEBUG__
3339	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3340	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3341	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3342	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3343	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3344#endif
3345	return VMDataOnlyReturnBW;
3346}
3347
3348double dml32_CalculateExtraLatency(
3349		unsigned int RoundTripPingLatencyCycles,
3350		unsigned int ReorderingBytes,
3351		double DCFCLK,
3352		unsigned int TotalNumberOfActiveDPP,
3353		unsigned int PixelChunkSizeInKByte,
3354		unsigned int TotalNumberOfDCCActiveDPP,
3355		unsigned int MetaChunkSize,
3356		double ReturnBW,
3357		bool GPUVMEnable,
3358		bool HostVMEnable,
3359		unsigned int NumberOfActiveSurfaces,
3360		unsigned int NumberOfDPP[],
3361		unsigned int dpte_group_bytes[],
3362		double HostVMInefficiencyFactor,
3363		double HostVMMinPageSize,
3364		unsigned int HostVMMaxNonCachedPageTableLevels)
3365{
3366	double ExtraLatencyBytes;
3367	double ExtraLatency;
3368
3369	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3370			ReorderingBytes,
3371			TotalNumberOfActiveDPP,
3372			PixelChunkSizeInKByte,
3373			TotalNumberOfDCCActiveDPP,
3374			MetaChunkSize,
3375			GPUVMEnable,
3376			HostVMEnable,
3377			NumberOfActiveSurfaces,
3378			NumberOfDPP,
3379			dpte_group_bytes,
3380			HostVMInefficiencyFactor,
3381			HostVMMinPageSize,
3382			HostVMMaxNonCachedPageTableLevels);
3383
3384	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3385
3386#ifdef __DML_VBA_DEBUG__
3387	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3388	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3389	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3390	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3391	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3392#endif
3393
3394	return ExtraLatency;
3395} // CalculateExtraLatency
3396
3397bool dml32_CalculatePrefetchSchedule(
3398		struct vba_vars_st *v,
3399		unsigned int k,
3400		double HostVMInefficiencyFactor,
3401		DmlPipe *myPipe,
3402		unsigned int DSCDelay,
3403		unsigned int DPP_RECOUT_WIDTH,
3404		unsigned int VStartup,
3405		unsigned int MaxVStartup,
3406		double UrgentLatency,
3407		double UrgentExtraLatency,
3408		double TCalc,
3409		unsigned int PDEAndMetaPTEBytesFrame,
3410		unsigned int MetaRowByte,
3411		unsigned int PixelPTEBytesPerRow,
3412		double PrefetchSourceLinesY,
3413		unsigned int SwathWidthY,
3414		unsigned int VInitPreFillY,
3415		unsigned int MaxNumSwathY,
3416		double PrefetchSourceLinesC,
3417		unsigned int SwathWidthC,
3418		unsigned int VInitPreFillC,
3419		unsigned int MaxNumSwathC,
3420		unsigned int swath_width_luma_ub,
3421		unsigned int swath_width_chroma_ub,
3422		unsigned int SwathHeightY,
3423		unsigned int SwathHeightC,
3424		double TWait,
3425		double TPreReq,
3426		bool ExtendPrefetchIfPossible,
3427		/* Output */
3428		double   *DSTXAfterScaler,
3429		double   *DSTYAfterScaler,
3430		double *DestinationLinesForPrefetch,
3431		double *PrefetchBandwidth,
3432		double *DestinationLinesToRequestVMInVBlank,
3433		double *DestinationLinesToRequestRowInVBlank,
3434		double *VRatioPrefetchY,
3435		double *VRatioPrefetchC,
3436		double *RequiredPrefetchPixDataBWLuma,
3437		double *RequiredPrefetchPixDataBWChroma,
3438		bool   *NotEnoughTimeForDynamicMetadata,
3439		double *Tno_bw,
3440		double *prefetch_vmrow_bw,
3441		double *Tdmdl_vm,
3442		double *Tdmdl,
3443		double *TSetup,
3444		unsigned int   *VUpdateOffsetPix,
3445		double   *VUpdateWidthPix,
3446		double   *VReadyOffsetPix)
3447{
3448	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3449	bool MyError = false;
3450	unsigned int DPPCycles, DISPCLKCycles;
3451	double DSTTotalPixelsAfterScaler;
3452	double LineTime;
3453	double dst_y_prefetch_equ;
3454	double prefetch_bw_oto;
3455	double Tvm_oto;
3456	double Tr0_oto;
3457	double Tvm_oto_lines;
3458	double Tr0_oto_lines;
3459	double dst_y_prefetch_oto;
3460	double TimeForFetchingMetaPTE = 0;
3461	double TimeForFetchingRowInVBlank = 0;
3462	double LinesToRequestPrefetchPixelData = 0;
3463	double LinesForPrefetchBandwidth = 0;
3464	unsigned int HostVMDynamicLevelsTrips;
3465	double  trip_to_mem;
3466	double  Tvm_trips;
3467	double  Tr0_trips;
3468	double  Tvm_trips_rounded;
3469	double  Tr0_trips_rounded;
3470	double  Lsw_oto;
3471	double  Tpre_rounded;
3472	double  prefetch_bw_equ;
3473	double  Tvm_equ;
3474	double  Tr0_equ;
3475	double  Tdmbf;
3476	double  Tdmec;
3477	double  Tdmsks;
3478	double  prefetch_sw_bytes;
3479	double  bytes_pp;
3480	double  dep_bytes;
3481	unsigned int max_vratio_pre = v->MaxVRatioPre;
3482	double  min_Lsw;
3483	double  Tsw_est1 = 0;
3484	double  Tsw_est3 = 0;
3485
3486	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3487		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3488	else
3489		HostVMDynamicLevelsTrips = 0;
3490#ifdef __DML_VBA_DEBUG__
3491	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3492	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3493	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3494	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3495			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3496#endif
3497	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3498			v->MaxInterDCNTileRepeaters,
3499			myPipe->Dppclk,
3500			myPipe->Dispclk,
3501			myPipe->DCFClkDeepSleep,
3502			myPipe->PixelClock,
3503			myPipe->HTotal,
3504			myPipe->VBlank,
3505			v->DynamicMetadataTransmittedBytes[k],
3506			v->DynamicMetadataLinesBeforeActiveRequired[k],
3507			myPipe->InterlaceEnable,
3508			myPipe->ProgressiveToInterlaceUnitInOPP,
3509			TSetup,
3510
3511			/* output */
3512			&Tdmbf,
3513			&Tdmec,
3514			&Tdmsks,
3515			VUpdateOffsetPix,
3516			VUpdateWidthPix,
3517			VReadyOffsetPix);
3518
3519	LineTime = myPipe->HTotal / myPipe->PixelClock;
3520	trip_to_mem = UrgentLatency;
3521	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3522
3523	if (v->DynamicMetadataVMEnabled == true)
3524		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3525	else
3526		*Tdmdl = TWait + UrgentExtraLatency;
3527
3528#ifdef __DML_VBA_ALLOW_DELTA__
3529	if (v->DynamicMetadataEnable[k] == false)
3530		*Tdmdl = 0.0;
3531#endif
3532
3533	if (v->DynamicMetadataEnable[k] == true) {
3534		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3535			*NotEnoughTimeForDynamicMetadata = true;
3536#ifdef __DML_VBA_DEBUG__
3537			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3538			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3539					__func__, Tdmbf);
3540			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3541			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3542					__func__, Tdmsks);
3543			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3544					__func__, *Tdmdl);
3545#endif
3546		} else {
3547			*NotEnoughTimeForDynamicMetadata = false;
3548		}
3549	} else {
3550		*NotEnoughTimeForDynamicMetadata = false;
3551	}
3552
3553	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3554			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3555
3556	if (myPipe->ScalerEnabled)
3557		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3558	else
3559		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3560
3561	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3562
3563	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3564
3565	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3566		return true;
3567
3568	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3569			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3570
3571	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3572			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3573			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3574					myPipe->HActive / 2 : 0)
3575			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3576
3577#ifdef __DML_VBA_DEBUG__
3578	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3579	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3580	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3581	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3582	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3583	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3584	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3585	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3586	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3587#endif
3588
3589	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3590		*DSTYAfterScaler = 1;
3591	else
3592		*DSTYAfterScaler = 0;
3593
3594	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3595	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3596	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3597#ifdef __DML_VBA_DEBUG__
3598	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3599	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3600#endif
3601
3602	MyError = false;
3603
3604	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3605
3606	if (v->GPUVMEnable == true) {
3607		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3608		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3609		if (v->GPUVMMaxPageTableLevels >= 3) {
3610			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3611					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3612		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3613			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3614					4.0 * LineTime; // VBA_ERROR
3615			*Tno_bw = UrgentExtraLatency;
3616		} else {
3617			*Tno_bw = 0;
3618		}
3619	} else if (myPipe->DCCEnable == true) {
3620		Tvm_trips_rounded = LineTime / 4.0;
3621		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3622		*Tno_bw = 0;
3623	} else {
3624		Tvm_trips_rounded = LineTime / 4.0;
3625		Tr0_trips_rounded = LineTime / 2.0;
3626		*Tno_bw = 0;
3627	}
3628	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3629	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3630
3631	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3632			|| myPipe->SourcePixelFormat == dm_420_12) {
3633		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3634	} else {
3635		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3636	}
3637
3638	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3639			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3640	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3641			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3642
3643	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3644	min_Lsw = dml_max(min_Lsw, 1.0);
3645	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3646
3647	if (v->GPUVMEnable == true) {
3648		Tvm_oto = dml_max3(
3649				Tvm_trips,
3650				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3651				LineTime / 4.0);
3652	} else
3653		Tvm_oto = LineTime / 4.0;
3654
3655	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3656		Tr0_oto = dml_max4(
3657				Tr0_trips,
3658				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3659				(LineTime - Tvm_oto)/2.0,
3660				LineTime / 4.0);
3661#ifdef __DML_VBA_DEBUG__
3662		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3663				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3664		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3665		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3666		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3667#endif
3668	} else
3669		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3670
3671	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3672	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3673	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3674
3675	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3676			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3677
3678	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3679#ifdef __DML_VBA_DEBUG__
3680	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3681	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3682	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3683	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3684	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3685	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3686	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3687	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3688	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3689	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3690	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3691	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3692	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3693	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3694	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3695	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3696	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3697	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3698	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3699	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3700	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3701	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3702	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3703	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3704	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3705	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3706	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3707#endif
3708
3709	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3710	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3711#ifdef __DML_VBA_DEBUG__
3712	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3713	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3714	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3715	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3716			__func__, VStartup * LineTime);
3717	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3718	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3719	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3720	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3721	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3722	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3723	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3724			__func__, *DSTYAfterScaler);
3725#endif
3726	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3727			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3728
3729	if (prefetch_sw_bytes < dep_bytes)
3730		prefetch_sw_bytes = 2 * dep_bytes;
3731
3732	*PrefetchBandwidth = 0;
3733	*DestinationLinesToRequestVMInVBlank = 0;
3734	*DestinationLinesToRequestRowInVBlank = 0;
3735	*VRatioPrefetchY = 0;
3736	*VRatioPrefetchC = 0;
3737	*RequiredPrefetchPixDataBWLuma = 0;
3738	if (dst_y_prefetch_equ > 1 &&
3739			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3740		double PrefetchBandwidth1;
3741		double PrefetchBandwidth2;
3742		double PrefetchBandwidth3;
3743		double PrefetchBandwidth4;
3744
3745		if (Tpre_rounded - *Tno_bw > 0) {
3746			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3747					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3748					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3749			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3750		} else
3751			PrefetchBandwidth1 = 0;
3752
3753		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3754				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3755			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3756					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3757					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3758		}
3759
3760		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3761			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3762			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3763		else
3764			PrefetchBandwidth2 = 0;
3765
3766		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3767			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3768					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3769			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3770		} else
3771			PrefetchBandwidth3 = 0;
3772
3773
3774		if (VStartup == MaxVStartup &&
3775				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3776				LineTime - Tvm_trips_rounded > 0) {
3777			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3778					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3779		}
3780
3781		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3782			PrefetchBandwidth4 = prefetch_sw_bytes /
3783					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3784		} else {
3785			PrefetchBandwidth4 = 0;
3786		}
3787
3788#ifdef __DML_VBA_DEBUG__
3789		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3790		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3791		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3792		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3793		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3794		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3795		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3796		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3797		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3798#endif
3799		{
3800			bool Case1OK;
3801			bool Case2OK;
3802			bool Case3OK;
3803
3804			if (PrefetchBandwidth1 > 0) {
3805				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3806						>= Tvm_trips_rounded
3807						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3808								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3809					Case1OK = true;
3810				} else {
3811					Case1OK = false;
3812				}
3813			} else {
3814				Case1OK = false;
3815			}
3816
3817			if (PrefetchBandwidth2 > 0) {
3818				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3819						>= Tvm_trips_rounded
3820						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3821						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3822					Case2OK = true;
3823				} else {
3824					Case2OK = false;
3825				}
3826			} else {
3827				Case2OK = false;
3828			}
3829
3830			if (PrefetchBandwidth3 > 0) {
3831				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3832						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3833								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3834								Tr0_trips_rounded) {
3835					Case3OK = true;
3836				} else {
3837					Case3OK = false;
3838				}
3839			} else {
3840				Case3OK = false;
3841			}
3842
3843			if (Case1OK)
3844				prefetch_bw_equ = PrefetchBandwidth1;
3845			else if (Case2OK)
3846				prefetch_bw_equ = PrefetchBandwidth2;
3847			else if (Case3OK)
3848				prefetch_bw_equ = PrefetchBandwidth3;
3849			else
3850				prefetch_bw_equ = PrefetchBandwidth4;
3851
3852#ifdef __DML_VBA_DEBUG__
3853			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3854			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3855			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3856			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3857#endif
3858
3859			if (prefetch_bw_equ > 0) {
3860				if (v->GPUVMEnable == true) {
3861					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3862							HostVMInefficiencyFactor / prefetch_bw_equ,
3863							Tvm_trips, LineTime / 4);
3864				} else {
3865					Tvm_equ = LineTime / 4;
3866				}
3867
3868				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3869					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3870							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3871							(LineTime - Tvm_equ) / 2, LineTime / 4);
3872				} else {
3873					Tr0_equ = (LineTime - Tvm_equ) / 2;
3874				}
3875			} else {
3876				Tvm_equ = 0;
3877				Tr0_equ = 0;
3878#ifdef __DML_VBA_DEBUG__
3879				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3880#endif
3881			}
3882		}
3883
3884		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3885			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3886				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3887			} else {
3888				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3889			}
3890			TimeForFetchingMetaPTE = Tvm_oto;
3891			TimeForFetchingRowInVBlank = Tr0_oto;
3892			*PrefetchBandwidth = prefetch_bw_oto;
3893			/* Clamp to oto for bandwidth calculation */
3894			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3895		} else {
3896			/* For mode programming we want to extend the prefetch as much as possible
3897			 * (up to oto, or as long as we can for equ) if we're not already applying
3898			 * the 60us prefetch requirement. This is to avoid intermittent underflow
3899			 * issues during prefetch.
3900			 *
3901			 * The prefetch extension is applied under the following scenarios:
3902			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
			 * 2. We're using subvp or drr methods of p-state switch, in which case we
			 *    don't care if prefetch takes up more of the blanking time
3905			 *
3906			 * Mode programming typically chooses the smallest prefetch time possible
3907			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
3908			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
3909			 * apply this prefetch extension when p-state in vblank is not required (UCLK
3910			 * p-states take up the most vblank time).
3911			 */
3912			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
3913				MyError = true;
3914			} else {
3915				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3916				TimeForFetchingMetaPTE = Tvm_equ;
3917				TimeForFetchingRowInVBlank = Tr0_equ;
3918				*PrefetchBandwidth = prefetch_bw_equ;
3919				/* Clamp to equ for bandwidth calculation */
3920				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3921			}
3922		}
3923
3924		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3925
3926		*DestinationLinesToRequestRowInVBlank =
3927				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3928
3929		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3930				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3931
3932#ifdef __DML_VBA_DEBUG__
3933		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3934		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3935				__func__, *DestinationLinesToRequestVMInVBlank);
3936		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3937		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3938		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3939				__func__, *DestinationLinesToRequestRowInVBlank);
3940		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3941		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3942#endif
3943
3944		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3945			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3946			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3947#ifdef __DML_VBA_DEBUG__
3948			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3949			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3950			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3951#endif
3952			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3953				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3954					*VRatioPrefetchY =
3955							dml_max((double) PrefetchSourceLinesY /
3956									LinesToRequestPrefetchPixelData,
3957									(double) MaxNumSwathY * SwathHeightY /
3958									(LinesToRequestPrefetchPixelData -
3959									(VInitPreFillY - 3.0) / 2.0));
3960					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3961				} else {
3962					MyError = true;
3963					*VRatioPrefetchY = 0;
3964				}
3965#ifdef __DML_VBA_DEBUG__
3966				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3967				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3968				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3969#endif
3970			}
3971
3972			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3973			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3974
3975#ifdef __DML_VBA_DEBUG__
3976			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3977			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3978			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3979#endif
3980			if ((SwathHeightC > 4)) {
3981				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3982					*VRatioPrefetchC =
3983						dml_max(*VRatioPrefetchC,
3984							(double) MaxNumSwathC * SwathHeightC /
3985							(LinesToRequestPrefetchPixelData -
3986							(VInitPreFillC - 3.0) / 2.0));
3987					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3988				} else {
3989					MyError = true;
3990					*VRatioPrefetchC = 0;
3991				}
3992#ifdef __DML_VBA_DEBUG__
3993				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3994				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3995				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3996#endif
3997			}
3998
3999			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
4000					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
4001					/ LineTime;
4002
4003#ifdef __DML_VBA_DEBUG__
4004			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
4005			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
4006			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4007			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
4008					__func__, *RequiredPrefetchPixDataBWLuma);
4009#endif
4010			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
4011					LinesToRequestPrefetchPixelData
4012					* myPipe->BytePerPixelC
4013					* swath_width_chroma_ub / LineTime;
4014		} else {
4015			MyError = true;
4016#ifdef __DML_VBA_DEBUG__
4017			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
4018					__func__, LinesToRequestPrefetchPixelData);
4019#endif
4020			*VRatioPrefetchY = 0;
4021			*VRatioPrefetchC = 0;
4022			*RequiredPrefetchPixDataBWLuma = 0;
4023			*RequiredPrefetchPixDataBWChroma = 0;
4024		}
4025#ifdef __DML_VBA_DEBUG__
4026		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4027			(double)LinesToRequestPrefetchPixelData * LineTime +
4028			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4029		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4030		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4031			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4032		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4033		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4034			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4035			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4036		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4037				PixelPTEBytesPerRow);
4038#endif
4039	} else {
4040		MyError = true;
4041#ifdef __DML_VBA_DEBUG__
4042		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4043				__func__, dst_y_prefetch_equ);
4044#endif
4045	}
4046
4047	{
4048		double prefetch_vm_bw;
4049		double prefetch_row_bw;
4050
4051		if (PDEAndMetaPTEBytesFrame == 0) {
4052			prefetch_vm_bw = 0;
4053		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4054#ifdef __DML_VBA_DEBUG__
4055			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4056			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4057			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4058					__func__, *DestinationLinesToRequestVMInVBlank);
4059			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4060#endif
4061			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4062					(*DestinationLinesToRequestVMInVBlank * LineTime);
4063#ifdef __DML_VBA_DEBUG__
4064			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4065#endif
4066		} else {
4067			prefetch_vm_bw = 0;
4068			MyError = true;
4069#ifdef __DML_VBA_DEBUG__
4070			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4071					__func__, *DestinationLinesToRequestVMInVBlank);
4072#endif
4073		}
4074
4075		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4076			prefetch_row_bw = 0;
4077		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4078			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4079					(*DestinationLinesToRequestRowInVBlank * LineTime);
4080
4081#ifdef __DML_VBA_DEBUG__
4082			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4083			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4084			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4085					__func__, *DestinationLinesToRequestRowInVBlank);
4086			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4087#endif
4088		} else {
4089			prefetch_row_bw = 0;
4090			MyError = true;
4091#ifdef __DML_VBA_DEBUG__
4092			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4093					__func__, *DestinationLinesToRequestRowInVBlank);
4094#endif
4095		}
4096
4097		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4098	}
4099
4100	if (MyError) {
4101		*PrefetchBandwidth = 0;
4102		TimeForFetchingMetaPTE = 0;
4103		TimeForFetchingRowInVBlank = 0;
4104		*DestinationLinesToRequestVMInVBlank = 0;
4105		*DestinationLinesToRequestRowInVBlank = 0;
4106		*DestinationLinesForPrefetch = 0;
4107		LinesToRequestPrefetchPixelData = 0;
4108		*VRatioPrefetchY = 0;
4109		*VRatioPrefetchC = 0;
4110		*RequiredPrefetchPixDataBWLuma = 0;
4111		*RequiredPrefetchPixDataBWChroma = 0;
4112	}
4113
4114	return MyError;
4115} // CalculatePrefetchSchedule
4116
4117void dml32_CalculateFlipSchedule(
4118		double HostVMInefficiencyFactor,
4119		double UrgentExtraLatency,
4120		double UrgentLatency,
4121		unsigned int GPUVMMaxPageTableLevels,
4122		bool HostVMEnable,
4123		unsigned int HostVMMaxNonCachedPageTableLevels,
4124		bool GPUVMEnable,
4125		double HostVMMinPageSize,
4126		double PDEAndMetaPTEBytesPerFrame,
4127		double MetaRowBytes,
4128		double DPTEBytesPerRow,
4129		double BandwidthAvailableForImmediateFlip,
4130		unsigned int TotImmediateFlipBytes,
4131		enum source_format_class SourcePixelFormat,
4132		double LineTime,
4133		double VRatio,
4134		double VRatioChroma,
4135		double Tno_bw,
4136		bool DCCEnable,
4137		unsigned int dpte_row_height,
4138		unsigned int meta_row_height,
4139		unsigned int dpte_row_height_chroma,
4140		unsigned int meta_row_height_chroma,
4141		bool    use_one_row_for_frame_flip,
4142
4143		/* Output */
4144		double *DestinationLinesToRequestVMInImmediateFlip,
4145		double *DestinationLinesToRequestRowInImmediateFlip,
4146		double *final_flip_bw,
4147		bool *ImmediateFlipSupportedForPipe)
4148{
4149	double min_row_time = 0.0;
4150	unsigned int HostVMDynamicLevelsTrips;
4151	double TimeForFetchingMetaPTEImmediateFlip;
4152	double TimeForFetchingRowInVBlankImmediateFlip;
4153	double ImmediateFlipBW = 1.0;
4154
4155	if (GPUVMEnable == true && HostVMEnable == true)
4156		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4157	else
4158		HostVMDynamicLevelsTrips = 0;
4159
4160#ifdef __DML_VBA_DEBUG__
4161	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4162	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4163#endif
4164
4165	if (TotImmediateFlipBytes > 0) {
4166		if (use_one_row_for_frame_flip) {
4167			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4168					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4169		} else {
4170			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4171					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4172		}
4173		if (GPUVMEnable == true) {
4174			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4175					HostVMInefficiencyFactor / ImmediateFlipBW,
4176					UrgentExtraLatency + UrgentLatency *
4177					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4178					LineTime / 4.0);
4179		} else {
4180			TimeForFetchingMetaPTEImmediateFlip = 0;
4181		}
4182		if ((GPUVMEnable == true || DCCEnable == true)) {
4183			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4184					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4185					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4186		} else {
4187			TimeForFetchingRowInVBlankImmediateFlip = 0;
4188		}
4189
4190		*DestinationLinesToRequestVMInImmediateFlip =
4191				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4192		*DestinationLinesToRequestRowInImmediateFlip =
4193				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4194
4195		if (GPUVMEnable == true) {
4196			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4197					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4198					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4199					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4200		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4201			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4202					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4203		} else {
4204			*final_flip_bw = 0;
4205		}
4206	} else {
4207		TimeForFetchingMetaPTEImmediateFlip = 0;
4208		TimeForFetchingRowInVBlankImmediateFlip = 0;
4209		*DestinationLinesToRequestVMInImmediateFlip = 0;
4210		*DestinationLinesToRequestRowInImmediateFlip = 0;
4211		*final_flip_bw = 0;
4212	}
4213
4214	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4215		if (GPUVMEnable == true && DCCEnable != true) {
4216			min_row_time = dml_min(dpte_row_height *
4217					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4218		} else if (GPUVMEnable != true && DCCEnable == true) {
4219			min_row_time = dml_min(meta_row_height *
4220					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4221		} else {
4222			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4223					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4224					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4225		}
4226	} else {
4227		if (GPUVMEnable == true && DCCEnable != true) {
4228			min_row_time = dpte_row_height * LineTime / VRatio;
4229		} else if (GPUVMEnable != true && DCCEnable == true) {
4230			min_row_time = meta_row_height * LineTime / VRatio;
4231		} else {
4232			min_row_time =
4233				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4234		}
4235	}
4236
4237	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4238			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4239					> min_row_time) {
4240		*ImmediateFlipSupportedForPipe = false;
4241	} else {
4242		*ImmediateFlipSupportedForPipe = true;
4243	}
4244
4245#ifdef __DML_VBA_DEBUG__
4246	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4247	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4248	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4249			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4250	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4251			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4252	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4253	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4254			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4255	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4256	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4257#endif
4258} // CalculateFlipSchedule
4259
4260void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4261		struct vba_vars_st *v,
4262		unsigned int PrefetchMode,
4263		double DCFCLK,
4264		double ReturnBW,
4265		SOCParametersList mmSOCParameters,
4266		double SOCCLK,
4267		double DCFClkDeepSleep,
4268		unsigned int DETBufferSizeY[],
4269		unsigned int DETBufferSizeC[],
4270		unsigned int SwathHeightY[],
4271		unsigned int SwathHeightC[],
4272		double SwathWidthY[],
4273		double SwathWidthC[],
4274		unsigned int DPPPerSurface[],
4275		double BytePerPixelDETY[],
4276		double BytePerPixelDETC[],
4277		double DSTXAfterScaler[],
4278		double DSTYAfterScaler[],
4279		bool UnboundedRequestEnabled,
4280		unsigned int CompressedBufferSizeInkByte,
4281
4282		/* Output */
4283		enum clock_change_support *DRAMClockChangeSupport,
4284		double MaxActiveDRAMClockChangeLatencySupported[],
4285		unsigned int SubViewportLinesNeededInMALL[],
4286		enum dm_fclock_change_support *FCLKChangeSupport,
4287		double *MinActiveFCLKChangeLatencySupported,
4288		bool *USRRetrainingSupport,
4289		double ActiveDRAMClockChangeLatencyMargin[])
4290{
4291	unsigned int i, j, k;
4292	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4293	unsigned int DRAMClockChangeSupportNumber = 0;
4294	unsigned int LastSurfaceWithoutMargin;
4295	unsigned int DRAMClockChangeMethod = 0;
4296	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4297	double MinActiveFCLKChangeMargin = 0.;
4298	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4299	double ActiveClockChangeLatencyHidingY;
4300	double ActiveClockChangeLatencyHidingC;
4301	double ActiveClockChangeLatencyHiding;
4302	double EffectiveDETBufferSizeY;
4303	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4304	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4305	double TotalPixelBW = 0.0;
4306	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4307	double     EffectiveLBLatencyHidingY;
4308	double     EffectiveLBLatencyHidingC;
4309	double     LinesInDETY[DC__NUM_DPP__MAX];
4310	double     LinesInDETC[DC__NUM_DPP__MAX];
4311	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4312	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4313	double     FullDETBufferingTimeY;
4314	double     FullDETBufferingTimeC;
4315	double     WritebackDRAMClockChangeLatencyMargin;
4316	double     WritebackFCLKChangeLatencyMargin;
4317	double     WritebackLatencyHiding;
4318	bool    SameTimingForFCLKChange;
4319
4320	unsigned int    TotalActiveWriteback = 0;
4321	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4322	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4323
4324	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4325	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4326			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4327	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4328	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4329	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4330			+ 10 / DCFClkDeepSleep;
4331	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4332			+ 10 / DCFClkDeepSleep;
4333	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4334			+ 10 / DCFClkDeepSleep;
4335	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4336			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4337
4338#ifdef __DML_VBA_DEBUG__
4339	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4340	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4341	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4342	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4343	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4344	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4345	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4346	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4347	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4348	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4349	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4350			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4351#endif
4352
4353
4354	TotalActiveWriteback = 0;
4355	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4356		if (v->WritebackEnable[k] == true)
4357			TotalActiveWriteback = TotalActiveWriteback + 1;
4358	}
4359
4360	if (TotalActiveWriteback <= 1) {
4361		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4362	} else {
4363		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4364				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4365	}
4366	if (v->USRRetrainingRequiredFinal)
4367		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4368				+ mmSOCParameters.USRRetrainingLatency;
4369
4370	if (TotalActiveWriteback <= 1) {
4371		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4372				+ mmSOCParameters.WritebackLatency;
4373		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4374				+ mmSOCParameters.WritebackLatency;
4375	} else {
4376		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4377				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4378		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4379				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4380	}
4381
4382	if (v->USRRetrainingRequiredFinal)
4383		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4384				+ mmSOCParameters.USRRetrainingLatency;
4385
4386	if (v->USRRetrainingRequiredFinal)
4387		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4388				+ mmSOCParameters.USRRetrainingLatency;
4389
4390#ifdef __DML_VBA_DEBUG__
4391	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4392			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4393	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4394	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4395	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4396	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4397#endif
4398
4399	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4400		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4401				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4402	}
4403
4404	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4405
4406		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4407		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4408
4409
4410#ifdef __DML_VBA_DEBUG__
4411		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4412		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4413		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4414		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4415		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4416#endif
4417
4418		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4419		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4420		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4421
4422		if (UnboundedRequestEnabled) {
4423			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4424					+ CompressedBufferSizeInkByte * 1024
4425							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4426							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4427		}
4428
4429		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4430		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4431		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4432
4433		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4434				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4435
4436		if (v->NumberOfActiveSurfaces > 1) {
4437			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4438					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4439							/ v->PixelClock[k] / v->VRatio[k];
4440		}
4441
4442		if (BytePerPixelDETC[k] > 0) {
4443			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4444			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4445			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4446					/ v->VRatioChroma[k];
4447			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4448					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4449							/ v->PixelClock[k];
4450			if (v->NumberOfActiveSurfaces > 1) {
4451				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4452						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4453								/ v->PixelClock[k] / v->VRatioChroma[k];
4454			}
4455			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4456					ActiveClockChangeLatencyHidingC);
4457		} else {
4458			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4459		}
4460
4461		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4462				- v->Watermark.DRAMClockChangeWatermark;
4463		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4464				- v->Watermark.FCLKChangeWatermark;
4465		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4466
4467		if (v->WritebackEnable[k]) {
4468			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4469					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4470							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4471			if (v->WritebackPixelFormat[k] == dm_444_64)
4472				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4473
4474			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4475					- v->Watermark.WritebackDRAMClockChangeWatermark;
4476
4477			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4478					- v->Watermark.WritebackFCLKChangeWatermark;
4479
4480			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4481					WritebackFCLKChangeLatencyMargin);
4482			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4483					WritebackDRAMClockChangeLatencyMargin);
4484		}
4485		MaxActiveDRAMClockChangeLatencySupported[k] =
4486				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4487						0 :
4488						(ActiveDRAMClockChangeLatencyMargin[k]
4489								+ mmSOCParameters.DRAMClockChangeLatency);
4490	}
4491
4492	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4493		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4494			if (i == j ||
4495					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4496					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4497					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4498					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4499					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4500					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4501					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4502				SynchronizedSurfaces[i][j] = true;
4503			} else {
4504				SynchronizedSurfaces[i][j] = false;
4505			}
4506		}
4507	}
4508
4509	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4510		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4511				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4512				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4513			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4514			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4515			SurfaceWithMinActiveFCLKChangeMargin = k;
4516		}
4517	}
4518
4519	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4520
4521	SameTimingForFCLKChange = true;
4522	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4523		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4524			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4525					(SameTimingForFCLKChange ||
4526					ActiveFCLKChangeLatencyMargin[k] <
4527					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4528				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4529			}
4530			SameTimingForFCLKChange = false;
4531		}
4532	}
4533
4534	if (MinActiveFCLKChangeMargin > 0) {
4535		*FCLKChangeSupport = dm_fclock_change_vactive;
4536	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4537			(PrefetchMode <= 1)) {
4538		*FCLKChangeSupport = dm_fclock_change_vblank;
4539	} else {
4540		*FCLKChangeSupport = dm_fclock_change_unsupported;
4541	}
4542
4543	*USRRetrainingSupport = true;
4544	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4545		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4546				(USRRetrainingLatencyMargin[k] < 0)) {
4547			*USRRetrainingSupport = false;
4548		}
4549	}
4550
4551	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4552		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4553				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4554				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4555				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4556			if (PrefetchMode > 0) {
4557				DRAMClockChangeSupportNumber = 2;
4558			} else if (DRAMClockChangeSupportNumber == 0) {
4559				DRAMClockChangeSupportNumber = 1;
4560				LastSurfaceWithoutMargin = k;
4561			} else if (DRAMClockChangeSupportNumber == 1 &&
4562					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4563				DRAMClockChangeSupportNumber = 2;
4564			}
4565		}
4566	}
4567
4568	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4569		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4570			DRAMClockChangeMethod = 1;
4571		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4572			DRAMClockChangeMethod = 2;
4573	}
4574
4575	if (DRAMClockChangeMethod == 0) {
4576		if (DRAMClockChangeSupportNumber == 0)
4577			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4578		else if (DRAMClockChangeSupportNumber == 1)
4579			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4580		else
4581			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4582	} else if (DRAMClockChangeMethod == 1) {
4583		if (DRAMClockChangeSupportNumber == 0)
4584			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4585		else if (DRAMClockChangeSupportNumber == 1)
4586			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4587		else
4588			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4589	} else {
4590		if (DRAMClockChangeSupportNumber == 0)
4591			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4592		else if (DRAMClockChangeSupportNumber == 1)
4593			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4594		else
4595			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4596	}
4597
4598	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4599		unsigned int dst_y_pstate;
4600		unsigned int src_y_pstate_l;
4601		unsigned int src_y_pstate_c;
4602		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4603
4604		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4605		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4606		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4607		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4608
4609#ifdef __DML_VBA_DEBUG__
4610dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4611dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4612dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4613dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4614dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4615dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4616dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4617dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4618dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4619dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4620#endif
4621		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4622
4623		if (BytePerPixelDETC[k] > 0) {
4624			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4625			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4626			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4627			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4628
4629#ifdef __DML_VBA_DEBUG__
4630dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4631dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4632dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4633dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4634#endif
4635		}
4636	}
4637#ifdef __DML_VBA_DEBUG__
4638	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4639	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4640	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4641			__func__, *MinActiveFCLKChangeLatencySupported);
4642	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4643#endif
4644} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4645
4646double dml32_CalculateWriteBackDISPCLK(
4647		enum source_format_class WritebackPixelFormat,
4648		double PixelClock,
4649		double WritebackHRatio,
4650		double WritebackVRatio,
4651		unsigned int WritebackHTaps,
4652		unsigned int WritebackVTaps,
4653		unsigned int   WritebackSourceWidth,
4654		unsigned int   WritebackDestinationWidth,
4655		unsigned int HTotal,
4656		unsigned int WritebackLineBufferSize,
4657		double DISPCLKDPPCLKVCOSpeed)
4658{
4659	double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4660
4661	DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4662	DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4663	DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4664			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4665	return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4666}
4667
4668void dml32_CalculateMinAndMaxPrefetchMode(
4669		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4670		unsigned int             *MinPrefetchMode,
4671		unsigned int             *MaxPrefetchMode)
4672{
4673	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4674		*MinPrefetchMode = 3;
4675		*MaxPrefetchMode = 3;
4676	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4677		*MinPrefetchMode = 2;
4678		*MaxPrefetchMode = 2;
4679	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4680		*MinPrefetchMode = 1;
4681		*MaxPrefetchMode = 1;
4682	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4683		*MinPrefetchMode = 0;
4684		*MaxPrefetchMode = 0;
4685	} else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4686			dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4687		*MinPrefetchMode = 0;
4688		*MaxPrefetchMode = 3;
4689	} else {
4690		*MinPrefetchMode = 0;
4691		*MaxPrefetchMode = 3;
4692	}
4693} // CalculateMinAndMaxPrefetchMode
4694
4695void dml32_CalculatePixelDeliveryTimes(
4696		unsigned int             NumberOfActiveSurfaces,
4697		double              VRatio[],
4698		double              VRatioChroma[],
4699		double              VRatioPrefetchY[],
4700		double              VRatioPrefetchC[],
4701		unsigned int             swath_width_luma_ub[],
4702		unsigned int             swath_width_chroma_ub[],
4703		unsigned int             DPPPerSurface[],
4704		double              HRatio[],
4705		double              HRatioChroma[],
4706		double              PixelClock[],
4707		double              PSCL_THROUGHPUT[],
4708		double              PSCL_THROUGHPUT_CHROMA[],
4709		double              Dppclk[],
4710		unsigned int             BytePerPixelC[],
4711		enum dm_rotation_angle   SourceRotation[],
4712		unsigned int             NumberOfCursors[],
4713		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4714		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4715		unsigned int             BlockWidth256BytesY[],
4716		unsigned int             BlockHeight256BytesY[],
4717		unsigned int             BlockWidth256BytesC[],
4718		unsigned int             BlockHeight256BytesC[],
4719
4720		/* Output */
4721		double              DisplayPipeLineDeliveryTimeLuma[],
4722		double              DisplayPipeLineDeliveryTimeChroma[],
4723		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4724		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4725		double              DisplayPipeRequestDeliveryTimeLuma[],
4726		double              DisplayPipeRequestDeliveryTimeChroma[],
4727		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4728		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4729		double              CursorRequestDeliveryTime[],
4730		double              CursorRequestDeliveryTimePrefetch[])
4731{
4732	double   req_per_swath_ub;
4733	unsigned int k;
4734
4735	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4736
4737#ifdef __DML_VBA_DEBUG__
4738		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4739		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4740		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4741		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4742		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4743		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4744		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4745		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4746		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4747		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4748		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4749#endif
4750
4751		if (VRatio[k] <= 1) {
4752			DisplayPipeLineDeliveryTimeLuma[k] =
4753					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4754		} else {
4755			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4756		}
4757
4758		if (BytePerPixelC[k] == 0) {
4759			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4760		} else {
4761			if (VRatioChroma[k] <= 1) {
4762				DisplayPipeLineDeliveryTimeChroma[k] =
4763					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4764			} else {
4765				DisplayPipeLineDeliveryTimeChroma[k] =
4766					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4767			}
4768		}
4769
4770		if (VRatioPrefetchY[k] <= 1) {
4771			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4772					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4773		} else {
4774			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4775					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4776		}
4777
4778		if (BytePerPixelC[k] == 0) {
4779			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4780		} else {
4781			if (VRatioPrefetchC[k] <= 1) {
4782				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4783						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4784			} else {
4785				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4786						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4787			}
4788		}
4789#ifdef __DML_VBA_DEBUG__
4790		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4791				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4792		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4793				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4794		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4795				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4796		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4797				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4798#endif
4799	}
4800
4801	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4802		if (!IsVertical(SourceRotation[k]))
4803			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4804		else
4805			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4806#ifdef __DML_VBA_DEBUG__
4807		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4808#endif
4809
4810		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4811		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4812				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4813		if (BytePerPixelC[k] == 0) {
4814			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4815			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4816		} else {
4817			if (!IsVertical(SourceRotation[k]))
4818				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4819			else
4820				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4821#ifdef __DML_VBA_DEBUG__
4822			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4823#endif
4824			DisplayPipeRequestDeliveryTimeChroma[k] =
4825					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4826			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4827					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4828		}
4829#ifdef __DML_VBA_DEBUG__
4830		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4831				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4832		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4833				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4834		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4835				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4836		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4837				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4838#endif
4839	}
4840
4841	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4842		unsigned int cursor_req_per_width;
4843
4844		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4845				256.0 / 8.0, 1.0);
4846		if (NumberOfCursors[k] > 0) {
4847			if (VRatio[k] <= 1) {
4848				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4849						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4850			} else {
4851				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4852						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4853			}
4854			if (VRatioPrefetchY[k] <= 1) {
4855				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4856						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4857			} else {
4858				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4859						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4860			}
4861		} else {
4862			CursorRequestDeliveryTime[k] = 0;
4863			CursorRequestDeliveryTimePrefetch[k] = 0;
4864		}
4865#ifdef __DML_VBA_DEBUG__
4866		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4867				__func__, k, NumberOfCursors[k]);
4868		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4869				__func__, k, CursorRequestDeliveryTime[k]);
4870		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4871				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4872#endif
4873	}
4874} // CalculatePixelDeliveryTimes
4875
4876void dml32_CalculateMetaAndPTETimes(
4877		bool use_one_row_for_frame[],
4878		unsigned int NumberOfActiveSurfaces,
4879		bool GPUVMEnable,
4880		unsigned int MetaChunkSize,
4881		unsigned int MinMetaChunkSizeBytes,
4882		unsigned int    HTotal[],
4883		double  VRatio[],
4884		double  VRatioChroma[],
4885		double  DestinationLinesToRequestRowInVBlank[],
4886		double  DestinationLinesToRequestRowInImmediateFlip[],
4887		bool DCCEnable[],
4888		double  PixelClock[],
4889		unsigned int BytePerPixelY[],
4890		unsigned int BytePerPixelC[],
4891		enum dm_rotation_angle SourceRotation[],
4892		unsigned int dpte_row_height[],
4893		unsigned int dpte_row_height_chroma[],
4894		unsigned int meta_row_width[],
4895		unsigned int meta_row_width_chroma[],
4896		unsigned int meta_row_height[],
4897		unsigned int meta_row_height_chroma[],
4898		unsigned int meta_req_width[],
4899		unsigned int meta_req_width_chroma[],
4900		unsigned int meta_req_height[],
4901		unsigned int meta_req_height_chroma[],
4902		unsigned int dpte_group_bytes[],
4903		unsigned int    PTERequestSizeY[],
4904		unsigned int    PTERequestSizeC[],
4905		unsigned int    PixelPTEReqWidthY[],
4906		unsigned int    PixelPTEReqHeightY[],
4907		unsigned int    PixelPTEReqWidthC[],
4908		unsigned int    PixelPTEReqHeightC[],
4909		unsigned int    dpte_row_width_luma_ub[],
4910		unsigned int    dpte_row_width_chroma_ub[],
4911
4912		/* Output */
4913		double DST_Y_PER_PTE_ROW_NOM_L[],
4914		double DST_Y_PER_PTE_ROW_NOM_C[],
4915		double DST_Y_PER_META_ROW_NOM_L[],
4916		double DST_Y_PER_META_ROW_NOM_C[],
4917		double TimePerMetaChunkNominal[],
4918		double TimePerChromaMetaChunkNominal[],
4919		double TimePerMetaChunkVBlank[],
4920		double TimePerChromaMetaChunkVBlank[],
4921		double TimePerMetaChunkFlip[],
4922		double TimePerChromaMetaChunkFlip[],
4923		double time_per_pte_group_nom_luma[],
4924		double time_per_pte_group_vblank_luma[],
4925		double time_per_pte_group_flip_luma[],
4926		double time_per_pte_group_nom_chroma[],
4927		double time_per_pte_group_vblank_chroma[],
4928		double time_per_pte_group_flip_chroma[])
4929{
4930	unsigned int   meta_chunk_width;
4931	unsigned int   min_meta_chunk_width;
4932	unsigned int   meta_chunk_per_row_int;
4933	unsigned int   meta_row_remainder;
4934	unsigned int   meta_chunk_threshold;
4935	unsigned int   meta_chunks_per_row_ub;
4936	unsigned int   meta_chunk_width_chroma;
4937	unsigned int   min_meta_chunk_width_chroma;
4938	unsigned int   meta_chunk_per_row_int_chroma;
4939	unsigned int   meta_row_remainder_chroma;
4940	unsigned int   meta_chunk_threshold_chroma;
4941	unsigned int   meta_chunks_per_row_ub_chroma;
4942	unsigned int   dpte_group_width_luma;
4943	unsigned int   dpte_groups_per_row_luma_ub;
4944	unsigned int   dpte_group_width_chroma;
4945	unsigned int   dpte_groups_per_row_chroma_ub;
4946	unsigned int k;
4947
4948	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4949		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4950		if (BytePerPixelC[k] == 0)
4951			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4952		else
4953			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4954		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4955		if (BytePerPixelC[k] == 0)
4956			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4957		else
4958			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4959	}
4960
4961	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4962		if (DCCEnable[k] == true) {
4963			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4964			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4965			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4966			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4967			if (!IsVertical(SourceRotation[k]))
4968				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4969			else
4970				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4971
4972			if (meta_row_remainder <= meta_chunk_threshold)
4973				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4974			else
4975				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4976
4977			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4978					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4979			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4980					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4981			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4982					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4983			if (BytePerPixelC[k] == 0) {
4984				TimePerChromaMetaChunkNominal[k] = 0;
4985				TimePerChromaMetaChunkVBlank[k] = 0;
4986				TimePerChromaMetaChunkFlip[k] = 0;
4987			} else {
4988				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4989						meta_row_height_chroma[k];
4990				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4991						meta_row_height_chroma[k];
4992				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4993						meta_chunk_width_chroma;
4994				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4995				if (!IsVertical(SourceRotation[k])) {
4996					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4997							meta_req_width_chroma[k];
4998				} else {
4999					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
5000							meta_req_height_chroma[k];
5001				}
5002				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
5003					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5004				else
5005					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5006
5007				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
5008						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5009				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
5010						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5011				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5012						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5013			}
5014		} else {
5015			TimePerMetaChunkNominal[k] = 0;
5016			TimePerMetaChunkVBlank[k] = 0;
5017			TimePerMetaChunkFlip[k] = 0;
5018			TimePerChromaMetaChunkNominal[k] = 0;
5019			TimePerChromaMetaChunkVBlank[k] = 0;
5020			TimePerChromaMetaChunkFlip[k] = 0;
5021		}
5022	}
5023
5024	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5025		if (GPUVMEnable == true) {
5026			if (!IsVertical(SourceRotation[k])) {
5027				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5028						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5029			} else {
5030				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5031						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5032			}
5033
5034			if (use_one_row_for_frame[k]) {
5035				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5036						(double) dpte_group_width_luma / 2.0, 1.0);
5037			} else {
5038				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5039						(double) dpte_group_width_luma, 1.0);
5040			}
5041#ifdef __DML_VBA_DEBUG__
5042			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5043					__func__, k, use_one_row_for_frame[k]);
5044			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5045					__func__, k, dpte_group_bytes[k]);
5046			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5047					__func__, k, PTERequestSizeY[k]);
5048			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5049					__func__, k, PixelPTEReqWidthY[k]);
5050			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5051					__func__, k, PixelPTEReqHeightY[k]);
5052			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5053					__func__, k, dpte_row_width_luma_ub[k]);
5054			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5055					__func__, k, dpte_group_width_luma);
5056			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5057					__func__, k, dpte_groups_per_row_luma_ub);
5058#endif
5059
5060			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5061					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5062			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5063					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5064			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5065					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5066			if (BytePerPixelC[k] == 0) {
5067				time_per_pte_group_nom_chroma[k] = 0;
5068				time_per_pte_group_vblank_chroma[k] = 0;
5069				time_per_pte_group_flip_chroma[k] = 0;
5070			} else {
5071				if (!IsVertical(SourceRotation[k])) {
5072					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5073							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5074				} else {
5075					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5076							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5077				}
5078
5079				if (use_one_row_for_frame[k]) {
5080					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5081							(double) dpte_group_width_chroma / 2.0, 1.0);
5082				} else {
5083					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5084							(double) dpte_group_width_chroma, 1.0);
5085				}
5086#ifdef __DML_VBA_DEBUG__
5087				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5088						__func__, k, dpte_row_width_chroma_ub[k]);
5089				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5090						__func__, k, dpte_group_width_chroma);
5091				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5092						__func__, k, dpte_groups_per_row_chroma_ub);
5093#endif
5094				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5095						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5096				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5097						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5098				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5099						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5100			}
5101		} else {
5102			time_per_pte_group_nom_luma[k] = 0;
5103			time_per_pte_group_vblank_luma[k] = 0;
5104			time_per_pte_group_flip_luma[k] = 0;
5105			time_per_pte_group_nom_chroma[k] = 0;
5106			time_per_pte_group_vblank_chroma[k] = 0;
5107			time_per_pte_group_flip_chroma[k] = 0;
5108		}
5109#ifdef __DML_VBA_DEBUG__
5110		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5111				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5112		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5113				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5114		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5115				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5116		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5117				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5118		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5119				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5120		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5121				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5122		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5123				__func__, k, TimePerMetaChunkNominal[k]);
5124		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5125				__func__, k, TimePerMetaChunkVBlank[k]);
5126		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5127				__func__, k, TimePerMetaChunkFlip[k]);
5128		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5129				__func__, k, TimePerChromaMetaChunkNominal[k]);
5130		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5131				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5132		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5133				__func__, k, TimePerChromaMetaChunkFlip[k]);
5134		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5135				__func__, k, time_per_pte_group_nom_luma[k]);
5136		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5137				__func__, k, time_per_pte_group_vblank_luma[k]);
5138		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5139				__func__, k, time_per_pte_group_flip_luma[k]);
5140		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5141				__func__, k, time_per_pte_group_nom_chroma[k]);
5142		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5143				__func__, k, time_per_pte_group_vblank_chroma[k]);
5144		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5145				__func__, k, time_per_pte_group_flip_chroma[k]);
5146#endif
5147	}
5148} // CalculateMetaAndPTETimes
5149
/*
 * dml32_CalculateVMGroupAndRequestTimes() - fill the four per-surface
 * TimePerVM* output arrays.
 *
 * For each surface k in [0, NumberOfActiveSurfaces):
 *
 *  - When GPUVMEnable is false, or when DCCEnable[k] is false and
 *    GPUVMMaxPageTableLevels <= 1, all four outputs for k are set to 0.
 *  - Otherwise a group count and a request count are accumulated from the
 *    dpde0_bytes_per_frame_ub_* and/or meta_pte_bytes_per_frame_ub_* inputs
 *    (the chroma "_c" terms only when BytePerPixelC[k] > 0):
 *        DCC off                       -> dpde0 terms only
 *        DCC on, page table levels == 1 -> meta_pte terms only
 *        DCC on, any other level count  -> both, plus one extra group per
 *                                          contributing plane
 *    Group terms are byte counts divided by vm_group_bytes[k] and passed
 *    through dml_ceil(..., 1); request terms are byte counts divided by 64
 *    using unsigned integer division.
 *  - Each output is DestinationLinesToRequestVM*[k] * HTotal[k] /
 *    PixelClock[k], divided by the group count (TimePerVMGroup*) or the
 *    request count (TimePerVMRequest*), and then halved when
 *    GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are part of the
 * signature but are not read by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to budget for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			/*
			 * The dpde0 byte counts take part unless DCC is on with
			 * exactly one page table level; the meta_pte byte counts
			 * take part whenever DCC is on.
			 */
			const bool use_dpde0 = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
			const bool use_meta_pte = DCCEnable[k];
			/* Summed as double, converted to an integer count once. */
			double group_sum = 0;
			unsigned int group_count;
			unsigned int request_count = 0;

			/* With both sources active, one extra group per plane. */
			if (use_dpde0 && use_meta_pte)
				group_sum = has_chroma ? 2 : 1;

			if (use_dpde0) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (use_meta_pte) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			group_count = group_sum;

			/* lines * HTotal / PixelClock, split across groups/requests */
			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / request_count;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / request_count;

			/* More than two page table levels: halve every budget. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5303
5304void dml32_CalculateDCCConfiguration(
5305		bool             DCCEnabled,
5306		bool             DCCProgrammingAssumesScanDirectionUnknown,
5307		enum source_format_class SourcePixelFormat,
5308		unsigned int             SurfaceWidthLuma,
5309		unsigned int             SurfaceWidthChroma,
5310		unsigned int             SurfaceHeightLuma,
5311		unsigned int             SurfaceHeightChroma,
5312		unsigned int                nomDETInKByte,
5313		unsigned int             RequestHeight256ByteLuma,
5314		unsigned int             RequestHeight256ByteChroma,
5315		enum dm_swizzle_mode     TilingFormat,
5316		unsigned int             BytePerPixelY,
5317		unsigned int             BytePerPixelC,
5318		double              BytePerPixelDETY,
5319		double              BytePerPixelDETC,
5320		enum dm_rotation_angle   SourceRotation,
5321		/* Output */
5322		unsigned int        *MaxUncompressedBlockLuma,
5323		unsigned int        *MaxUncompressedBlockChroma,
5324		unsigned int        *MaxCompressedBlockLuma,
5325		unsigned int        *MaxCompressedBlockChroma,
5326		unsigned int        *IndependentBlockLuma,
5327		unsigned int        *IndependentBlockChroma)
5328{
5329	typedef enum {
5330		REQ_256Bytes,
5331		REQ_128BytesNonContiguous,
5332		REQ_128BytesContiguous,
5333		REQ_NA
5334	} RequestType;
5335
5336	RequestType   RequestLuma;
5337	RequestType   RequestChroma;
5338
5339	unsigned int   segment_order_horz_contiguous_luma;
5340	unsigned int   segment_order_horz_contiguous_chroma;
5341	unsigned int   segment_order_vert_contiguous_luma;
5342	unsigned int   segment_order_vert_contiguous_chroma;
5343	unsigned int req128_horz_wc_l;
5344	unsigned int req128_horz_wc_c;
5345	unsigned int req128_vert_wc_l;
5346	unsigned int req128_vert_wc_c;
5347	unsigned int MAS_vp_horz_limit;
5348	unsigned int MAS_vp_vert_limit;
5349	unsigned int max_vp_horz_width;
5350	unsigned int max_vp_vert_height;
5351	unsigned int eff_surf_width_l;
5352	unsigned int eff_surf_width_c;
5353	unsigned int eff_surf_height_l;
5354	unsigned int eff_surf_height_c;
5355	unsigned int full_swath_bytes_horz_wc_l;
5356	unsigned int full_swath_bytes_horz_wc_c;
5357	unsigned int full_swath_bytes_vert_wc_l;
5358	unsigned int full_swath_bytes_vert_wc_c;
5359	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5360
5361	unsigned int   yuv420;
5362	unsigned int   horz_div_l;
5363	unsigned int   horz_div_c;
5364	unsigned int   vert_div_l;
5365	unsigned int   vert_div_c;
5366
5367	unsigned int     swath_buf_size;
5368	double   detile_buf_vp_horz_limit;
5369	double   detile_buf_vp_vert_limit;
5370
5371	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5372			SourcePixelFormat == dm_420_12) ? 1 : 0);
5373	horz_div_l = 1;
5374	horz_div_c = 1;
5375	vert_div_l = 1;
5376	vert_div_c = 1;
5377
5378	if (BytePerPixelY == 1)
5379		vert_div_l = 0;
5380	if (BytePerPixelC == 1)
5381		vert_div_c = 0;
5382
5383	if (BytePerPixelC == 0) {
5384		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5385		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5386				BytePerPixelY / (1 + horz_div_l));
5387		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5388				(1 + vert_div_l));
5389	} else {
5390		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5391		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5392				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5393				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5394		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5395				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5396				(1 + vert_div_c) / (1 + yuv420));
5397	}
5398
5399	if (SourcePixelFormat == dm_420_10) {
5400		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5401		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5402	}
5403
5404	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5405	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5406
5407	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5408	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5409	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5410	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5411	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5412	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5413	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5414	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5415
5416	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5417	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5418	if (BytePerPixelC > 0) {
5419		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5420		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5421	} else {
5422		full_swath_bytes_horz_wc_c = 0;
5423		full_swath_bytes_vert_wc_c = 0;
5424	}
5425
5426	if (SourcePixelFormat == dm_420_10) {
5427		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5428		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5429		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5430		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5431	}
5432
5433	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5434		req128_horz_wc_l = 0;
5435		req128_horz_wc_c = 0;
5436	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5437			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5438		req128_horz_wc_l = 0;
5439		req128_horz_wc_c = 1;
5440	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5441			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5442		req128_horz_wc_l = 1;
5443		req128_horz_wc_c = 0;
5444	} else {
5445		req128_horz_wc_l = 1;
5446		req128_horz_wc_c = 1;
5447	}
5448
5449	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5450		req128_vert_wc_l = 0;
5451		req128_vert_wc_c = 0;
5452	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5453			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5454		req128_vert_wc_l = 0;
5455		req128_vert_wc_c = 1;
5456	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5457			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5458		req128_vert_wc_l = 1;
5459		req128_vert_wc_c = 0;
5460	} else {
5461		req128_vert_wc_l = 1;
5462		req128_vert_wc_c = 1;
5463	}
5464
5465	if (BytePerPixelY == 2) {
5466		segment_order_horz_contiguous_luma = 0;
5467		segment_order_vert_contiguous_luma = 1;
5468	} else {
5469		segment_order_horz_contiguous_luma = 1;
5470		segment_order_vert_contiguous_luma = 0;
5471	}
5472
5473	if (BytePerPixelC == 2) {
5474		segment_order_horz_contiguous_chroma = 0;
5475		segment_order_vert_contiguous_chroma = 1;
5476	} else {
5477		segment_order_horz_contiguous_chroma = 1;
5478		segment_order_vert_contiguous_chroma = 0;
5479	}
5480#ifdef __DML_VBA_DEBUG__
5481	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5482	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5483	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5484	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5485	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5486	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5487	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5488	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5489	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5490			__func__, segment_order_horz_contiguous_chroma);
5491#endif
5492
5493	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5494		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5495			RequestLuma = REQ_256Bytes;
5496		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5497				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5498			RequestLuma = REQ_128BytesNonContiguous;
5499		else
5500			RequestLuma = REQ_128BytesContiguous;
5501
5502		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5503			RequestChroma = REQ_256Bytes;
5504		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5505				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5506			RequestChroma = REQ_128BytesNonContiguous;
5507		else
5508			RequestChroma = REQ_128BytesContiguous;
5509
5510	} else if (!IsVertical(SourceRotation)) {
5511		if (req128_horz_wc_l == 0)
5512			RequestLuma = REQ_256Bytes;
5513		else if (segment_order_horz_contiguous_luma == 0)
5514			RequestLuma = REQ_128BytesNonContiguous;
5515		else
5516			RequestLuma = REQ_128BytesContiguous;
5517
5518		if (req128_horz_wc_c == 0)
5519			RequestChroma = REQ_256Bytes;
5520		else if (segment_order_horz_contiguous_chroma == 0)
5521			RequestChroma = REQ_128BytesNonContiguous;
5522		else
5523			RequestChroma = REQ_128BytesContiguous;
5524
5525	} else {
5526		if (req128_vert_wc_l == 0)
5527			RequestLuma = REQ_256Bytes;
5528		else if (segment_order_vert_contiguous_luma == 0)
5529			RequestLuma = REQ_128BytesNonContiguous;
5530		else
5531			RequestLuma = REQ_128BytesContiguous;
5532
5533		if (req128_vert_wc_c == 0)
5534			RequestChroma = REQ_256Bytes;
5535		else if (segment_order_vert_contiguous_chroma == 0)
5536			RequestChroma = REQ_128BytesNonContiguous;
5537		else
5538			RequestChroma = REQ_128BytesContiguous;
5539	}
5540
5541	if (RequestLuma == REQ_256Bytes) {
5542		*MaxUncompressedBlockLuma = 256;
5543		*MaxCompressedBlockLuma = 256;
5544		*IndependentBlockLuma = 0;
5545	} else if (RequestLuma == REQ_128BytesContiguous) {
5546		*MaxUncompressedBlockLuma = 256;
5547		*MaxCompressedBlockLuma = 128;
5548		*IndependentBlockLuma = 128;
5549	} else {
5550		*MaxUncompressedBlockLuma = 256;
5551		*MaxCompressedBlockLuma = 64;
5552		*IndependentBlockLuma = 64;
5553	}
5554
5555	if (RequestChroma == REQ_256Bytes) {
5556		*MaxUncompressedBlockChroma = 256;
5557		*MaxCompressedBlockChroma = 256;
5558		*IndependentBlockChroma = 0;
5559	} else if (RequestChroma == REQ_128BytesContiguous) {
5560		*MaxUncompressedBlockChroma = 256;
5561		*MaxCompressedBlockChroma = 128;
5562		*IndependentBlockChroma = 128;
5563	} else {
5564		*MaxUncompressedBlockChroma = 256;
5565		*MaxCompressedBlockChroma = 64;
5566		*IndependentBlockChroma = 64;
5567	}
5568
5569	if (DCCEnabled != true || BytePerPixelC == 0) {
5570		*MaxUncompressedBlockChroma = 0;
5571		*MaxCompressedBlockChroma = 0;
5572		*IndependentBlockChroma = 0;
5573	}
5574
5575	if (DCCEnabled != true) {
5576		*MaxUncompressedBlockLuma = 0;
5577		*MaxCompressedBlockLuma = 0;
5578		*IndependentBlockLuma = 0;
5579	}
5580
5581#ifdef __DML_VBA_DEBUG__
5582	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5583	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5584	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5585	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5586	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5587	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5588#endif
5589
5590} // CalculateDCCConfiguration
5591
5592void dml32_CalculateStutterEfficiency(
5593		unsigned int      CompressedBufferSizeInkByte,
5594		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5595		bool   UnboundedRequestEnabled,
5596		unsigned int      MetaFIFOSizeInKEntries,
5597		unsigned int      ZeroSizeBufferEntries,
5598		unsigned int      PixelChunkSizeInKByte,
5599		unsigned int   NumberOfActiveSurfaces,
5600		unsigned int      ROBBufferSizeInKByte,
5601		double    TotalDataReadBandwidth,
5602		double    DCFCLK,
5603		double    ReturnBW,
5604		unsigned int      CompbufReservedSpace64B,
5605		unsigned int      CompbufReservedSpaceZs,
5606		double    SRExitTime,
5607		double    SRExitZ8Time,
5608		bool   SynchronizeTimingsFinal,
5609		unsigned int   BlendingAndTiming[],
5610		double    StutterEnterPlusExitWatermark,
5611		double    Z8StutterEnterPlusExitWatermark,
5612		bool   ProgressiveToInterlaceUnitInOPP,
5613		bool   Interlace[],
5614		double    MinTTUVBlank[],
5615		unsigned int   DPPPerSurface[],
5616		unsigned int      DETBufferSizeY[],
5617		unsigned int   BytePerPixelY[],
5618		double    BytePerPixelDETY[],
5619		double      SwathWidthY[],
5620		unsigned int   SwathHeightY[],
5621		unsigned int   SwathHeightC[],
5622		double    NetDCCRateLuma[],
5623		double    NetDCCRateChroma[],
5624		double    DCCFractionOfZeroSizeRequestsLuma[],
5625		double    DCCFractionOfZeroSizeRequestsChroma[],
5626		unsigned int      HTotal[],
5627		unsigned int      VTotal[],
5628		double    PixelClock[],
5629		double    VRatio[],
5630		enum dm_rotation_angle SourceRotation[],
5631		unsigned int   BlockHeight256BytesY[],
5632		unsigned int   BlockWidth256BytesY[],
5633		unsigned int   BlockHeight256BytesC[],
5634		unsigned int   BlockWidth256BytesC[],
5635		unsigned int   DCCYMaxUncompressedBlock[],
5636		unsigned int   DCCCMaxUncompressedBlock[],
5637		unsigned int      VActive[],
5638		bool   DCCEnable[],
5639		bool   WritebackEnable[],
5640		double    ReadBandwidthSurfaceLuma[],
5641		double    ReadBandwidthSurfaceChroma[],
5642		double    meta_row_bw[],
5643		double    dpte_row_bw[],
5644
5645		/* Output */
5646		double   *StutterEfficiencyNotIncludingVBlank,
5647		double   *StutterEfficiency,
5648		unsigned int     *NumberOfStutterBurstsPerFrame,
5649		double   *Z8StutterEfficiencyNotIncludingVBlank,
5650		double   *Z8StutterEfficiency,
5651		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5652		double   *StutterPeriod,
5653		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5654{
5655
5656	bool FoundCriticalSurface = false;
5657	unsigned int SwathSizeCriticalSurface = 0;
5658	unsigned int LastChunkOfSwathSize;
5659	unsigned int MissingPartOfLastSwathOfDETSize;
5660	double LastZ8StutterPeriod = 0.0;
5661	double LastStutterPeriod = 0.0;
5662	unsigned int TotalNumberOfActiveOTG = 0;
5663	double doublePixelClock;
5664	unsigned int doubleHTotal;
5665	unsigned int doubleVTotal;
5666	bool SameTiming = true;
5667	double DETBufferingTimeY;
5668	double SwathWidthYCriticalSurface = 0.0;
5669	double SwathHeightYCriticalSurface = 0.0;
5670	double VActiveTimeCriticalSurface = 0.0;
5671	double FrameTimeCriticalSurface = 0.0;
5672	unsigned int BytePerPixelYCriticalSurface = 0;
5673	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5674	unsigned int DETBufferSizeYCriticalSurface = 0;
5675	double MinTTUVBlankCriticalSurface = 0.0;
5676	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5677	bool doublePlaneCriticalSurface = 0;
5678	bool doublePipeCriticalSurface = 0;
5679	double TotalCompressedReadBandwidth;
5680	double TotalRowReadBandwidth;
5681	double AverageDCCCompressionRate;
5682	double EffectiveCompressedBufferSize;
5683	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5684	double StutterBurstTime;
5685	unsigned int TotalActiveWriteback;
5686	double LinesInDETY;
5687	double LinesInDETYRoundedDownToSwath;
5688	double MaximumEffectiveCompressionLuma;
5689	double MaximumEffectiveCompressionChroma;
5690	double TotalZeroSizeRequestReadBandwidth;
5691	double TotalZeroSizeCompressedReadBandwidth;
5692	double AverageDCCZeroSizeFraction;
5693	double AverageZeroSizeCompressionRate;
5694	unsigned int k;
5695
5696	TotalZeroSizeRequestReadBandwidth = 0;
5697	TotalZeroSizeCompressedReadBandwidth = 0;
5698	TotalRowReadBandwidth = 0;
5699	TotalCompressedReadBandwidth = 0;
5700
5701	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5702		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5703			if (DCCEnable[k] == true) {
5704				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5705						|| (!IsVertical(SourceRotation[k])
5706								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5707						|| DCCYMaxUncompressedBlock[k] < 256) {
5708					MaximumEffectiveCompressionLuma = 2;
5709				} else {
5710					MaximumEffectiveCompressionLuma = 4;
5711				}
5712				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5713						+ ReadBandwidthSurfaceLuma[k]
5714								/ dml_min(NetDCCRateLuma[k],
5715										MaximumEffectiveCompressionLuma);
5716#ifdef __DML_VBA_DEBUG__
5717				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5718						__func__, k, ReadBandwidthSurfaceLuma[k]);
5719				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5720						__func__, k, NetDCCRateLuma[k]);
5721				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5722						__func__, k, MaximumEffectiveCompressionLuma);
5723#endif
5724				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5725						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5726				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5727						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5728								/ MaximumEffectiveCompressionLuma;
5729
5730				if (ReadBandwidthSurfaceChroma[k] > 0) {
5731					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5732							|| (!IsVertical(SourceRotation[k])
5733									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5734							|| DCCCMaxUncompressedBlock[k] < 256) {
5735						MaximumEffectiveCompressionChroma = 2;
5736					} else {
5737						MaximumEffectiveCompressionChroma = 4;
5738					}
5739					TotalCompressedReadBandwidth =
5740							TotalCompressedReadBandwidth
5741							+ ReadBandwidthSurfaceChroma[k]
5742							/ dml_min(NetDCCRateChroma[k],
5743							MaximumEffectiveCompressionChroma);
5744#ifdef __DML_VBA_DEBUG__
5745					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5746							__func__, k, ReadBandwidthSurfaceChroma[k]);
5747					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5748							__func__, k, NetDCCRateChroma[k]);
5749					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5750							__func__, k, MaximumEffectiveCompressionChroma);
5751#endif
5752					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5753							+ ReadBandwidthSurfaceChroma[k]
5754									* DCCFractionOfZeroSizeRequestsChroma[k];
5755					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5756							+ ReadBandwidthSurfaceChroma[k]
5757									* DCCFractionOfZeroSizeRequestsChroma[k]
5758									/ MaximumEffectiveCompressionChroma;
5759				}
5760			} else {
5761				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5762						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5763			}
5764			TotalRowReadBandwidth = TotalRowReadBandwidth
5765					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5766		}
5767	}
5768
5769	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5770	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5771
5772#ifdef __DML_VBA_DEBUG__
5773	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5774	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5775	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5776	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5777			__func__, TotalZeroSizeCompressedReadBandwidth);
5778	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5779	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5780	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5781	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5782	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5783	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5784	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5785#endif
5786	if (AverageDCCZeroSizeFraction == 1) {
5787		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5788				/ TotalZeroSizeCompressedReadBandwidth;
5789		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5790				* AverageZeroSizeCompressionRate
5791				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5792						* AverageZeroSizeCompressionRate;
5793	} else if (AverageDCCZeroSizeFraction > 0) {
5794		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5795				/ TotalZeroSizeCompressedReadBandwidth;
5796		EffectiveCompressedBufferSize = dml_min(
5797				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5798				(double) MetaFIFOSizeInKEntries * 1024 * 64
5799					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5800					+ 1 / AverageDCCCompressionRate))
5801					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5802					* AverageDCCCompressionRate,
5803					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5804					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5805
5806#ifdef __DML_VBA_DEBUG__
5807		dml_print("DML::%s: min 1 = %f\n", __func__,
5808				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5809		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5810				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5811						AverageDCCCompressionRate));
5812		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5813				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5814		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5815				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5816#endif
5817	} else {
5818		EffectiveCompressedBufferSize = dml_min(
5819				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5820				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5821				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5822						* AverageDCCCompressionRate;
5823
5824#ifdef __DML_VBA_DEBUG__
5825		dml_print("DML::%s: min 1 = %f\n", __func__,
5826				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5827		dml_print("DML::%s: min 2 = %f\n", __func__,
5828				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5829#endif
5830	}
5831
5832#ifdef __DML_VBA_DEBUG__
5833	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5834	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5835	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5836#endif
5837
5838	*StutterPeriod = 0;
5839
5840	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5841		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5842			LinesInDETY = ((double) DETBufferSizeY[k]
5843					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5844							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5845					/ BytePerPixelDETY[k] / SwathWidthY[k];
5846			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5847			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5848					/ VRatio[k];
5849#ifdef __DML_VBA_DEBUG__
5850			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5851			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5852			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5853			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5854					__func__, k, ReadBandwidthSurfaceLuma[k]);
5855			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5856			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5857			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5858					__func__, k, LinesInDETYRoundedDownToSwath);
5859			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5860			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5861			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5862			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5863			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5864#endif
5865
5866			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5867				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5868
5869				FoundCriticalSurface = true;
5870				*StutterPeriod = DETBufferingTimeY;
5871				FrameTimeCriticalSurface = (
5872						isInterlaceTiming ?
5873								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5874						* (double) HTotal[k] / PixelClock[k];
5875				VActiveTimeCriticalSurface = (
5876						isInterlaceTiming ?
5877								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5878						* (double) HTotal[k] / PixelClock[k];
5879				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5880				SwathWidthYCriticalSurface = SwathWidthY[k];
5881				SwathHeightYCriticalSurface = SwathHeightY[k];
5882				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5883				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5884						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5885				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5886				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5887				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5888				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5889
5890#ifdef __DML_VBA_DEBUG__
5891				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5892						__func__, k, FoundCriticalSurface);
5893				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5894						__func__, k, *StutterPeriod);
5895				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5896						__func__, k, MinTTUVBlankCriticalSurface);
5897				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5898						__func__, k, FrameTimeCriticalSurface);
5899				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5900						__func__, k, VActiveTimeCriticalSurface);
5901				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5902						__func__, k, BytePerPixelYCriticalSurface);
5903				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5904						__func__, k, SwathWidthYCriticalSurface);
5905				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5906						__func__, k, SwathHeightYCriticalSurface);
5907				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5908						__func__, k, BlockWidth256BytesYCriticalSurface);
5909				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5910						__func__, k, doublePlaneCriticalSurface);
5911				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5912						__func__, k, doublePipeCriticalSurface);
5913				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5914						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5915#endif
5916			}
5917		}
5918	}
5919
5920	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5921			EffectiveCompressedBufferSize);
5922#ifdef __DML_VBA_DEBUG__
5923	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5924	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5925	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5926			__func__, *StutterPeriod * TotalDataReadBandwidth);
5927	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5928	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5929			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5930	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5931	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5932	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5933	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5934#endif
5935
5936	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5937			/ ReturnBW
5938			+ (*StutterPeriod * TotalDataReadBandwidth
5939					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5940			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5941#ifdef __DML_VBA_DEBUG__
5942	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5943			AverageDCCCompressionRate / ReturnBW);
5944	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5945			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5946	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5947			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5948	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5949	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5950#endif
5951	StutterBurstTime = dml_max(StutterBurstTime,
5952			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5953					* SwathWidthYCriticalSurface / ReturnBW);
5954
5955#ifdef __DML_VBA_DEBUG__
5956	dml_print("DML::%s: Time to finish residue swath=%f\n",
5957			__func__,
5958			LinesToFinishSwathTransferStutterCriticalSurface *
5959			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5960#endif
5961
5962	TotalActiveWriteback = 0;
5963	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5964		if (WritebackEnable[k])
5965			TotalActiveWriteback = TotalActiveWriteback + 1;
5966	}
5967
5968	if (TotalActiveWriteback == 0) {
5969#ifdef __DML_VBA_DEBUG__
5970		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5971		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5972		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5973		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5974#endif
5975		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5976				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5977		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5978				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5979		*NumberOfStutterBurstsPerFrame = (
5980				*StutterEfficiencyNotIncludingVBlank > 0 ?
5981						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5982		*Z8NumberOfStutterBurstsPerFrame = (
5983				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5984						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5985	} else {
5986		*StutterEfficiencyNotIncludingVBlank = 0.;
5987		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5988		*NumberOfStutterBurstsPerFrame = 0;
5989		*Z8NumberOfStutterBurstsPerFrame = 0;
5990	}
5991#ifdef __DML_VBA_DEBUG__
5992	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5993	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5994			__func__, *StutterEfficiencyNotIncludingVBlank);
5995	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5996			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5997	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5998	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5999#endif
6000
6001	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6002		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6003			if (BlendingAndTiming[k] == k) {
6004				if (TotalNumberOfActiveOTG == 0) {
6005					doublePixelClock = PixelClock[k];
6006					doubleHTotal = HTotal[k];
6007					doubleVTotal = VTotal[k];
6008				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6009						|| doubleVTotal != VTotal[k]) {
6010					SameTiming = false;
6011				}
6012				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6013			}
6014		}
6015	}
6016
6017	if (*StutterEfficiencyNotIncludingVBlank > 0) {
6018		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6019
6020		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6021				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6022			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6023						+ StutterBurstTime * VActiveTimeCriticalSurface
6024						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6025		} else {
6026			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6027		}
6028	} else {
6029		*StutterEfficiency = 0;
6030	}
6031
6032	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6033		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6034				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6035		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6036				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6037			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6038				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6039		} else {
6040			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6041		}
6042	} else {
6043		*Z8StutterEfficiency = 0.;
6044	}
6045
6046#ifdef __DML_VBA_DEBUG__
6047	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6048	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6049	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6050	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6051	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6052	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6053	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6054			__func__, *StutterEfficiencyNotIncludingVBlank);
6055	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6056#endif
6057
6058	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6059			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6060	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6061	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6062			- DETBufferSizeYCriticalSurface;
6063
6064	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6065			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6066			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6067			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6068
6069#ifdef __DML_VBA_DEBUG__
6070	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6071	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6072	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6073	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6074#endif
6075} // CalculateStutterEfficiency
6076
/*
 * Split the return buffer configuration into a DET budget and a minimum
 * compressed buffer size.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                    ROBBufferSizeInKByte), passed through
 *                                    dml_ceil(..., 64) (presumably rounding up
 *                                    to a multiple of 64 - confirm in helper).
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through dml_floor(..., 64), unless
 *                                    nomDETInKByteOverrideEnable is set, in
 *                                    which case nomDETInKByteOverrideValue is
 *                                    stored instead.
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte (unsigned arithmetic).
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The computed nominal DET size is only a default; honour the override. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6113
/*
 * Decide whether the active-display bandwidth of all surfaces is supported.
 *
 * For each of the first NumberOfActiveSurfaces entries the following terms
 * are accumulated:
 *   ReadBandwidthLuma * UrgentBurstFactorLuma
 *   ReadBandwidthChroma * UrgentBurstFactorChroma
 *   cursor_bw * UrgentBurstFactorCursor
 *   NumberOfDPP * meta_row_bandwidth
 *   NumberOfDPP * dpte_row_bandwidth
 *
 * Returns true only when that total does not exceed ReturnBW and no surface
 * has its NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool any_surface_lacks_hiding = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			any_surface_lacks_hiding = true;

		/* Add one term at a time to keep the summation order fixed. */
		total_bw = total_bw + ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		total_bw = total_bw + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		total_bw = total_bw + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		total_bw = total_bw + NumberOfDPP[surf] * meta_row_bandwidth[surf];
		total_bw = total_bw + NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	if (any_surface_lacks_hiding)
		supported = false;
	else
		supported = (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, any_surface_lacks_hiding);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6152
6153void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6154		double ReturnBW,
6155		bool NotUrgentLatencyHiding[],
6156		double ReadBandwidthLuma[],
6157		double ReadBandwidthChroma[],
6158		double PrefetchBandwidthLuma[],
6159		double PrefetchBandwidthChroma[],
6160		double cursor_bw[],
6161		double meta_row_bandwidth[],
6162		double dpte_row_bandwidth[],
6163		double cursor_bw_pre[],
6164		double prefetch_vmrow_bw[],
6165		unsigned int NumberOfDPP[],
6166		double UrgentBurstFactorLuma[],
6167		double UrgentBurstFactorChroma[],
6168		double UrgentBurstFactorCursor[],
6169		double UrgentBurstFactorLumaPre[],
6170		double UrgentBurstFactorChromaPre[],
6171		double UrgentBurstFactorCursorPre[],
6172		double PrefetchBW[],
6173		double VRatio[],
6174		double MaxVRatioPre,
6175
6176		/* output */
6177		double  *MaxPrefetchBandwidth,
6178		double  *FractionOfUrgentBandwidth,
6179		bool *PrefetchBandwidthSupport)
6180{
6181	unsigned int k;
6182	double ActiveBandwidthPerSurface;
6183	bool NotEnoughUrgentLatencyHiding = false;
6184	double TotalActiveBandwidth = 0;
6185	double TotalPrefetchBandwidth = 0;
6186
6187	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6188		if (NotUrgentLatencyHiding[k]) {
6189			NotEnoughUrgentLatencyHiding = true;
6190		}
6191	}
6192
6193	*MaxPrefetchBandwidth = 0;
6194	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6195		ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6196
6197		TotalActiveBandwidth += ActiveBandwidthPerSurface;
6198
6199		TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6200
6201		*MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6202				ActiveBandwidthPerSurface,
6203				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6204	}
6205
6206	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6207		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6208	else
6209		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6210
6211	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6212}
6213
/*
 * Compute the bandwidth left over for immediate flip.
 *
 * Starts from ReturnBW and, for each active surface, subtracts the larger
 * (via dml_max) of:
 *   - the active bandwidth: luma, chroma and cursor reads scaled by their
 *     urgent burst factors
 *   - the prefetch bandwidth: NumberOfDPP * scaled luma/chroma prefetch plus
 *     the scaled prefetch cursor bandwidth
 *
 * Returns the remaining bandwidth; the value is not clamped here.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		double prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6240
6241void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6242		double ReturnBW,
6243		enum immediate_flip_requirement ImmediateFlipRequirement[],
6244		double final_flip_bw[],
6245		double ReadBandwidthLuma[],
6246		double ReadBandwidthChroma[],
6247		double PrefetchBandwidthLuma[],
6248		double PrefetchBandwidthChroma[],
6249		double cursor_bw[],
6250		double meta_row_bandwidth[],
6251		double dpte_row_bandwidth[],
6252		double cursor_bw_pre[],
6253		double prefetch_vmrow_bw[],
6254		unsigned int NumberOfDPP[],
6255		double UrgentBurstFactorLuma[],
6256		double UrgentBurstFactorChroma[],
6257		double UrgentBurstFactorCursor[],
6258		double UrgentBurstFactorLumaPre[],
6259		double UrgentBurstFactorChromaPre[],
6260		double UrgentBurstFactorCursorPre[],
6261
6262		/* output */
6263		double  *TotalBandwidth,
6264		double  *FractionOfUrgentBandwidth,
6265		bool *ImmediateFlipBandwidthSupport)
6266{
6267	unsigned int k;
6268	*TotalBandwidth = 0;
6269	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6270		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6271			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6272					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6273					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6274		} else {
6275			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6276					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6277					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6278		}
6279	}
6280	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6281	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6282}
6283
6284bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6285		double ReturnBW,
6286		double UrgentLatency,
6287		unsigned int SwathHeightY[],
6288		unsigned int SwathHeightC[],
6289		unsigned int SwathWidthY[],
6290		unsigned int SwathWidthC[],
6291		double  BytePerPixelInDETY[],
6292		double  BytePerPixelInDETC[],
6293		unsigned int    DETBufferSizeY[],
6294		unsigned int    DETBufferSizeC[],
6295		unsigned int	NumOfDPP[],
6296		unsigned int	HTotal[],
6297		double	PixelClock[],
6298		double	VRatioY[],
6299		double	VRatioC[],
6300		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6301		enum unbounded_requesting_policy UseUnboundedRequesting)
6302{
6303	int k;
6304	double SwathSizeAllSurfaces = 0;
6305	double SwathSizeAllSurfacesInFetchTimeUs;
6306	double DETSwathLatencyHidingUs;
6307	double DETSwathLatencyHidingYUs;
6308	double DETSwathLatencyHidingCUs;
6309	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6310	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6311	bool NotEnoughDETSwathFillLatencyHiding = false;
6312
6313	if (UseUnboundedRequesting == dm_unbounded_requesting)
6314		return false;
6315
6316	/* calculate sum of single swath size for all pipes in bytes */
6317	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6318		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6319
6320		if (SwathHeightC[k] != 0)
6321			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6322		else
6323			SwathSizePerSurfaceC[k] = 0;
6324
6325		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6326	}
6327
6328	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6329
6330	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6331	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6332		double LineTime = HTotal[k] / PixelClock[k];
6333
6334		/* only care if surface is not phantom */
6335		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6336			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6337
6338			if (SwathHeightC[k] != 0) {
6339				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6340
6341				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6342			} else {
6343				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6344			}
6345
6346			/* DET must be able to hide time to fetch 1 swath for each surface */
6347			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6348				NotEnoughDETSwathFillLatencyHiding = true;
6349				break;
6350			}
6351		}
6352	}
6353
6354	return NotEnoughDETSwathFillLatencyHiding;
6355}
6356