1/*	$NetBSD: amdgpu_dml1_display_rq_dlg_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $	*/
2
3/*
4 * Copyright 2017 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: AMD
25 *
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: amdgpu_dml1_display_rq_dlg_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $");
30
31#include "dml1_display_rq_dlg_calc.h"
32#include "display_mode_lib.h"
33
34#include "dml_inline_defs.h"
35
36/*
37 * NOTE:
38 *   This file is gcc-parseable HW gospel, coming straight from HW engineers.
39 *
40 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
41 * ways. Unless there is something clearly wrong with it the code should
42 * remain as-is as it provides us with a guarantee from HW that it is correct.
43 */
44
45static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
46{
47	unsigned int ret_val = 0;
48
49	if (source_format == dm_444_16) {
50		if (!is_chroma)
51			ret_val = 2;
52	} else if (source_format == dm_444_32) {
53		if (!is_chroma)
54			ret_val = 4;
55	} else if (source_format == dm_444_64) {
56		if (!is_chroma)
57			ret_val = 8;
58	} else if (source_format == dm_420_8) {
59		if (is_chroma)
60			ret_val = 2;
61		else
62			ret_val = 1;
63	} else if (source_format == dm_420_10) {
64		if (is_chroma)
65			ret_val = 4;
66		else
67			ret_val = 2;
68	}
69	return ret_val;
70}
71
72static bool is_dual_plane(enum source_format_class source_format)
73{
74	bool ret_val = 0;
75
76	if ((source_format == dm_420_8) || (source_format == dm_420_10))
77		ret_val = 1;
78
79	return ret_val;
80}
81
/*
 * Look up the width and height (in elements) of a 256-byte block for the
 * given element size.  Every entry satisfies
 * width * height * bytes_per_element == 256.
 *
 * Only element sizes of 1, 2, 4 and 8 bytes are known; for any other value
 * *blk256_width and *blk256_height are left untouched, so callers are
 * expected to preset them.
 */
static void get_blk256_size(
		unsigned int *blk256_width,
		unsigned int *blk256_height,
		unsigned int bytes_per_element)
{
	switch (bytes_per_element) {
	case 1:
		*blk256_width = 16;
		*blk256_height = 16;
		break;
	case 2:
		*blk256_width = 16;
		*blk256_height = 8;
		break;
	case 4:
		*blk256_width = 8;
		*blk256_height = 8;
		break;
	case 8:
		*blk256_width = 8;
		*blk256_height = 4;
		break;
	default:
		/* unknown element size: outputs keep their incoming values */
		break;
	}
}
101
/*
 * Compute refcyc_per_delivery as
 *
 *   refclk_freq_in_mhz * <width> / <rate> / req_per_swath_ub
 *
 * where <width>/<rate> are recout_width/pclk_freq_in_mhz when vratio <= 1.0
 * and delivery_width/hscale_pixel_rate otherwise.  The inputs and the result
 * are traced through DTRACE.
 *
 * mode_lib is not referenced directly in this function.
 * NOTE(review): DTRACE may expand to code that uses mode_lib - confirm
 * before removing the parameter.
 */
static double get_refcyc_per_delivery(
		struct display_mode_lib *mode_lib,
		double refclk_freq_in_mhz,
		double pclk_freq_in_mhz,
		unsigned int recout_width,
		double vratio,
		double hscale_pixel_rate,
		unsigned int delivery_width,
		unsigned int req_per_swath_ub)
{
	double width;
	double rate;
	double refcyc_per_delivery;

	/* pick the operands first, then apply one shared formula */
	if (vratio <= 1.0) {
		width = (double) recout_width;
		rate = pclk_freq_in_mhz;
	} else {
		width = (double) delivery_width;
		rate = (double) hscale_pixel_rate;
	}

	refcyc_per_delivery = (double) refclk_freq_in_mhz * width
			/ rate / (double) req_per_swath_ub;

	DTRACE("DLG: %s: refclk_freq_in_mhz = %3.2f", __func__, refclk_freq_in_mhz);
	DTRACE("DLG: %s: pclk_freq_in_mhz   = %3.2f", __func__, pclk_freq_in_mhz);
	DTRACE("DLG: %s: recout_width       = %d", __func__, recout_width);
	DTRACE("DLG: %s: vratio             = %3.2f", __func__, vratio);
	DTRACE("DLG: %s: req_per_swath_ub   = %d", __func__, req_per_swath_ub);
	DTRACE("DLG: %s: refcyc_per_delivery= %3.2f", __func__, refcyc_per_delivery);

	return refcyc_per_delivery;
}
132
/*
 * Compute vratio_pre from the swath counts and the line budget l_sw.
 *
 * The base value is (max_num_sw * swath_height + max_partial_sw) / l_sw.
 * For swath heights above 4 a second candidate,
 * (max_num_sw * swath_height) / (l_sw - (floor(vinit) - 3) / 2), is also
 * evaluated and the larger of the two is kept.  The result is finally
 * clamped to the range [1.0, 4.0]; each clamp is reported through DTRACE.
 */
static double get_vratio_pre(
		struct display_mode_lib *mode_lib,
		unsigned int max_num_sw,
		unsigned int max_partial_sw,
		unsigned int swath_height,
		double vinit,
		double l_sw)
{
	double prefill = dml_floor(vinit, 1);
	/* integer product, shared by both candidates below */
	unsigned int full_swath_lines = max_num_sw * swath_height;
	double vratio_pre = (full_swath_lines + max_partial_sw) / l_sw;

	if (swath_height > 4) {
		double candidate = full_swath_lines / (l_sw - (prefill - 3.0) / 2.0);

		if (candidate > vratio_pre)
			vratio_pre = candidate;
	}

	DTRACE("DLG: %s: max_num_sw        = %0d", __func__, max_num_sw);
	DTRACE("DLG: %s: max_partial_sw    = %0d", __func__, max_partial_sw);
	DTRACE("DLG: %s: swath_height      = %0d", __func__, swath_height);
	DTRACE("DLG: %s: vinit             = %3.2f", __func__, vinit);
	DTRACE("DLG: %s: vratio_pre        = %3.2f", __func__, vratio_pre);

	/* clamp into [1.0, 4.0]; at most one of the two branches can fire */
	if (vratio_pre < 1.0) {
		DTRACE("WARNING_DLG: %s:  vratio_pre=%3.2f < 1.0, set to 1.0", __func__, vratio_pre);
		vratio_pre = 1.0;
	} else if (vratio_pre > 4.0) {
		DTRACE(
				"WARNING_DLG: %s:  vratio_pre=%3.2f > 4.0 (max scaling ratio). set to 4.0",
				__func__,
				vratio_pre);
		vratio_pre = 4.0;
	}

	return vratio_pre;
}
174
/*
 * Derive the swath counts from the initial vertical position.
 *
 * With prefill = floor(vinit), which is asserted to lie in (0, 8]:
 *   *max_num_sw     = ceil((prefill - 1) / swath_height) + 1
 *   *max_partial_sw = swath_height - 1              when prefill == 1
 *                     (prefill - 2) % swath_height  otherwise
 * and *max_partial_sw is never reported below 1.
 */
static void get_swath_need(
		struct display_mode_lib *mode_lib,
		unsigned int *max_num_sw,
		unsigned int *max_partial_sw,
		unsigned int swath_height,
		double vinit)
{
	double prefill = dml_floor(vinit, 1);
	unsigned int partial_lines;

	DTRACE("DLG: %s: swath_height      = %0d", __func__, swath_height);
	DTRACE("DLG: %s: vinit             = %3.2f", __func__, vinit);

	ASSERT(prefill > 0.0 && prefill <= 8.0);

	/* prefill has to be >= 1 */
	*max_num_sw = (unsigned int) (dml_ceil((prefill - 1.0) / (double) swath_height, 1) + 1.0);

	if (prefill == 1)
		partial_lines = swath_height - 1;
	else
		partial_lines = (unsigned int) (prefill - 2.0) % swath_height;

	/* ensure minimum of 1 is used */
	if (partial_lines < 1)
		partial_lines = 1;
	*max_partial_sw = partial_lines;

	DTRACE("DLG: %s: max_num_sw        = %0d", __func__, *max_num_sw);
	DTRACE("DLG: %s: max_partial_sw    = %0d", __func__, *max_partial_sw);
}
200
201static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
202{
203	if (tile_size == dm_256k_tile)
204		return (256 * 1024);
205	else if (tile_size == dm_64k_tile)
206		return (64 * 1024);
207	else
208		return (4 * 1024);
209}
210
211static void extract_rq_sizing_regs(
212		struct display_mode_lib *mode_lib,
213		struct _vcs_dpi_display_data_rq_regs_st *rq_regs,
214		const struct _vcs_dpi_display_data_rq_sizing_params_st rq_sizing)
215{
216	DTRACE("DLG: %s: rq_sizing param", __func__);
217	print__data_rq_sizing_params_st(mode_lib, rq_sizing);
218
219	rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10;
220
221	if (rq_sizing.min_chunk_bytes == 0)
222		rq_regs->min_chunk_size = 0;
223	else
224		rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1;
225
226	rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10;
227	if (rq_sizing.min_meta_chunk_bytes == 0)
228		rq_regs->min_meta_chunk_size = 0;
229	else
230		rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1;
231
232	rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6;
233	rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6;
234}
235
236void dml1_extract_rq_regs(
237		struct display_mode_lib *mode_lib,
238		struct _vcs_dpi_display_rq_regs_st *rq_regs,
239		const struct _vcs_dpi_display_rq_params_st rq_param)
240{
241	unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
242	unsigned int detile_buf_plane1_addr = 0;
243
244	extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l);
245	if (rq_param.yuv420)
246		extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c);
247
248	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
249	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);
250
251	/* TODO: take the max between luma, chroma chunk size?
252	 * okay for now, as we are setting chunk_bytes to 8kb anyways
253	 */
254	if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { /*32kb */
255		rq_regs->drq_expansion_mode = 0;
256	} else {
257		rq_regs->drq_expansion_mode = 2;
258	}
259	rq_regs->prq_expansion_mode = 1;
260	rq_regs->mrq_expansion_mode = 1;
261	rq_regs->crq_expansion_mode = 1;
262
263	if (rq_param.yuv420) {
264		if ((double) rq_param.misc.rq_l.stored_swath_bytes
265				/ (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) {
266			detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); /* half to chroma */
267		} else {
268			detile_buf_plane1_addr = dml_round_to_multiple(
269					(unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0),
270					256,
271					0) / 64.0; /* 2/3 to chroma */
272		}
273	}
274	rq_regs->plane1_base_address = detile_buf_plane1_addr;
275}
276
277static void handle_det_buf_split(
278		struct display_mode_lib *mode_lib,
279		struct _vcs_dpi_display_rq_params_st *rq_param,
280		const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param)
281{
282	unsigned int total_swath_bytes = 0;
283	unsigned int swath_bytes_l = 0;
284	unsigned int swath_bytes_c = 0;
285	unsigned int full_swath_bytes_packed_l = 0;
286	unsigned int full_swath_bytes_packed_c = 0;
287	bool req128_l = 0;
288	bool req128_c = 0;
289	bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
290	bool surf_vert = (pipe_src_param.source_scan == dm_vert);
291	unsigned int log2_swath_height_l = 0;
292	unsigned int log2_swath_height_c = 0;
293	unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
294
295	full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
296	full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
297
298	if (rq_param->yuv420_10bpc) {
299		full_swath_bytes_packed_l = dml_round_to_multiple(
300				rq_param->misc.rq_l.full_swath_bytes * 2 / 3,
301				256,
302				1) + 256;
303		full_swath_bytes_packed_c = dml_round_to_multiple(
304				rq_param->misc.rq_c.full_swath_bytes * 2 / 3,
305				256,
306				1) + 256;
307	}
308
309	if (rq_param->yuv420) {
310		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
311
312		if (total_swath_bytes <= detile_buf_size_in_bytes) { /*full 256b request */
313			req128_l = 0;
314			req128_c = 0;
315			swath_bytes_l = full_swath_bytes_packed_l;
316			swath_bytes_c = full_swath_bytes_packed_c;
317		} else { /*128b request (for luma only for yuv420 8bpc) */
318			req128_l = 1;
319			req128_c = 0;
320			swath_bytes_l = full_swath_bytes_packed_l / 2;
321			swath_bytes_c = full_swath_bytes_packed_c;
322		}
323
324		/* Bug workaround, luma and chroma req size needs to be the same. (see: DEGVIDCN10-137)
325		 * TODO: Remove after rtl fix
326		 */
327		if (req128_l == 1) {
328			req128_c = 1;
329			DTRACE("DLG: %s: bug workaround DEGVIDCN10-137", __func__);
330		}
331
332		/* Note: assumption, the config that pass in will fit into
333		 *       the detiled buffer.
334		 */
335	} else {
336		total_swath_bytes = 2 * full_swath_bytes_packed_l;
337
338		if (total_swath_bytes <= detile_buf_size_in_bytes)
339			req128_l = 0;
340		else
341			req128_l = 1;
342
343		swath_bytes_l = total_swath_bytes;
344		swath_bytes_c = 0;
345	}
346	rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
347	rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;
348
349	if (surf_linear) {
350		log2_swath_height_l = 0;
351		log2_swath_height_c = 0;
352	} else if (!surf_vert) {
353		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l;
354		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c;
355	} else {
356		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l;
357		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c;
358	}
359	rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
360	rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;
361
362	DTRACE("DLG: %s: req128_l = %0d", __func__, req128_l);
363	DTRACE("DLG: %s: req128_c = %0d", __func__, req128_c);
364	DTRACE("DLG: %s: full_swath_bytes_packed_l = %0d", __func__, full_swath_bytes_packed_l);
365	DTRACE("DLG: %s: full_swath_bytes_packed_c = %0d", __func__, full_swath_bytes_packed_c);
366}
367
368/* Need refactor. */
369static void dml1_rq_dlg_get_row_heights(
370		struct display_mode_lib *mode_lib,
371		unsigned int *o_dpte_row_height,
372		unsigned int *o_meta_row_height,
373		unsigned int vp_width,
374		unsigned int data_pitch,
375		int source_format,
376		int tiling,
377		int macro_tile_size,
378		int source_scan,
379		int is_chroma)
380{
381	bool surf_linear = (tiling == dm_sw_linear);
382	bool surf_vert = (source_scan == dm_vert);
383
384	unsigned int bytes_per_element = get_bytes_per_element(
385			(enum source_format_class) source_format,
386			is_chroma);
387	unsigned int log2_bytes_per_element = dml_log2(bytes_per_element);
388	unsigned int blk256_width = 0;
389	unsigned int blk256_height = 0;
390
391	unsigned int log2_blk256_height;
392	unsigned int blk_bytes;
393	unsigned int log2_blk_bytes;
394	unsigned int log2_blk_height;
395	unsigned int log2_blk_width;
396	unsigned int log2_meta_req_bytes;
397	unsigned int log2_meta_req_height;
398	unsigned int log2_meta_req_width;
399	unsigned int log2_meta_row_height;
400	unsigned int log2_vmpg_bytes;
401	unsigned int dpte_buf_in_pte_reqs;
402	unsigned int log2_vmpg_height;
403	unsigned int log2_vmpg_width;
404	unsigned int log2_dpte_req_height_ptes;
405	unsigned int log2_dpte_req_width_ptes;
406	unsigned int log2_dpte_req_height;
407	unsigned int log2_dpte_req_width;
408	unsigned int log2_dpte_row_height_linear;
409	unsigned int log2_dpte_row_height;
410	unsigned int dpte_req_width;
411
412	if (surf_linear) {
413		blk256_width = 256;
414		blk256_height = 1;
415	} else {
416		get_blk256_size(&blk256_width, &blk256_height, bytes_per_element);
417	}
418
419	log2_blk256_height = dml_log2((double) blk256_height);
420	blk_bytes = surf_linear ?
421			256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
422	log2_blk_bytes = dml_log2((double) blk_bytes);
423	log2_blk_height = 0;
424	log2_blk_width = 0;
425
426	/* remember log rule
427	 * "+" in log is multiply
428	 * "-" in log is divide
429	 * "/2" is like square root
430	 * blk is vertical biased
431	 */
432	if (tiling != dm_sw_linear)
433		log2_blk_height = log2_blk256_height
434				+ dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
435	else
436		log2_blk_height = 0; /* blk height of 1 */
437
438	log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
439
440	/* ------- */
441	/* meta    */
442	/* ------- */
443	log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */
444
445	/* each 64b meta request for dcn is 8x8 meta elements and
446	 * a meta element covers one 256b block of the the data surface.
447	 */
448	log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 */
449	log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
450			- log2_meta_req_height;
451	log2_meta_row_height = 0;
452
453	/* the dimensions of a meta row are meta_row_width x meta_row_height in elements.
454	 * calculate upper bound of the meta_row_width
455	 */
456	if (!surf_vert)
457		log2_meta_row_height = log2_meta_req_height;
458	else
459		log2_meta_row_height = log2_meta_req_width;
460
461	*o_meta_row_height = 1 << log2_meta_row_height;
462
463	/* ------ */
464	/* dpte   */
465	/* ------ */
466	log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
467	dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
468
469	log2_vmpg_height = 0;
470	log2_vmpg_width = 0;
471	log2_dpte_req_height_ptes = 0;
472	log2_dpte_req_width_ptes = 0;
473	log2_dpte_req_height = 0;
474	log2_dpte_req_width = 0;
475	log2_dpte_row_height_linear = 0;
476	log2_dpte_row_height = 0;
477	dpte_req_width = 0; /* 64b dpte req width in data element */
478
479	if (surf_linear)
480		log2_vmpg_height = 0; /* one line high */
481	else
482		log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
483	log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
484
485	/* only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. */
486	if (log2_blk_bytes <= log2_vmpg_bytes)
487		log2_dpte_req_height_ptes = 0;
488	else if (log2_blk_height - log2_vmpg_height >= 2)
489		log2_dpte_req_height_ptes = 2;
490	else
491		log2_dpte_req_height_ptes = log2_blk_height - log2_vmpg_height;
492	log2_dpte_req_width_ptes = 3 - log2_dpte_req_height_ptes;
493
494	ASSERT((log2_dpte_req_width_ptes == 3 && log2_dpte_req_height_ptes == 0) || /* 8x1 */
495			(log2_dpte_req_width_ptes == 2 && log2_dpte_req_height_ptes == 1) || /* 4x2 */
496			(log2_dpte_req_width_ptes == 1 && log2_dpte_req_height_ptes == 2)); /* 2x4 */
497
498	/* the dpte request dimensions in data elements is dpte_req_width x dpte_req_height
499	 * log2_wmpg_width is how much 1 pte represent, now trying to calculate how much 64b pte req represent
500	 */
501	log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
502	log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
503	dpte_req_width = 1 << log2_dpte_req_width;
504
505	/* calculate pitch dpte row buffer can hold
506	 * round the result down to a power of two.
507	 */
508	if (surf_linear) {
509		log2_dpte_row_height_linear = dml_floor(
510				dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch),
511				1);
512
513		ASSERT(log2_dpte_row_height_linear >= 3);
514
515		if (log2_dpte_row_height_linear > 7)
516			log2_dpte_row_height_linear = 7;
517
518		log2_dpte_row_height = log2_dpte_row_height_linear;
519	} else {
520		/* the upper bound of the dpte_row_width without dependency on viewport position follows.  */
521		if (!surf_vert)
522			log2_dpte_row_height = log2_dpte_req_height;
523		else
524			log2_dpte_row_height =
525					(log2_blk_width < log2_dpte_req_width) ?
526							log2_blk_width : log2_dpte_req_width;
527	}
528
529	/* From programming guide:
530	 * There is a special case of saving only half of ptes returned due to buffer space limits.
531	 * this case applies to 4 and 8bpe in horizontal access of a vp_width greater than 2560+16
532	 * when the pte request is 2x4 ptes (which happens when vmpg_bytes =4kb and tile blk_bytes >=64kb).
533	 */
534	if (!surf_vert && vp_width > (2560 + 16) && bytes_per_element >= 4 && log2_vmpg_bytes == 12
535			&& log2_blk_bytes >= 16)
536		log2_dpte_row_height = log2_dpte_row_height - 1; /*half of the full height */
537
538	*o_dpte_row_height = 1 << log2_dpte_row_height;
539}
540
541static void get_surf_rq_param(
542		struct display_mode_lib *mode_lib,
543		struct _vcs_dpi_display_data_rq_sizing_params_st *rq_sizing_param,
544		struct _vcs_dpi_display_data_rq_dlg_params_st *rq_dlg_param,
545		struct _vcs_dpi_display_data_rq_misc_params_st *rq_misc_param,
546		const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param,
547		bool is_chroma)
548{
549	bool mode_422 = 0;
550	unsigned int vp_width = 0;
551	unsigned int vp_height = 0;
552	unsigned int data_pitch = 0;
553	unsigned int meta_pitch = 0;
554	unsigned int ppe = mode_422 ? 2 : 1;
555	bool surf_linear;
556	bool surf_vert;
557	unsigned int bytes_per_element;
558	unsigned int log2_bytes_per_element;
559	unsigned int blk256_width;
560	unsigned int blk256_height;
561	unsigned int log2_blk256_width;
562	unsigned int log2_blk256_height;
563	unsigned int blk_bytes;
564	unsigned int log2_blk_bytes;
565	unsigned int log2_blk_height;
566	unsigned int log2_blk_width;
567	unsigned int log2_meta_req_bytes;
568	unsigned int log2_meta_req_height;
569	unsigned int log2_meta_req_width;
570	unsigned int meta_req_width;
571	unsigned int meta_req_height;
572	unsigned int log2_meta_row_height;
573	unsigned int meta_row_width_ub;
574	unsigned int log2_meta_chunk_bytes;
575	unsigned int log2_meta_chunk_height;
576	unsigned int log2_meta_chunk_width;
577	unsigned int log2_min_meta_chunk_bytes;
578	unsigned int min_meta_chunk_width;
579	unsigned int meta_chunk_width;
580	unsigned int meta_chunk_per_row_int;
581	unsigned int meta_row_remainder;
582	unsigned int meta_chunk_threshold;
583	unsigned int meta_blk_bytes;
584	unsigned int meta_blk_height;
585	unsigned int meta_blk_width;
586	unsigned int meta_surface_bytes;
587	unsigned int vmpg_bytes;
588	unsigned int meta_pte_req_per_frame_ub;
589	unsigned int meta_pte_bytes_per_frame_ub;
590	unsigned int log2_vmpg_bytes;
591	unsigned int dpte_buf_in_pte_reqs;
592	unsigned int log2_vmpg_height;
593	unsigned int log2_vmpg_width;
594	unsigned int log2_dpte_req_height_ptes;
595	unsigned int log2_dpte_req_width_ptes;
596	unsigned int log2_dpte_req_height;
597	unsigned int log2_dpte_req_width;
598	unsigned int log2_dpte_row_height_linear;
599	unsigned int log2_dpte_row_height;
600	unsigned int log2_dpte_group_width;
601	unsigned int dpte_row_width_ub;
602	unsigned int dpte_row_height;
603	unsigned int dpte_req_height;
604	unsigned int dpte_req_width;
605	unsigned int dpte_group_width;
606	unsigned int log2_dpte_group_bytes;
607	unsigned int log2_dpte_group_length;
608	unsigned int func_meta_row_height, func_dpte_row_height;
609
610	/* TODO check if ppe apply for both luma and chroma in 422 case */
611	if (is_chroma) {
612		vp_width = pipe_src_param.viewport_width_c / ppe;
613		vp_height = pipe_src_param.viewport_height_c;
614		data_pitch = pipe_src_param.data_pitch_c;
615		meta_pitch = pipe_src_param.meta_pitch_c;
616	} else {
617		vp_width = pipe_src_param.viewport_width / ppe;
618		vp_height = pipe_src_param.viewport_height;
619		data_pitch = pipe_src_param.data_pitch;
620		meta_pitch = pipe_src_param.meta_pitch;
621	}
622
623	rq_sizing_param->chunk_bytes = 8192;
624
625	if (rq_sizing_param->chunk_bytes == 64 * 1024)
626		rq_sizing_param->min_chunk_bytes = 0;
627	else
628		rq_sizing_param->min_chunk_bytes = 1024;
629
630	rq_sizing_param->meta_chunk_bytes = 2048;
631	rq_sizing_param->min_meta_chunk_bytes = 256;
632
633	rq_sizing_param->mpte_group_bytes = 2048;
634
635	surf_linear = (pipe_src_param.sw_mode == dm_sw_linear);
636	surf_vert = (pipe_src_param.source_scan == dm_vert);
637
638	bytes_per_element = get_bytes_per_element(
639			(enum source_format_class) pipe_src_param.source_format,
640			is_chroma);
641	log2_bytes_per_element = dml_log2(bytes_per_element);
642	blk256_width = 0;
643	blk256_height = 0;
644
645	if (surf_linear) {
646		blk256_width = 256 / bytes_per_element;
647		blk256_height = 1;
648	} else {
649		get_blk256_size(&blk256_width, &blk256_height, bytes_per_element);
650	}
651
652	DTRACE("DLG: %s: surf_linear        = %d", __func__, surf_linear);
653	DTRACE("DLG: %s: surf_vert          = %d", __func__, surf_vert);
654	DTRACE("DLG: %s: blk256_width       = %d", __func__, blk256_width);
655	DTRACE("DLG: %s: blk256_height      = %d", __func__, blk256_height);
656
657	log2_blk256_width = dml_log2((double) blk256_width);
658	log2_blk256_height = dml_log2((double) blk256_height);
659	blk_bytes =
660			surf_linear ? 256 : get_blk_size_bytes(
661							(enum source_macro_tile_size) pipe_src_param.macro_tile_size);
662	log2_blk_bytes = dml_log2((double) blk_bytes);
663	log2_blk_height = 0;
664	log2_blk_width = 0;
665
666	/* remember log rule
667	 * "+" in log is multiply
668	 * "-" in log is divide
669	 * "/2" is like square root
670	 * blk is vertical biased
671	 */
672	if (pipe_src_param.sw_mode != dm_sw_linear)
673		log2_blk_height = log2_blk256_height
674				+ dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
675	else
676		log2_blk_height = 0; /* blk height of 1 */
677
678	log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
679
680	if (!surf_vert) {
681		rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
682				+ blk256_width;
683		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
684	} else {
685		rq_dlg_param->swath_width_ub = dml_round_to_multiple(
686				vp_height - 1,
687				blk256_height,
688				1) + blk256_height;
689		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
690	}
691
692	if (!surf_vert)
693		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
694				* bytes_per_element;
695	else
696		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
697				* bytes_per_element;
698
699	rq_misc_param->blk256_height = blk256_height;
700	rq_misc_param->blk256_width = blk256_width;
701
702	/* -------  */
703	/* meta     */
704	/* -------  */
705	log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */
706
707	/* each 64b meta request for dcn is 8x8 meta elements and
708	 * a meta element covers one 256b block of the the data surface.
709	 */
710	log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 byte, each byte represent 1 blk256 */
711	log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
712			- log2_meta_req_height;
713	meta_req_width = 1 << log2_meta_req_width;
714	meta_req_height = 1 << log2_meta_req_height;
715	log2_meta_row_height = 0;
716	meta_row_width_ub = 0;
717
718	/* the dimensions of a meta row are meta_row_width x meta_row_height in elements.
719	 * calculate upper bound of the meta_row_width
720	 */
721	if (!surf_vert) {
722		log2_meta_row_height = log2_meta_req_height;
723		meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
724				+ meta_req_width;
725		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
726	} else {
727		log2_meta_row_height = log2_meta_req_width;
728		meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
729				+ meta_req_height;
730		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
731	}
732	rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;
733
734	log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
735	log2_meta_chunk_height = log2_meta_row_height;
736
737	/*full sized meta chunk width in unit of data elements */
738	log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
739			- log2_meta_chunk_height;
740	log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
741	min_meta_chunk_width = 1
742			<< (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
743					- log2_meta_chunk_height);
744	meta_chunk_width = 1 << log2_meta_chunk_width;
745	meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
746	meta_row_remainder = meta_row_width_ub % meta_chunk_width;
747	meta_chunk_threshold = 0;
748	meta_blk_bytes = 4096;
749	meta_blk_height = blk256_height * 64;
750	meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height;
751	meta_surface_bytes = meta_pitch
752			* (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1)
753					+ meta_blk_height) * bytes_per_element / 256;
754	vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
755	meta_pte_req_per_frame_ub = (dml_round_to_multiple(
756			meta_surface_bytes - vmpg_bytes,
757			8 * vmpg_bytes,
758			1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
759	meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; /*64B mpte request */
760	rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
761
762	DTRACE("DLG: %s: meta_blk_height             = %d", __func__, meta_blk_height);
763	DTRACE("DLG: %s: meta_blk_width              = %d", __func__, meta_blk_width);
764	DTRACE("DLG: %s: meta_surface_bytes          = %d", __func__, meta_surface_bytes);
765	DTRACE("DLG: %s: meta_pte_req_per_frame_ub   = %d", __func__, meta_pte_req_per_frame_ub);
766	DTRACE("DLG: %s: meta_pte_bytes_per_frame_ub = %d", __func__, meta_pte_bytes_per_frame_ub);
767
768	if (!surf_vert)
769		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
770	else
771		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
772
773	if (meta_row_remainder <= meta_chunk_threshold)
774		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
775	else
776		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
777
778	rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
779
780	/* ------ */
781	/* dpte   */
782	/* ------ */
783	log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
784	dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
785
786	log2_vmpg_height = 0;
787	log2_vmpg_width = 0;
788	log2_dpte_req_height_ptes = 0;
789	log2_dpte_req_width_ptes = 0;
790	log2_dpte_req_height = 0;
791	log2_dpte_req_width = 0;
792	log2_dpte_row_height_linear = 0;
793	log2_dpte_row_height = 0;
794	log2_dpte_group_width = 0;
795	dpte_row_width_ub = 0;
796	dpte_row_height = 0;
797	dpte_req_height = 0; /* 64b dpte req height in data element */
798	dpte_req_width = 0; /* 64b dpte req width in data element */
799	dpte_group_width = 0;
800	log2_dpte_group_bytes = 0;
801	log2_dpte_group_length = 0;
802
803	if (surf_linear)
804		log2_vmpg_height = 0; /* one line high */
805	else
806		log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
807	log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
808
809	/* only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. */
810	if (log2_blk_bytes <= log2_vmpg_bytes)
811		log2_dpte_req_height_ptes = 0;
812	else if (log2_blk_height - log2_vmpg_height >= 2)
813		log2_dpte_req_height_ptes = 2;
814	else
815		log2_dpte_req_height_ptes = log2_blk_height - log2_vmpg_height;
816	log2_dpte_req_width_ptes = 3 - log2_dpte_req_height_ptes;
817
818	/* Ensure we only have the 3 shapes */
819	ASSERT((log2_dpte_req_width_ptes == 3 && log2_dpte_req_height_ptes == 0) || /* 8x1 */
820			(log2_dpte_req_width_ptes == 2 && log2_dpte_req_height_ptes == 1) || /* 4x2 */
821			(log2_dpte_req_width_ptes == 1 && log2_dpte_req_height_ptes == 2)); /* 2x4 */
822
823	/* The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
824	 * log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
825	 * That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
826	 */
827	log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
828	log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
829	dpte_req_height = 1 << log2_dpte_req_height;
830	dpte_req_width = 1 << log2_dpte_req_width;
831
832	/* calculate pitch dpte row buffer can hold
833	 * round the result down to a power of two.
834	 */
835	if (surf_linear) {
836		log2_dpte_row_height_linear = dml_floor(
837				dml_log2(dpte_buf_in_pte_reqs * dpte_req_width / data_pitch),
838				1);
839
840		ASSERT(log2_dpte_row_height_linear >= 3);
841
842		if (log2_dpte_row_height_linear > 7)
843			log2_dpte_row_height_linear = 7;
844
845		log2_dpte_row_height = log2_dpte_row_height_linear;
846		rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
847
848		/* For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
849		 * the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
850		 */
851		dpte_row_width_ub = dml_round_to_multiple(
852				data_pitch * dpte_row_height - 1,
853				dpte_req_width,
854				1) + dpte_req_width;
855		rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
856	} else {
857		/* for tiled mode, row height is the same as req height and row store up to vp size upper bound */
858		if (!surf_vert) {
859			log2_dpte_row_height = log2_dpte_req_height;
860			dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
861					+ dpte_req_width;
862			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
863		} else {
864			log2_dpte_row_height =
865					(log2_blk_width < log2_dpte_req_width) ?
866							log2_blk_width : log2_dpte_req_width;
867			dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
868					+ dpte_req_height;
869			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
870		}
871		rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
872	}
873	rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64;
874
875	/* From programming guide:
876	 * There is a special case of saving only half of ptes returned due to buffer space limits.
877	 * this case applies to 4 and 8bpe in horizontal access of a vp_width greater than 2560+16
878	 * when the pte request is 2x4 ptes (which happens when vmpg_bytes =4kb and tile blk_bytes >=64kb).
879	 */
880	if (!surf_vert && vp_width > (2560 + 16) && bytes_per_element >= 4 && log2_vmpg_bytes == 12
881			&& log2_blk_bytes >= 16) {
882		log2_dpte_row_height = log2_dpte_row_height - 1; /*half of the full height */
883		rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
884	}
885
886	/* the dpte_group_bytes is reduced for the specific case of vertical
887	 * access of a tile surface that has dpte request of 8x1 ptes.
888	 */
889	if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) /*reduced, in this case, will have page fault within a group */
890		rq_sizing_param->dpte_group_bytes = 512;
891	else
892		/*full size */
893		rq_sizing_param->dpte_group_bytes = 2048;
894
895	/*since pte request size is 64byte, the number of data pte requests per full sized group is as follows.  */
896	log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
897	log2_dpte_group_length = log2_dpte_group_bytes - 6; /*length in 64b requests  */
898
899	/* full sized data pte group width in elements */
900	if (!surf_vert)
901		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
902	else
903		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
904
905	dpte_group_width = 1 << log2_dpte_group_width;
906
907	/* since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
908	 * the upper bound for the dpte groups per row is as follows.
909	 */
910	rq_dlg_param->dpte_groups_per_row_ub = dml_ceil(
911			(double) dpte_row_width_ub / dpte_group_width,
912			1);
913
914	dml1_rq_dlg_get_row_heights(
915			mode_lib,
916			&func_dpte_row_height,
917			&func_meta_row_height,
918			vp_width,
919			data_pitch,
920			pipe_src_param.source_format,
921			pipe_src_param.sw_mode,
922			pipe_src_param.macro_tile_size,
923			pipe_src_param.source_scan,
924			is_chroma);
925
926	/* Just a check to make sure this function and the new one give the same
927	 * result. The standalone get_row_heights() function is based off of the
928	 * code in this function so the same changes need to be made to both.
929	 */
930	if (rq_dlg_param->meta_row_height != func_meta_row_height) {
931		DTRACE(
932				"MISMATCH: rq_dlg_param->meta_row_height = %d",
933				rq_dlg_param->meta_row_height);
934		DTRACE("MISMATCH: func_meta_row_height = %d", func_meta_row_height);
935		ASSERT(0);
936	}
937
938	if (rq_dlg_param->dpte_row_height != func_dpte_row_height) {
939		DTRACE(
940				"MISMATCH: rq_dlg_param->dpte_row_height = %d",
941				rq_dlg_param->dpte_row_height);
942		DTRACE("MISMATCH: func_dpte_row_height = %d", func_dpte_row_height);
943		ASSERT(0);
944	}
945}
946
947void dml1_rq_dlg_get_rq_params(
948		struct display_mode_lib *mode_lib,
949		struct _vcs_dpi_display_rq_params_st *rq_param,
950		const struct _vcs_dpi_display_pipe_source_params_st pipe_src_param)
951{
952	/* get param for luma surface */
953	rq_param->yuv420 = pipe_src_param.source_format == dm_420_8
954			|| pipe_src_param.source_format == dm_420_10;
955	rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10;
956
957	get_surf_rq_param(
958			mode_lib,
959			&(rq_param->sizing.rq_l),
960			&(rq_param->dlg.rq_l),
961			&(rq_param->misc.rq_l),
962			pipe_src_param,
963			0);
964
965	if (is_dual_plane((enum source_format_class) pipe_src_param.source_format)) {
966		/* get param for chroma surface */
967		get_surf_rq_param(
968				mode_lib,
969				&(rq_param->sizing.rq_c),
970				&(rq_param->dlg.rq_c),
971				&(rq_param->misc.rq_c),
972				pipe_src_param,
973				1);
974	}
975
976	/* calculate how to split the det buffer space between luma and chroma */
977	handle_det_buf_split(mode_lib, rq_param, pipe_src_param);
978	print__rq_params_st(mode_lib, *rq_param);
979}
980
/* Note: currently taken in as-is.
 * It would be nice to decouple the code from the hw register implementation and
 * to extract the code that is repeated for luma and chroma.
 */
984void dml1_rq_dlg_get_dlg_params(
985		struct display_mode_lib *mode_lib,
986		struct _vcs_dpi_display_dlg_regs_st *disp_dlg_regs,
987		struct _vcs_dpi_display_ttu_regs_st *disp_ttu_regs,
988		const struct _vcs_dpi_display_rq_dlg_params_st rq_dlg_param,
989		const struct _vcs_dpi_display_dlg_sys_params_st dlg_sys_param,
990		const struct _vcs_dpi_display_e2e_pipe_params_st e2e_pipe_param,
991		const bool cstate_en,
992		const bool pstate_en,
993		const bool vm_en,
994		const bool iflip_en)
995{
996	/* Timing */
997	unsigned int htotal = e2e_pipe_param.pipe.dest.htotal;
998	unsigned int hblank_end = e2e_pipe_param.pipe.dest.hblank_end;
999	unsigned int vblank_start = e2e_pipe_param.pipe.dest.vblank_start;
1000	unsigned int vblank_end = e2e_pipe_param.pipe.dest.vblank_end;
1001	bool interlaced = e2e_pipe_param.pipe.dest.interlaced;
1002	unsigned int min_vblank = mode_lib->ip.min_vblank_lines;
1003
1004	double pclk_freq_in_mhz = e2e_pipe_param.pipe.dest.pixel_rate_mhz;
1005	double refclk_freq_in_mhz = e2e_pipe_param.clks_cfg.refclk_mhz;
1006	double dppclk_freq_in_mhz = e2e_pipe_param.clks_cfg.dppclk_mhz;
1007	double dispclk_freq_in_mhz = e2e_pipe_param.clks_cfg.dispclk_mhz;
1008
1009	double ref_freq_to_pix_freq;
1010	double prefetch_xy_calc_in_dcfclk;
1011	double min_dcfclk_mhz;
1012	double t_calc_us;
1013	double min_ttu_vblank;
1014	double min_dst_y_ttu_vblank;
1015	unsigned int dlg_vblank_start;
1016	bool dcc_en;
1017	bool dual_plane;
1018	bool mode_422;
1019	unsigned int access_dir;
1020	unsigned int bytes_per_element_l;
1021	unsigned int bytes_per_element_c;
1022	unsigned int vp_height_l;
1023	unsigned int vp_width_l;
1024	unsigned int vp_height_c;
1025	unsigned int vp_width_c;
1026	unsigned int htaps_l;
1027	unsigned int htaps_c;
1028	double hratios_l;
1029	double hratios_c;
1030	double vratio_l;
1031	double vratio_c;
1032	double line_time_in_us;
1033	double vinit_l;
1034	double vinit_c;
1035	double vinit_bot_l;
1036	double vinit_bot_c;
1037	unsigned int swath_height_l;
1038	unsigned int swath_width_ub_l;
1039	unsigned int dpte_bytes_per_row_ub_l;
1040	unsigned int dpte_groups_per_row_ub_l;
1041	unsigned int meta_pte_bytes_per_frame_ub_l;
1042	unsigned int meta_bytes_per_row_ub_l;
1043	unsigned int swath_height_c;
1044	unsigned int swath_width_ub_c;
1045	unsigned int dpte_bytes_per_row_ub_c;
1046	unsigned int dpte_groups_per_row_ub_c;
1047	unsigned int meta_chunks_per_row_ub_l;
1048	unsigned int vupdate_offset;
1049	unsigned int vupdate_width;
1050	unsigned int vready_offset;
1051	unsigned int dppclk_delay_subtotal;
1052	unsigned int dispclk_delay_subtotal;
1053	unsigned int pixel_rate_delay_subtotal;
1054	unsigned int vstartup_start;
1055	unsigned int dst_x_after_scaler;
1056	unsigned int dst_y_after_scaler;
1057	double line_wait;
1058	double line_o;
1059	double line_setup;
1060	double line_calc;
1061	double dst_y_prefetch;
1062	double t_pre_us;
1063	unsigned int vm_bytes;
1064	unsigned int meta_row_bytes;
1065	unsigned int max_num_sw_l;
1066	unsigned int max_num_sw_c;
1067	unsigned int max_partial_sw_l;
1068	unsigned int max_partial_sw_c;
1069	double max_vinit_l;
1070	double max_vinit_c;
1071	unsigned int lsw_l;
1072	unsigned int lsw_c;
1073	unsigned int sw_bytes_ub_l;
1074	unsigned int sw_bytes_ub_c;
1075	unsigned int sw_bytes;
1076	unsigned int dpte_row_bytes;
1077	double prefetch_bw;
1078	double flip_bw;
1079	double t_vm_us;
1080	double t_r0_us;
1081	double dst_y_per_vm_vblank;
1082	double dst_y_per_row_vblank;
1083	double min_dst_y_per_vm_vblank;
1084	double min_dst_y_per_row_vblank;
1085	double lsw;
1086	double vratio_pre_l;
1087	double vratio_pre_c;
1088	unsigned int req_per_swath_ub_l;
1089	unsigned int req_per_swath_ub_c;
1090	unsigned int meta_row_height_l;
1091	unsigned int swath_width_pixels_ub_l;
1092	unsigned int swath_width_pixels_ub_c;
1093	unsigned int scaler_rec_in_width_l;
1094	unsigned int scaler_rec_in_width_c;
1095	unsigned int dpte_row_height_l;
1096	unsigned int dpte_row_height_c;
1097	double hscale_pixel_rate_l;
1098	double hscale_pixel_rate_c;
1099	double min_hratio_fact_l;
1100	double min_hratio_fact_c;
1101	double refcyc_per_line_delivery_pre_l;
1102	double refcyc_per_line_delivery_pre_c;
1103	double refcyc_per_line_delivery_l;
1104	double refcyc_per_line_delivery_c;
1105	double refcyc_per_req_delivery_pre_l;
1106	double refcyc_per_req_delivery_pre_c;
1107	double refcyc_per_req_delivery_l;
1108	double refcyc_per_req_delivery_c;
1109	double refcyc_per_req_delivery_pre_cur0;
1110	double refcyc_per_req_delivery_cur0;
1111	unsigned int full_recout_width;
1112	double hratios_cur0;
1113	unsigned int cur0_src_width;
1114	enum cursor_bpp cur0_bpp;
1115	unsigned int cur0_req_size;
1116	unsigned int cur0_req_width;
1117	double cur0_width_ub;
1118	double cur0_req_per_width;
1119	double hactive_cur0;
1120
1121	memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
1122	memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
1123
1124	DTRACE("DLG: %s: cstate_en = %d", __func__, cstate_en);
1125	DTRACE("DLG: %s: pstate_en = %d", __func__, pstate_en);
1126	DTRACE("DLG: %s: vm_en     = %d", __func__, vm_en);
1127	DTRACE("DLG: %s: iflip_en  = %d", __func__, iflip_en);
1128
1129	/* ------------------------- */
1130	/* Section 1.5.2.1: OTG dependent Params */
1131	/* ------------------------- */
1132	DTRACE("DLG: %s: dppclk_freq_in_mhz     = %3.2f", __func__, dppclk_freq_in_mhz);
1133	DTRACE("DLG: %s: dispclk_freq_in_mhz    = %3.2f", __func__, dispclk_freq_in_mhz);
1134	DTRACE("DLG: %s: refclk_freq_in_mhz     = %3.2f", __func__, refclk_freq_in_mhz);
1135	DTRACE("DLG: %s: pclk_freq_in_mhz       = %3.2f", __func__, pclk_freq_in_mhz);
1136	DTRACE("DLG: %s: interlaced             = %d", __func__, interlaced);
1137
1138	ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
1139	ASSERT(ref_freq_to_pix_freq < 4.0);
1140	disp_dlg_regs->ref_freq_to_pix_freq =
1141			(unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
1142	disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal
1143			* dml_pow(2, 8));
1144	disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end
1145			* (double) ref_freq_to_pix_freq);
1146	ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13));
1147	disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; /* 15 bits */
1148
1149	prefetch_xy_calc_in_dcfclk = 24.0; /* TODO: ip_param */
1150	min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz;
1151	t_calc_us = prefetch_xy_calc_in_dcfclk / min_dcfclk_mhz;
1152	min_ttu_vblank = dlg_sys_param.t_urg_wm_us;
1153	if (cstate_en)
1154		min_ttu_vblank = dml_max(dlg_sys_param.t_sr_wm_us, min_ttu_vblank);
1155	if (pstate_en)
1156		min_ttu_vblank = dml_max(dlg_sys_param.t_mclk_wm_us, min_ttu_vblank);
1157	min_ttu_vblank = min_ttu_vblank + t_calc_us;
1158
1159	min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal;
1160	dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
1161
1162	disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start
1163			+ min_dst_y_ttu_vblank) * dml_pow(2, 2));
1164	ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18));
1165
1166	DTRACE("DLG: %s: min_dcfclk_mhz                         = %3.2f", __func__, min_dcfclk_mhz);
1167	DTRACE("DLG: %s: min_ttu_vblank                         = %3.2f", __func__, min_ttu_vblank);
1168	DTRACE(
1169			"DLG: %s: min_dst_y_ttu_vblank                   = %3.2f",
1170			__func__,
1171			min_dst_y_ttu_vblank);
1172	DTRACE("DLG: %s: t_calc_us                              = %3.2f", __func__, t_calc_us);
1173	DTRACE(
1174			"DLG: %s: disp_dlg_regs->min_dst_y_next_start    = 0x%0x",
1175			__func__,
1176			disp_dlg_regs->min_dst_y_next_start);
1177	DTRACE(
1178			"DLG: %s: ref_freq_to_pix_freq                   = %3.2f",
1179			__func__,
1180			ref_freq_to_pix_freq);
1181
1182	/* ------------------------- */
1183	/* Section 1.5.2.2: Prefetch, Active and TTU  */
1184	/* ------------------------- */
1185	/* Prefetch Calc */
1186	/* Source */
1187	dcc_en = e2e_pipe_param.pipe.src.dcc;
1188	dual_plane = is_dual_plane(
1189			(enum source_format_class) e2e_pipe_param.pipe.src.source_format);
1190	mode_422 = 0; /* TODO */
1191	access_dir = (e2e_pipe_param.pipe.src.source_scan == dm_vert); /* vp access direction: horizontal or vertical accessed */
1192	bytes_per_element_l = get_bytes_per_element(
1193			(enum source_format_class) e2e_pipe_param.pipe.src.source_format,
1194			0);
1195	bytes_per_element_c = get_bytes_per_element(
1196			(enum source_format_class) e2e_pipe_param.pipe.src.source_format,
1197			1);
1198	vp_height_l = e2e_pipe_param.pipe.src.viewport_height;
1199	vp_width_l = e2e_pipe_param.pipe.src.viewport_width;
1200	vp_height_c = e2e_pipe_param.pipe.src.viewport_height_c;
1201	vp_width_c = e2e_pipe_param.pipe.src.viewport_width_c;
1202
1203	/* Scaling */
1204	htaps_l = e2e_pipe_param.pipe.scale_taps.htaps;
1205	htaps_c = e2e_pipe_param.pipe.scale_taps.htaps_c;
1206	hratios_l = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio;
1207	hratios_c = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio_c;
1208	vratio_l = e2e_pipe_param.pipe.scale_ratio_depth.vscl_ratio;
1209	vratio_c = e2e_pipe_param.pipe.scale_ratio_depth.vscl_ratio_c;
1210
1211	line_time_in_us = (htotal / pclk_freq_in_mhz);
1212	vinit_l = e2e_pipe_param.pipe.scale_ratio_depth.vinit;
1213	vinit_c = e2e_pipe_param.pipe.scale_ratio_depth.vinit_c;
1214	vinit_bot_l = e2e_pipe_param.pipe.scale_ratio_depth.vinit_bot;
1215	vinit_bot_c = e2e_pipe_param.pipe.scale_ratio_depth.vinit_bot_c;
1216
1217	swath_height_l = rq_dlg_param.rq_l.swath_height;
1218	swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub;
1219	dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub;
1220	dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub;
1221	meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub;
1222	meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub;
1223
1224	swath_height_c = rq_dlg_param.rq_c.swath_height;
1225	swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub;
1226	dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub;
1227	dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub;
1228
1229	meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub;
1230	vupdate_offset = e2e_pipe_param.pipe.dest.vupdate_offset;
1231	vupdate_width = e2e_pipe_param.pipe.dest.vupdate_width;
1232	vready_offset = e2e_pipe_param.pipe.dest.vready_offset;
1233
1234	dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
1235	dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
1236	pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz
1237			+ dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz;
1238
1239	vstartup_start = e2e_pipe_param.pipe.dest.vstartup_start;
1240
1241	if (interlaced)
1242		vstartup_start = vstartup_start / 2;
1243
1244	if (vstartup_start >= min_vblank) {
1245		DTRACE(
1246				"WARNING_DLG: %s:  vblank_start=%d vblank_end=%d",
1247				__func__,
1248				vblank_start,
1249				vblank_end);
1250		DTRACE(
1251				"WARNING_DLG: %s:  vstartup_start=%d should be less than min_vblank=%d",
1252				__func__,
1253				vstartup_start,
1254				min_vblank);
1255		min_vblank = vstartup_start + 1;
1256		DTRACE(
1257				"WARNING_DLG: %s:  vstartup_start=%d should be less than min_vblank=%d",
1258				__func__,
1259				vstartup_start,
1260				min_vblank);
1261	}
1262
1263	dst_x_after_scaler = 0;
1264	dst_y_after_scaler = 0;
1265
1266	if (e2e_pipe_param.pipe.src.is_hsplit)
1267		dst_x_after_scaler = pixel_rate_delay_subtotal
1268				+ e2e_pipe_param.pipe.dest.recout_width;
1269	else
1270		dst_x_after_scaler = pixel_rate_delay_subtotal;
1271
1272	if (e2e_pipe_param.dout.output_format == dm_420)
1273		dst_y_after_scaler = 1;
1274	else
1275		dst_y_after_scaler = 0;
1276
1277	if (dst_x_after_scaler >= htotal) {
1278		dst_x_after_scaler = dst_x_after_scaler - htotal;
1279		dst_y_after_scaler = dst_y_after_scaler + 1;
1280	}
1281
1282	DTRACE("DLG: %s: htotal                                 = %d", __func__, htotal);
1283	DTRACE(
1284			"DLG: %s: pixel_rate_delay_subtotal              = %d",
1285			__func__,
1286			pixel_rate_delay_subtotal);
1287	DTRACE("DLG: %s: dst_x_after_scaler                     = %d", __func__, dst_x_after_scaler);
1288	DTRACE("DLG: %s: dst_y_after_scaler                     = %d", __func__, dst_y_after_scaler);
1289
1290	line_wait = mode_lib->soc.urgent_latency_us;
1291	if (cstate_en)
1292		line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait);
1293	if (pstate_en)
1294		line_wait = dml_max(
1295				mode_lib->soc.dram_clock_change_latency_us
1296						+ mode_lib->soc.urgent_latency_us,
1297				line_wait);
1298	line_wait = line_wait / line_time_in_us;
1299
1300	line_o = (double) dst_y_after_scaler + dst_x_after_scaler / (double) htotal;
1301	line_setup = (double) (vupdate_offset + vupdate_width + vready_offset) / (double) htotal;
1302	line_calc = t_calc_us / line_time_in_us;
1303
1304	DTRACE(
1305			"DLG: %s: soc.sr_enter_plus_exit_time_us     = %3.2f",
1306			__func__,
1307			(double) mode_lib->soc.sr_enter_plus_exit_time_us);
1308	DTRACE(
1309			"DLG: %s: soc.dram_clock_change_latency_us   = %3.2f",
1310			__func__,
1311			(double) mode_lib->soc.dram_clock_change_latency_us);
1312	DTRACE(
1313			"DLG: %s: soc.urgent_latency_us              = %3.2f",
1314			__func__,
1315			mode_lib->soc.urgent_latency_us);
1316
1317	DTRACE("DLG: %s: swath_height_l     = %d", __func__, swath_height_l);
1318	if (dual_plane)
1319		DTRACE("DLG: %s: swath_height_c     = %d", __func__, swath_height_c);
1320
1321	DTRACE(
1322			"DLG: %s: t_srx_delay_us     = %3.2f",
1323			__func__,
1324			(double) dlg_sys_param.t_srx_delay_us);
1325	DTRACE("DLG: %s: line_time_in_us    = %3.2f", __func__, (double) line_time_in_us);
1326	DTRACE("DLG: %s: vupdate_offset     = %d", __func__, vupdate_offset);
1327	DTRACE("DLG: %s: vupdate_width      = %d", __func__, vupdate_width);
1328	DTRACE("DLG: %s: vready_offset      = %d", __func__, vready_offset);
1329	DTRACE("DLG: %s: line_time_in_us    = %3.2f", __func__, line_time_in_us);
1330	DTRACE("DLG: %s: line_wait          = %3.2f", __func__, line_wait);
1331	DTRACE("DLG: %s: line_o             = %3.2f", __func__, line_o);
1332	DTRACE("DLG: %s: line_setup         = %3.2f", __func__, line_setup);
1333	DTRACE("DLG: %s: line_calc          = %3.2f", __func__, line_calc);
1334
1335	dst_y_prefetch = ((double) min_vblank - 1.0)
1336			- (line_setup + line_calc + line_wait + line_o);
1337	DTRACE("DLG: %s: dst_y_prefetch (before rnd) = %3.2f", __func__, dst_y_prefetch);
1338	ASSERT(dst_y_prefetch >= 2.0);
1339
1340	dst_y_prefetch = dml_floor(4.0 * (dst_y_prefetch + 0.125), 1) / 4;
1341	DTRACE("DLG: %s: dst_y_prefetch (after rnd) = %3.2f", __func__, dst_y_prefetch);
1342
1343	t_pre_us = dst_y_prefetch * line_time_in_us;
1344	vm_bytes = 0;
1345	meta_row_bytes = 0;
1346
1347	if (dcc_en && vm_en)
1348		vm_bytes = meta_pte_bytes_per_frame_ub_l;
1349	if (dcc_en)
1350		meta_row_bytes = meta_bytes_per_row_ub_l;
1351
1352	max_num_sw_l = 0;
1353	max_num_sw_c = 0;
1354	max_partial_sw_l = 0;
1355	max_partial_sw_c = 0;
1356
1357	max_vinit_l = interlaced ? dml_max(vinit_l, vinit_bot_l) : vinit_l;
1358	max_vinit_c = interlaced ? dml_max(vinit_c, vinit_bot_c) : vinit_c;
1359
1360	get_swath_need(mode_lib, &max_num_sw_l, &max_partial_sw_l, swath_height_l, max_vinit_l);
1361	if (dual_plane)
1362		get_swath_need(
1363				mode_lib,
1364				&max_num_sw_c,
1365				&max_partial_sw_c,
1366				swath_height_c,
1367				max_vinit_c);
1368
1369	lsw_l = max_num_sw_l * swath_height_l + max_partial_sw_l;
1370	lsw_c = max_num_sw_c * swath_height_c + max_partial_sw_c;
1371	sw_bytes_ub_l = lsw_l * swath_width_ub_l * bytes_per_element_l;
1372	sw_bytes_ub_c = lsw_c * swath_width_ub_c * bytes_per_element_c;
1373	sw_bytes = 0;
1374	dpte_row_bytes = 0;
1375
1376	if (vm_en) {
1377		if (dual_plane)
1378			dpte_row_bytes = dpte_bytes_per_row_ub_l + dpte_bytes_per_row_ub_c;
1379		else
1380			dpte_row_bytes = dpte_bytes_per_row_ub_l;
1381	} else {
1382		dpte_row_bytes = 0;
1383	}
1384
1385	if (dual_plane)
1386		sw_bytes = sw_bytes_ub_l + sw_bytes_ub_c;
1387	else
1388		sw_bytes = sw_bytes_ub_l;
1389
1390	DTRACE("DLG: %s: sw_bytes_ub_l           = %d", __func__, sw_bytes_ub_l);
1391	DTRACE("DLG: %s: sw_bytes_ub_c           = %d", __func__, sw_bytes_ub_c);
1392	DTRACE("DLG: %s: sw_bytes                = %d", __func__, sw_bytes);
1393	DTRACE("DLG: %s: vm_bytes                = %d", __func__, vm_bytes);
1394	DTRACE("DLG: %s: meta_row_bytes          = %d", __func__, meta_row_bytes);
1395	DTRACE("DLG: %s: dpte_row_bytes          = %d", __func__, dpte_row_bytes);
1396
1397	prefetch_bw = (vm_bytes + 2 * dpte_row_bytes + 2 * meta_row_bytes + sw_bytes) / t_pre_us;
1398	flip_bw = ((vm_bytes + dpte_row_bytes + meta_row_bytes) * dlg_sys_param.total_flip_bw)
1399			/ (double) dlg_sys_param.total_flip_bytes;
1400	t_vm_us = line_time_in_us / 4.0;
1401	if (vm_en && dcc_en) {
1402		t_vm_us = dml_max(
1403				dlg_sys_param.t_extra_us,
1404				dml_max((double) vm_bytes / prefetch_bw, t_vm_us));
1405
1406		if (iflip_en && !dual_plane) {
1407			t_vm_us = dml_max(mode_lib->soc.urgent_latency_us, t_vm_us);
1408			if (flip_bw > 0.)
1409				t_vm_us = dml_max(vm_bytes / flip_bw, t_vm_us);
1410		}
1411	}
1412
1413	t_r0_us = dml_max(dlg_sys_param.t_extra_us - t_vm_us, line_time_in_us - t_vm_us);
1414
1415	if (vm_en || dcc_en) {
1416		t_r0_us = dml_max(
1417				(double) (dpte_row_bytes + meta_row_bytes) / prefetch_bw,
1418				dlg_sys_param.t_extra_us);
1419		t_r0_us = dml_max((double) (line_time_in_us - t_vm_us), t_r0_us);
1420
1421		if (iflip_en && !dual_plane) {
1422			t_r0_us = dml_max(mode_lib->soc.urgent_latency_us * 2.0, t_r0_us);
1423			if (flip_bw > 0.)
1424				t_r0_us = dml_max(
1425						(dpte_row_bytes + meta_row_bytes) / flip_bw,
1426						t_r0_us);
1427		}
1428	}
1429
1430	disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; /* in terms of line */
1431	disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; /* in terms of refclk */
1432	ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13));
1433	DTRACE(
1434			"DLG: %s: disp_dlg_regs->dst_y_after_scaler      = 0x%0x",
1435			__func__,
1436			disp_dlg_regs->dst_y_after_scaler);
1437	DTRACE(
1438			"DLG: %s: disp_dlg_regs->refcyc_x_after_scaler   = 0x%0x",
1439			__func__,
1440			disp_dlg_regs->refcyc_x_after_scaler);
1441
1442	disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
1443	DTRACE(
1444			"DLG: %s: disp_dlg_regs->dst_y_prefetch  = %d",
1445			__func__,
1446			disp_dlg_regs->dst_y_prefetch);
1447
1448	dst_y_per_vm_vblank = 0.0;
1449	dst_y_per_row_vblank = 0.0;
1450
1451	dst_y_per_vm_vblank = t_vm_us / line_time_in_us;
1452	dst_y_per_vm_vblank = dml_floor(4.0 * (dst_y_per_vm_vblank + 0.125), 1) / 4.0;
1453	disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
1454
1455	dst_y_per_row_vblank = t_r0_us / line_time_in_us;
1456	dst_y_per_row_vblank = dml_floor(4.0 * (dst_y_per_row_vblank + 0.125), 1) / 4.0;
1457	disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
1458
1459	DTRACE("DLG: %s: lsw_l                   = %d", __func__, lsw_l);
1460	DTRACE("DLG: %s: lsw_c                   = %d", __func__, lsw_c);
1461	DTRACE("DLG: %s: dpte_bytes_per_row_ub_l = %d", __func__, dpte_bytes_per_row_ub_l);
1462	DTRACE("DLG: %s: dpte_bytes_per_row_ub_c = %d", __func__, dpte_bytes_per_row_ub_c);
1463
1464	DTRACE("DLG: %s: prefetch_bw            = %3.2f", __func__, prefetch_bw);
1465	DTRACE("DLG: %s: flip_bw                = %3.2f", __func__, flip_bw);
1466	DTRACE("DLG: %s: t_pre_us               = %3.2f", __func__, t_pre_us);
1467	DTRACE("DLG: %s: t_vm_us                = %3.2f", __func__, t_vm_us);
1468	DTRACE("DLG: %s: t_r0_us                = %3.2f", __func__, t_r0_us);
1469	DTRACE("DLG: %s: dst_y_per_vm_vblank    = %3.2f", __func__, dst_y_per_vm_vblank);
1470	DTRACE("DLG: %s: dst_y_per_row_vblank   = %3.2f", __func__, dst_y_per_row_vblank);
1471	DTRACE("DLG: %s: dst_y_prefetch         = %3.2f", __func__, dst_y_prefetch);
1472
1473	min_dst_y_per_vm_vblank = 8.0;
1474	min_dst_y_per_row_vblank = 16.0;
1475	if (htotal <= 75) {
1476		min_vblank = 300;
1477		min_dst_y_per_vm_vblank = 100.0;
1478		min_dst_y_per_row_vblank = 100.0;
1479	}
1480
1481	ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank);
1482	ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank);
1483
1484	ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
1485	lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank);
1486
1487	DTRACE("DLG: %s: lsw = %3.2f", __func__, lsw);
1488
1489	vratio_pre_l = get_vratio_pre(
1490			mode_lib,
1491			max_num_sw_l,
1492			max_partial_sw_l,
1493			swath_height_l,
1494			max_vinit_l,
1495			lsw);
1496	vratio_pre_c = 1.0;
1497	if (dual_plane)
1498		vratio_pre_c = get_vratio_pre(
1499				mode_lib,
1500				max_num_sw_c,
1501				max_partial_sw_c,
1502				swath_height_c,
1503				max_vinit_c,
1504				lsw);
1505
1506	DTRACE("DLG: %s: vratio_pre_l=%3.2f", __func__, vratio_pre_l);
1507	DTRACE("DLG: %s: vratio_pre_c=%3.2f", __func__, vratio_pre_c);
1508
1509	ASSERT(vratio_pre_l <= 4.0);
1510	if (vratio_pre_l >= 4.0)
1511		disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 21) - 1;
1512	else
1513		disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
1514
1515	ASSERT(vratio_pre_c <= 4.0);
1516	if (vratio_pre_c >= 4.0)
1517		disp_dlg_regs->vratio_prefetch_c = (unsigned int) dml_pow(2, 21) - 1;
1518	else
1519		disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
1520
1521	disp_dlg_regs->refcyc_per_pte_group_vblank_l =
1522			(unsigned int) (dst_y_per_row_vblank * (double) htotal
1523					* ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
1524	ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13));
1525
1526	disp_dlg_regs->refcyc_per_pte_group_vblank_c =
1527			(unsigned int) (dst_y_per_row_vblank * (double) htotal
1528					* ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_c);
1529	ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int) dml_pow(2, 13));
1530
1531	disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
1532			(unsigned int) (dst_y_per_row_vblank * (double) htotal
1533					* ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
1534	ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13));
1535
1536	disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
1537			disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;/* dcc for 4:2:0 is not supported in dcn1.0.  assigned to be the same as _l for now */
1538
1539	/* Active */
1540	req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub;
1541	req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub;
1542	meta_row_height_l = rq_dlg_param.rq_l.meta_row_height;
1543	swath_width_pixels_ub_l = 0;
1544	swath_width_pixels_ub_c = 0;
1545	scaler_rec_in_width_l = 0;
1546	scaler_rec_in_width_c = 0;
1547	dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
1548	dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
1549
1550	disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l
1551			/ (double) vratio_l * dml_pow(2, 2));
1552	ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17));
1553
1554	disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c
1555			/ (double) vratio_c * dml_pow(2, 2));
1556	ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_c < (unsigned int) dml_pow(2, 17));
1557
1558	disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l
1559			/ (double) vratio_l * dml_pow(2, 2));
1560	ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17));
1561
1562	disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; /* dcc for 4:2:0 is not supported in dcn1.0.  assigned to be the same as _l for now */
1563
1564	disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l
1565			/ (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1566			/ (double) dpte_groups_per_row_ub_l);
1567	if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
1568		disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
1569
1570	disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int) ((double) dpte_row_height_c
1571			/ (double) vratio_c * (double) htotal * ref_freq_to_pix_freq
1572			/ (double) dpte_groups_per_row_ub_c);
1573	if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
1574		disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
1575
1576	disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l
1577			/ (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1578			/ (double) meta_chunks_per_row_ub_l);
1579	if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
1580		disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
1581
1582	if (mode_422) {
1583		swath_width_pixels_ub_l = swath_width_ub_l * 2; /* *2 for 2 pixel per element */
1584		swath_width_pixels_ub_c = swath_width_ub_c * 2;
1585	} else {
1586		swath_width_pixels_ub_l = swath_width_ub_l * 1;
1587		swath_width_pixels_ub_c = swath_width_ub_c * 1;
1588	}
1589
1590	hscale_pixel_rate_l = 0.;
1591	hscale_pixel_rate_c = 0.;
1592	min_hratio_fact_l = 1.0;
1593	min_hratio_fact_c = 1.0;
1594
1595	if (htaps_l <= 1)
1596		min_hratio_fact_l = 2.0;
1597	else if (htaps_l <= 6) {
1598		if ((hratios_l * 2.0) > 4.0)
1599			min_hratio_fact_l = 4.0;
1600		else
1601			min_hratio_fact_l = hratios_l * 2.0;
1602	} else {
1603		if (hratios_l > 4.0)
1604			min_hratio_fact_l = 4.0;
1605		else
1606			min_hratio_fact_l = hratios_l;
1607	}
1608
1609	hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
1610
1611	if (htaps_c <= 1)
1612		min_hratio_fact_c = 2.0;
1613	else if (htaps_c <= 6) {
1614		if ((hratios_c * 2.0) > 4.0)
1615			min_hratio_fact_c = 4.0;
1616		else
1617			min_hratio_fact_c = hratios_c * 2.0;
1618	} else {
1619		if (hratios_c > 4.0)
1620			min_hratio_fact_c = 4.0;
1621		else
1622			min_hratio_fact_c = hratios_c;
1623	}
1624
1625	hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
1626
1627	refcyc_per_line_delivery_pre_l = 0.;
1628	refcyc_per_line_delivery_pre_c = 0.;
1629	refcyc_per_line_delivery_l = 0.;
1630	refcyc_per_line_delivery_c = 0.;
1631
1632	refcyc_per_req_delivery_pre_l = 0.;
1633	refcyc_per_req_delivery_pre_c = 0.;
1634	refcyc_per_req_delivery_l = 0.;
1635	refcyc_per_req_delivery_c = 0.;
1636	refcyc_per_req_delivery_pre_cur0 = 0.;
1637	refcyc_per_req_delivery_cur0 = 0.;
1638
1639	full_recout_width = 0;
1640	if (e2e_pipe_param.pipe.src.is_hsplit) {
1641		if (e2e_pipe_param.pipe.dest.full_recout_width == 0) {
1642			DTRACE("DLG: %s: Warningfull_recout_width not set in hsplit mode", __func__);
1643			full_recout_width = e2e_pipe_param.pipe.dest.recout_width * 2; /* assume half split for dcn1 */
1644		} else
1645			full_recout_width = e2e_pipe_param.pipe.dest.full_recout_width;
1646	} else
1647		full_recout_width = e2e_pipe_param.pipe.dest.recout_width;
1648
1649	refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
1650			mode_lib,
1651			refclk_freq_in_mhz,
1652			pclk_freq_in_mhz,
1653			full_recout_width,
1654			vratio_pre_l,
1655			hscale_pixel_rate_l,
1656			swath_width_pixels_ub_l,
1657			1); /* per line */
1658
1659	refcyc_per_line_delivery_l = get_refcyc_per_delivery(
1660			mode_lib,
1661			refclk_freq_in_mhz,
1662			pclk_freq_in_mhz,
1663			full_recout_width,
1664			vratio_l,
1665			hscale_pixel_rate_l,
1666			swath_width_pixels_ub_l,
1667			1); /* per line */
1668
1669	DTRACE("DLG: %s: full_recout_width              = %d", __func__, full_recout_width);
1670	DTRACE("DLG: %s: hscale_pixel_rate_l            = %3.2f", __func__, hscale_pixel_rate_l);
1671	DTRACE(
1672			"DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f",
1673			__func__,
1674			refcyc_per_line_delivery_pre_l);
1675	DTRACE(
1676			"DLG: %s: refcyc_per_line_delivery_l     = %3.2f",
1677			__func__,
1678			refcyc_per_line_delivery_l);
1679
1680	disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(
1681			refcyc_per_line_delivery_pre_l,
1682			1);
1683	disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(
1684			refcyc_per_line_delivery_l,
1685			1);
1686	ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13));
1687	ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13));
1688
1689	if (dual_plane) {
1690		refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
1691				mode_lib,
1692				refclk_freq_in_mhz,
1693				pclk_freq_in_mhz,
1694				full_recout_width,
1695				vratio_pre_c,
1696				hscale_pixel_rate_c,
1697				swath_width_pixels_ub_c,
1698				1); /* per line */
1699
1700		refcyc_per_line_delivery_c = get_refcyc_per_delivery(
1701				mode_lib,
1702				refclk_freq_in_mhz,
1703				pclk_freq_in_mhz,
1704				full_recout_width,
1705				vratio_c,
1706				hscale_pixel_rate_c,
1707				swath_width_pixels_ub_c,
1708				1); /* per line */
1709
1710		DTRACE(
1711				"DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f",
1712				__func__,
1713				refcyc_per_line_delivery_pre_c);
1714		DTRACE(
1715				"DLG: %s: refcyc_per_line_delivery_c     = %3.2f",
1716				__func__,
1717				refcyc_per_line_delivery_c);
1718
1719		disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(
1720				refcyc_per_line_delivery_pre_c,
1721				1);
1722		disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(
1723				refcyc_per_line_delivery_c,
1724				1);
1725		ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13));
1726		ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13));
1727	}
1728	disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
1729
1730	/* TTU - Luma / Chroma */
1731	if (access_dir) { /* vertical access */
1732		scaler_rec_in_width_l = vp_height_l;
1733		scaler_rec_in_width_c = vp_height_c;
1734	} else {
1735		scaler_rec_in_width_l = vp_width_l;
1736		scaler_rec_in_width_c = vp_width_c;
1737	}
1738
1739	refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
1740			mode_lib,
1741			refclk_freq_in_mhz,
1742			pclk_freq_in_mhz,
1743			full_recout_width,
1744			vratio_pre_l,
1745			hscale_pixel_rate_l,
1746			scaler_rec_in_width_l,
1747			req_per_swath_ub_l); /* per req */
1748	refcyc_per_req_delivery_l = get_refcyc_per_delivery(
1749			mode_lib,
1750			refclk_freq_in_mhz,
1751			pclk_freq_in_mhz,
1752			full_recout_width,
1753			vratio_l,
1754			hscale_pixel_rate_l,
1755			scaler_rec_in_width_l,
1756			req_per_swath_ub_l); /* per req */
1757
1758	DTRACE(
1759			"DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f",
1760			__func__,
1761			refcyc_per_req_delivery_pre_l);
1762	DTRACE(
1763			"DLG: %s: refcyc_per_req_delivery_l     = %3.2f",
1764			__func__,
1765			refcyc_per_req_delivery_l);
1766
1767	disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
1768			* dml_pow(2, 10));
1769	disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
1770			* dml_pow(2, 10));
1771
1772	ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
1773	ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
1774
1775	if (dual_plane) {
1776		refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
1777				mode_lib,
1778				refclk_freq_in_mhz,
1779				pclk_freq_in_mhz,
1780				full_recout_width,
1781				vratio_pre_c,
1782				hscale_pixel_rate_c,
1783				scaler_rec_in_width_c,
1784				req_per_swath_ub_c); /* per req  */
1785		refcyc_per_req_delivery_c = get_refcyc_per_delivery(
1786				mode_lib,
1787				refclk_freq_in_mhz,
1788				pclk_freq_in_mhz,
1789				full_recout_width,
1790				vratio_c,
1791				hscale_pixel_rate_c,
1792				scaler_rec_in_width_c,
1793				req_per_swath_ub_c); /* per req */
1794
1795		DTRACE(
1796				"DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f",
1797				__func__,
1798				refcyc_per_req_delivery_pre_c);
1799		DTRACE(
1800				"DLG: %s: refcyc_per_req_delivery_c     = %3.2f",
1801				__func__,
1802				refcyc_per_req_delivery_c);
1803
1804		disp_ttu_regs->refcyc_per_req_delivery_pre_c =
1805				(unsigned int) (refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
1806		disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
1807				* dml_pow(2, 10));
1808
1809		ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
1810		ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
1811	}
1812
1813	/* TTU - Cursor */
1814	hratios_cur0 = e2e_pipe_param.pipe.scale_ratio_depth.hscl_ratio;
1815	cur0_src_width = e2e_pipe_param.pipe.src.cur0_src_width; /* cursor source width */
1816	cur0_bpp = (enum cursor_bpp) e2e_pipe_param.pipe.src.cur0_bpp;
1817	cur0_req_size = 0;
1818	cur0_req_width = 0;
1819	cur0_width_ub = 0.0;
1820	cur0_req_per_width = 0.0;
1821	hactive_cur0 = 0.0;
1822
1823	ASSERT(cur0_src_width <= 256);
1824
1825	if (cur0_src_width > 0) {
1826		unsigned int cur0_bit_per_pixel = 0;
1827
1828		if (cur0_bpp == dm_cur_2bit) {
1829			cur0_req_size = 64; /* byte */
1830			cur0_bit_per_pixel = 2;
1831		} else { /* 32bit */
1832			cur0_bit_per_pixel = 32;
1833			if (cur0_src_width >= 1 && cur0_src_width <= 16)
1834				cur0_req_size = 64;
1835			else if (cur0_src_width >= 17 && cur0_src_width <= 31)
1836				cur0_req_size = 128;
1837			else
1838				cur0_req_size = 256;
1839		}
1840
1841		cur0_req_width = (double) cur0_req_size / ((double) cur0_bit_per_pixel / 8.0);
1842		cur0_width_ub = dml_ceil((double) cur0_src_width / (double) cur0_req_width, 1)
1843				* (double) cur0_req_width;
1844		cur0_req_per_width = cur0_width_ub / (double) cur0_req_width;
1845		hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* TODO: oswin to think about what to do for cursor */
1846
1847		if (vratio_pre_l <= 1.0) {
1848			refcyc_per_req_delivery_pre_cur0 = hactive_cur0 * ref_freq_to_pix_freq
1849					/ (double) cur0_req_per_width;
1850		} else {
1851			refcyc_per_req_delivery_pre_cur0 = (double) refclk_freq_in_mhz
1852					* (double) cur0_src_width / hscale_pixel_rate_l
1853					/ (double) cur0_req_per_width;
1854		}
1855
1856		disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
1857				(unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
1858		ASSERT(refcyc_per_req_delivery_pre_cur0 < dml_pow(2, 13));
1859
1860		if (vratio_l <= 1.0) {
1861			refcyc_per_req_delivery_cur0 = hactive_cur0 * ref_freq_to_pix_freq
1862					/ (double) cur0_req_per_width;
1863		} else {
1864			refcyc_per_req_delivery_cur0 = (double) refclk_freq_in_mhz
1865					* (double) cur0_src_width / hscale_pixel_rate_l
1866					/ (double) cur0_req_per_width;
1867		}
1868
1869		DTRACE("DLG: %s: cur0_req_width                     = %d", __func__, cur0_req_width);
1870		DTRACE(
1871				"DLG: %s: cur0_width_ub                      = %3.2f",
1872				__func__,
1873				cur0_width_ub);
1874		DTRACE(
1875				"DLG: %s: cur0_req_per_width                 = %3.2f",
1876				__func__,
1877				cur0_req_per_width);
1878		DTRACE(
1879				"DLG: %s: hactive_cur0                       = %3.2f",
1880				__func__,
1881				hactive_cur0);
1882		DTRACE(
1883				"DLG: %s: refcyc_per_req_delivery_pre_cur0   = %3.2f",
1884				__func__,
1885				refcyc_per_req_delivery_pre_cur0);
1886		DTRACE(
1887				"DLG: %s: refcyc_per_req_delivery_cur0       = %3.2f",
1888				__func__,
1889				refcyc_per_req_delivery_cur0);
1890
1891		disp_ttu_regs->refcyc_per_req_delivery_cur0 =
1892				(unsigned int) (refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
1893		ASSERT(refcyc_per_req_delivery_cur0 < dml_pow(2, 13));
1894	} else {
1895		disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = 0;
1896		disp_ttu_regs->refcyc_per_req_delivery_cur0 = 0;
1897	}
1898
1899	/* TTU - Misc */
1900	disp_ttu_regs->qos_level_low_wm = 0;
1901	ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
1902	disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
1903			* ref_freq_to_pix_freq);
1904	ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
1905
1906	disp_ttu_regs->qos_level_flip = 14;
1907	disp_ttu_regs->qos_level_fixed_l = 8;
1908	disp_ttu_regs->qos_level_fixed_c = 8;
1909	disp_ttu_regs->qos_level_fixed_cur0 = 8;
1910	disp_ttu_regs->qos_ramp_disable_l = 0;
1911	disp_ttu_regs->qos_ramp_disable_c = 0;
1912	disp_ttu_regs->qos_ramp_disable_cur0 = 0;
1913
1914	disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
1915	ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
1916
1917	print__ttu_regs_st(mode_lib, *disp_ttu_regs);
1918	print__dlg_regs_st(mode_lib, *disp_dlg_regs);
1919}
1920