1/*
2 * Copyright 2019-2021 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26#include "resource.h"
27#include "clk_mgr.h"
28#include "dcn31/dcn31_resource.h"
29#include "dcn315/dcn315_resource.h"
30#include "dcn316/dcn316_resource.h"
31
32#include "dml/dcn20/dcn20_fpu.h"
33#include "dcn31_fpu.h"
34
35/**
36 * DOC: DCN31x FPU manipulation Overview
37 *
38 * The DCN architecture relies on FPU operations, which require special
39 * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we
40 * want to avoid spreading FPU access across multiple files. With this idea in
41 * mind, this file aims to centralize all DCN3.1.x functions that require FPU
42 * access in a single place. Code in this file follows the following code
43 * pattern:
44 *
45 * 1. Functions that use FPU operations should be isolated in static functions.
46 * 2. The FPU functions should have the noinline attribute to ensure anything
47 *    that deals with FP register is contained within this call.
 * 3. All functions that need to be accessed outside this file require a
 *    public interface that does not use any FPU reference.
 * 4. Developers **must not** use DC_FP_START/END in this file, but they need
 *    to ensure that the caller invokes it before accessing any function
 *    available in this file. For this reason, public functions in this file
 *    must invoke dc_assert_fp_enabled();
54 */
55
56struct _vcs_dpi_ip_params_st dcn3_1_ip = {
57	.gpuvm_enable = 1,
58	.gpuvm_max_page_table_levels = 1,
59	.hostvm_enable = 1,
60	.hostvm_max_page_table_levels = 2,
61	.rob_buffer_size_kbytes = 64,
62	.det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
63	.config_return_buffer_size_in_kbytes = 1792,
64	.compressed_buffer_segment_size_in_kbytes = 64,
65	.meta_fifo_size_in_kentries = 32,
66	.zero_size_buffer_entries = 512,
67	.compbuf_reserved_space_64b = 256,
68	.compbuf_reserved_space_zs = 64,
69	.dpp_output_buffer_pixels = 2560,
70	.opp_output_buffer_lines = 1,
71	.pixel_chunk_size_kbytes = 8,
72	.meta_chunk_size_kbytes = 2,
73	.min_meta_chunk_size_bytes = 256,
74	.writeback_chunk_size_kbytes = 8,
75	.ptoi_supported = false,
76	.num_dsc = 3,
77	.maximum_dsc_bits_per_component = 10,
78	.dsc422_native_support = false,
79	.is_line_buffer_bpp_fixed = true,
80	.line_buffer_fixed_bpp = 48,
81	.line_buffer_size_bits = 789504,
82	.max_line_buffer_lines = 12,
83	.writeback_interface_buffer_size_kbytes = 90,
84	.max_num_dpp = 4,
85	.max_num_otg = 4,
86	.max_num_hdmi_frl_outputs = 1,
87	.max_num_wb = 1,
88	.max_dchub_pscl_bw_pix_per_clk = 4,
89	.max_pscl_lb_bw_pix_per_clk = 2,
90	.max_lb_vscl_bw_pix_per_clk = 4,
91	.max_vscl_hscl_bw_pix_per_clk = 4,
92	.max_hscl_ratio = 6,
93	.max_vscl_ratio = 6,
94	.max_hscl_taps = 8,
95	.max_vscl_taps = 8,
96	.dpte_buffer_size_in_pte_reqs_luma = 64,
97	.dpte_buffer_size_in_pte_reqs_chroma = 34,
98	.dispclk_ramp_margin_percent = 1,
99	.max_inter_dcn_tile_repeaters = 8,
100	.cursor_buffer_size = 16,
101	.cursor_chunk_size = 2,
102	.writeback_line_buffer_buffer_size = 0,
103	.writeback_min_hscl_ratio = 1,
104	.writeback_min_vscl_ratio = 1,
105	.writeback_max_hscl_ratio = 1,
106	.writeback_max_vscl_ratio = 1,
107	.writeback_max_hscl_taps = 1,
108	.writeback_max_vscl_taps = 1,
109	.dppclk_delay_subtotal = 46,
110	.dppclk_delay_scl = 50,
111	.dppclk_delay_scl_lb_only = 16,
112	.dppclk_delay_cnvc_formatter = 27,
113	.dppclk_delay_cnvc_cursor = 6,
114	.dispclk_delay_subtotal = 119,
115	.dynamic_metadata_vm_enabled = false,
116	.odm_combine_4to1_supported = false,
117	.dcc_supported = true,
118};
119
120static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
121		/*TODO: correct dispclk/dppclk voltage level determination*/
122	.clock_limits = {
123		{
124			.state = 0,
125			.dispclk_mhz = 1200.0,
126			.dppclk_mhz = 1200.0,
127			.phyclk_mhz = 600.0,
128			.phyclk_d18_mhz = 667.0,
129			.dscclk_mhz = 186.0,
130			.dtbclk_mhz = 625.0,
131		},
132		{
133			.state = 1,
134			.dispclk_mhz = 1200.0,
135			.dppclk_mhz = 1200.0,
136			.phyclk_mhz = 810.0,
137			.phyclk_d18_mhz = 667.0,
138			.dscclk_mhz = 209.0,
139			.dtbclk_mhz = 625.0,
140		},
141		{
142			.state = 2,
143			.dispclk_mhz = 1200.0,
144			.dppclk_mhz = 1200.0,
145			.phyclk_mhz = 810.0,
146			.phyclk_d18_mhz = 667.0,
147			.dscclk_mhz = 209.0,
148			.dtbclk_mhz = 625.0,
149		},
150		{
151			.state = 3,
152			.dispclk_mhz = 1200.0,
153			.dppclk_mhz = 1200.0,
154			.phyclk_mhz = 810.0,
155			.phyclk_d18_mhz = 667.0,
156			.dscclk_mhz = 371.0,
157			.dtbclk_mhz = 625.0,
158		},
159		{
160			.state = 4,
161			.dispclk_mhz = 1200.0,
162			.dppclk_mhz = 1200.0,
163			.phyclk_mhz = 810.0,
164			.phyclk_d18_mhz = 667.0,
165			.dscclk_mhz = 417.0,
166			.dtbclk_mhz = 625.0,
167		},
168	},
169	.num_states = 5,
170	.sr_exit_time_us = 9.0,
171	.sr_enter_plus_exit_time_us = 11.0,
172	.sr_exit_z8_time_us = 442.0,
173	.sr_enter_plus_exit_z8_time_us = 560.0,
174	.writeback_latency_us = 12.0,
175	.dram_channel_width_bytes = 4,
176	.round_trip_ping_latency_dcfclk_cycles = 106,
177	.urgent_latency_pixel_data_only_us = 4.0,
178	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
179	.urgent_latency_vm_data_only_us = 4.0,
180	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
181	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
182	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
183	.pct_ideal_sdp_bw_after_urgent = 80.0,
184	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
185	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
186	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
187	.max_avg_sdp_bw_use_normal_percent = 60.0,
188	.max_avg_dram_bw_use_normal_percent = 60.0,
189	.fabric_datapath_to_dcn_data_return_bytes = 32,
190	.return_bus_width_bytes = 64,
191	.downspread_percent = 0.38,
192	.dcn_downspread_percent = 0.5,
193	.gpuvm_min_page_size_bytes = 4096,
194	.hostvm_min_page_size_bytes = 4096,
195	.do_urgent_latency_adjustment = false,
196	.urgent_latency_adjustment_fabric_clock_component_us = 0,
197	.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
198};
199
200struct _vcs_dpi_ip_params_st dcn3_15_ip = {
201	.gpuvm_enable = 1,
202	.gpuvm_max_page_table_levels = 1,
203	.hostvm_enable = 1,
204	.hostvm_max_page_table_levels = 2,
205	.rob_buffer_size_kbytes = 64,
206	.det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
207	.min_comp_buffer_size_kbytes = 64,
208	.config_return_buffer_size_in_kbytes = 1024,
209	.compressed_buffer_segment_size_in_kbytes = 64,
210	.meta_fifo_size_in_kentries = 32,
211	.zero_size_buffer_entries = 512,
212	.compbuf_reserved_space_64b = 256,
213	.compbuf_reserved_space_zs = 64,
214	.dpp_output_buffer_pixels = 2560,
215	.opp_output_buffer_lines = 1,
216	.pixel_chunk_size_kbytes = 8,
217	.meta_chunk_size_kbytes = 2,
218	.min_meta_chunk_size_bytes = 256,
219	.writeback_chunk_size_kbytes = 8,
220	.ptoi_supported = false,
221	.num_dsc = 3,
222	.maximum_dsc_bits_per_component = 10,
223	.dsc422_native_support = false,
224	.is_line_buffer_bpp_fixed = true,
225	.line_buffer_fixed_bpp = 48,
226	.line_buffer_size_bits = 789504,
227	.max_line_buffer_lines = 12,
228	.writeback_interface_buffer_size_kbytes = 90,
229	.max_num_dpp = 4,
230	.max_num_otg = 4,
231	.max_num_hdmi_frl_outputs = 1,
232	.max_num_wb = 1,
233	.max_dchub_pscl_bw_pix_per_clk = 4,
234	.max_pscl_lb_bw_pix_per_clk = 2,
235	.max_lb_vscl_bw_pix_per_clk = 4,
236	.max_vscl_hscl_bw_pix_per_clk = 4,
237	.max_hscl_ratio = 6,
238	.max_vscl_ratio = 6,
239	.max_hscl_taps = 8,
240	.max_vscl_taps = 8,
241	.dpte_buffer_size_in_pte_reqs_luma = 64,
242	.dpte_buffer_size_in_pte_reqs_chroma = 34,
243	.dispclk_ramp_margin_percent = 1,
244	.max_inter_dcn_tile_repeaters = 9,
245	.cursor_buffer_size = 16,
246	.cursor_chunk_size = 2,
247	.writeback_line_buffer_buffer_size = 0,
248	.writeback_min_hscl_ratio = 1,
249	.writeback_min_vscl_ratio = 1,
250	.writeback_max_hscl_ratio = 1,
251	.writeback_max_vscl_ratio = 1,
252	.writeback_max_hscl_taps = 1,
253	.writeback_max_vscl_taps = 1,
254	.dppclk_delay_subtotal = 46,
255	.dppclk_delay_scl = 50,
256	.dppclk_delay_scl_lb_only = 16,
257	.dppclk_delay_cnvc_formatter = 27,
258	.dppclk_delay_cnvc_cursor = 6,
259	.dispclk_delay_subtotal = 119,
260	.dynamic_metadata_vm_enabled = false,
261	.odm_combine_4to1_supported = false,
262	.dcc_supported = true,
263};
264
265static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
266	.sr_exit_time_us = 9.0,
267	.sr_enter_plus_exit_time_us = 11.0,
268	.sr_exit_z8_time_us = 50.0,
269	.sr_enter_plus_exit_z8_time_us = 50.0,
270	.writeback_latency_us = 12.0,
271	.dram_channel_width_bytes = 4,
272	.round_trip_ping_latency_dcfclk_cycles = 106,
273	.urgent_latency_pixel_data_only_us = 4.0,
274	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
275	.urgent_latency_vm_data_only_us = 4.0,
276	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
277	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
278	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
279	.pct_ideal_sdp_bw_after_urgent = 80.0,
280	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
281	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
282	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
283	.max_avg_sdp_bw_use_normal_percent = 60.0,
284	.max_avg_dram_bw_use_normal_percent = 60.0,
285	.fabric_datapath_to_dcn_data_return_bytes = 32,
286	.return_bus_width_bytes = 64,
287	.downspread_percent = 0.38,
288	.dcn_downspread_percent = 0.38,
289	.gpuvm_min_page_size_bytes = 4096,
290	.hostvm_min_page_size_bytes = 4096,
291	.do_urgent_latency_adjustment = false,
292	.urgent_latency_adjustment_fabric_clock_component_us = 0,
293	.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
294	.dispclk_dppclk_vco_speed_mhz = 2400.0,
295	.num_chans = 4,
296	.dummy_pstate_latency_us = 10.0
297};
298
299struct _vcs_dpi_ip_params_st dcn3_16_ip = {
300	.gpuvm_enable = 1,
301	.gpuvm_max_page_table_levels = 1,
302	.hostvm_enable = 1,
303	.hostvm_max_page_table_levels = 2,
304	.rob_buffer_size_kbytes = 64,
305	.det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
306	.min_comp_buffer_size_kbytes = 64,
307	.config_return_buffer_size_in_kbytes = 1024,
308	.compressed_buffer_segment_size_in_kbytes = 64,
309	.meta_fifo_size_in_kentries = 32,
310	.zero_size_buffer_entries = 512,
311	.compbuf_reserved_space_64b = 256,
312	.compbuf_reserved_space_zs = 64,
313	.dpp_output_buffer_pixels = 2560,
314	.opp_output_buffer_lines = 1,
315	.pixel_chunk_size_kbytes = 8,
316	.meta_chunk_size_kbytes = 2,
317	.min_meta_chunk_size_bytes = 256,
318	.writeback_chunk_size_kbytes = 8,
319	.ptoi_supported = false,
320	.num_dsc = 3,
321	.maximum_dsc_bits_per_component = 10,
322	.dsc422_native_support = false,
323	.is_line_buffer_bpp_fixed = true,
324	.line_buffer_fixed_bpp = 48,
325	.line_buffer_size_bits = 789504,
326	.max_line_buffer_lines = 12,
327	.writeback_interface_buffer_size_kbytes = 90,
328	.max_num_dpp = 4,
329	.max_num_otg = 4,
330	.max_num_hdmi_frl_outputs = 1,
331	.max_num_wb = 1,
332	.max_dchub_pscl_bw_pix_per_clk = 4,
333	.max_pscl_lb_bw_pix_per_clk = 2,
334	.max_lb_vscl_bw_pix_per_clk = 4,
335	.max_vscl_hscl_bw_pix_per_clk = 4,
336	.max_hscl_ratio = 6,
337	.max_vscl_ratio = 6,
338	.max_hscl_taps = 8,
339	.max_vscl_taps = 8,
340	.dpte_buffer_size_in_pte_reqs_luma = 64,
341	.dpte_buffer_size_in_pte_reqs_chroma = 34,
342	.dispclk_ramp_margin_percent = 1,
343	.max_inter_dcn_tile_repeaters = 8,
344	.cursor_buffer_size = 16,
345	.cursor_chunk_size = 2,
346	.writeback_line_buffer_buffer_size = 0,
347	.writeback_min_hscl_ratio = 1,
348	.writeback_min_vscl_ratio = 1,
349	.writeback_max_hscl_ratio = 1,
350	.writeback_max_vscl_ratio = 1,
351	.writeback_max_hscl_taps = 1,
352	.writeback_max_vscl_taps = 1,
353	.dppclk_delay_subtotal = 46,
354	.dppclk_delay_scl = 50,
355	.dppclk_delay_scl_lb_only = 16,
356	.dppclk_delay_cnvc_formatter = 27,
357	.dppclk_delay_cnvc_cursor = 6,
358	.dispclk_delay_subtotal = 119,
359	.dynamic_metadata_vm_enabled = false,
360	.odm_combine_4to1_supported = false,
361	.dcc_supported = true,
362};
363
364static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
365		/*TODO: correct dispclk/dppclk voltage level determination*/
366	.clock_limits = {
367		{
368			.state = 0,
369			.dispclk_mhz = 556.0,
370			.dppclk_mhz = 556.0,
371			.phyclk_mhz = 600.0,
372			.phyclk_d18_mhz = 445.0,
373			.dscclk_mhz = 186.0,
374			.dtbclk_mhz = 625.0,
375		},
376		{
377			.state = 1,
378			.dispclk_mhz = 625.0,
379			.dppclk_mhz = 625.0,
380			.phyclk_mhz = 810.0,
381			.phyclk_d18_mhz = 667.0,
382			.dscclk_mhz = 209.0,
383			.dtbclk_mhz = 625.0,
384		},
385		{
386			.state = 2,
387			.dispclk_mhz = 625.0,
388			.dppclk_mhz = 625.0,
389			.phyclk_mhz = 810.0,
390			.phyclk_d18_mhz = 667.0,
391			.dscclk_mhz = 209.0,
392			.dtbclk_mhz = 625.0,
393		},
394		{
395			.state = 3,
396			.dispclk_mhz = 1112.0,
397			.dppclk_mhz = 1112.0,
398			.phyclk_mhz = 810.0,
399			.phyclk_d18_mhz = 667.0,
400			.dscclk_mhz = 371.0,
401			.dtbclk_mhz = 625.0,
402		},
403		{
404			.state = 4,
405			.dispclk_mhz = 1250.0,
406			.dppclk_mhz = 1250.0,
407			.phyclk_mhz = 810.0,
408			.phyclk_d18_mhz = 667.0,
409			.dscclk_mhz = 417.0,
410			.dtbclk_mhz = 625.0,
411		},
412	},
413	.num_states = 5,
414	.sr_exit_time_us = 9.0,
415	.sr_enter_plus_exit_time_us = 11.0,
416	.sr_exit_z8_time_us = 442.0,
417	.sr_enter_plus_exit_z8_time_us = 560.0,
418	.writeback_latency_us = 12.0,
419	.dram_channel_width_bytes = 4,
420	.round_trip_ping_latency_dcfclk_cycles = 106,
421	.urgent_latency_pixel_data_only_us = 4.0,
422	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
423	.urgent_latency_vm_data_only_us = 4.0,
424	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
425	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
426	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
427	.pct_ideal_sdp_bw_after_urgent = 80.0,
428	.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
429	.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
430	.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
431	.max_avg_sdp_bw_use_normal_percent = 60.0,
432	.max_avg_dram_bw_use_normal_percent = 60.0,
433	.fabric_datapath_to_dcn_data_return_bytes = 32,
434	.return_bus_width_bytes = 64,
435	.downspread_percent = 0.38,
436	.dcn_downspread_percent = 0.5,
437	.gpuvm_min_page_size_bytes = 4096,
438	.hostvm_min_page_size_bytes = 4096,
439	.do_urgent_latency_adjustment = false,
440	.urgent_latency_adjustment_fabric_clock_component_us = 0,
441	.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
442	.dispclk_dppclk_vco_speed_mhz = 2500.0,
443};
444
445void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
446				  int pipe_cnt)
447{
448	dc_assert_fp_enabled();
449
450	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
451	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
452}
453
454void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
455{
456	dc_assert_fp_enabled();
457
458	if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
459		context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
460		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
461		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
462	}
463}
464
465void dcn315_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
466{
467	dc_assert_fp_enabled();
468
469	if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
470		/* For 315 pstate change is only supported if possible in vactive */
471		if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[context->bw_ctx.dml.vba.VoltageLevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_vactive)
472			context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
473		else
474			context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
475		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us =
476				dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
477		context->bw_ctx.dml.soc.sr_exit_time_us =
478				dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
479	}
480}
481
482void dcn31_calculate_wm_and_dlg_fp(
483		struct dc *dc, struct dc_state *context,
484		display_e2e_pipe_params_st *pipes,
485		int pipe_cnt,
486		int vlevel)
487{
488	int i, pipe_idx, total_det = 0, active_hubp_count = 0;
489	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
490
491	dc_assert_fp_enabled();
492
493	if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
494		dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
495
496	/* We don't recalculate clocks for 0 pipe configs, which can block
497	 * S0i3 as high clocks will block low power states
498	 * Override any clocks that can block S0i3 to min here
499	 */
500	if (pipe_cnt == 0) {
501		context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0
502		return;
503	}
504
505	pipes[0].clks_cfg.voltage = vlevel;
506	pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
507	pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
508
509	/* Set A:
510	 * All clocks min required
511	 *
512	 * Set A calculated last so that following calculations are based on Set A
513	 */
514	dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
515	context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
516	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
517	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
518	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
519	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
520	context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
521	context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
522	context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
523	context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
524	context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
525	context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a;
526	context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a;
527	context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a;
528
529	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
530		if (!context->res_ctx.pipe_ctx[i].stream)
531			continue;
532
533		if (context->res_ctx.pipe_ctx[i].plane_state)
534			active_hubp_count++;
535
536		pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
537		pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
538
539		if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
540			pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
541			pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
542		}
543		if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
544			pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
545		if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
546			pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
547
548		pipe_idx++;
549	}
550
551	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
552	/* For 31x apu pstate change is only supported if possible in vactive*/
553	context->bw_ctx.bw.dcn.clk.p_state_change_support =
554			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive;
555	/* If DCN isn't making memory requests we can allow pstate change and lower clocks */
556	if (!active_hubp_count) {
557		context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
558		context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
559		context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
560		context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
561		context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
562		context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
563		context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
564		for (i = 0; i < dc->res_pool->pipe_count; i++)
565			if (context->res_ctx.pipe_ctx[i].stream)
566				context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
567	}
568	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
569		if (!context->res_ctx.pipe_ctx[i].stream)
570			continue;
571
572		context->res_ctx.pipe_ctx[i].det_buffer_size_kb =
573				get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
574		if (context->res_ctx.pipe_ctx[i].det_buffer_size_kb > 384)
575			context->res_ctx.pipe_ctx[i].det_buffer_size_kb /= 2;
576		total_det += context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
577		pipe_idx++;
578	}
579	context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det;
580}
581
582void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
583{
584	struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
585	struct clk_limit_table *clk_table = &bw_params->clk_table;
586	unsigned int i, closest_clk_lvl;
587	int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
588	int j;
589
590	dc_assert_fp_enabled();
591
592	memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits));
593
594	// Default clock levels are used for diags, which may lead to overclocking.
595	dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
596	dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count;
597	dcn3_1_soc.num_chans = bw_params->num_channels;
598
599	ASSERT(clk_table->num_entries);
600
601	/* Prepass to find max clocks independent of voltage level. */
602	for (i = 0; i < clk_table->num_entries; ++i) {
603		if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
604			max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
605		if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
606			max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
607	}
608
609	for (i = 0; i < clk_table->num_entries; i++) {
610		/* loop backwards*/
611		for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) {
612			if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
613				closest_clk_lvl = j;
614				break;
615			}
616		}
617
618		s[i].state = i;
619
620		/* Clocks dependent on voltage level. */
621		s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
622		s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
623		s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
624		s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
625			2 * clk_table->entries[i].wck_ratio;
626
627		/* Clocks independent of voltage level. */
628		s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
629			dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
630
631		s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
632			dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
633
634		s[i].dram_bw_per_chan_gbps =
635			dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
636		s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
637		s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
638		s[i].phyclk_d18_mhz =
639			dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
640		s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
641	}
642	if (clk_table->num_entries) {
643		dcn3_1_soc.num_states = clk_table->num_entries;
644	}
645
646	memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits));
647
648	dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
649	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
650
651	if ((int)(dcn3_1_soc.dram_clock_change_latency_us * 1000)
652				!= dc->debug.dram_clock_change_latency_ns
653			&& dc->debug.dram_clock_change_latency_ns) {
654		dcn3_1_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
655	}
656
657	dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31);
658}
659
660void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
661{
662	struct clk_limit_table *clk_table = &bw_params->clk_table;
663	int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0;
664
665	dc_assert_fp_enabled();
666
667	dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
668	dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count;
669
670	if (bw_params->num_channels > 0)
671		dcn3_15_soc.num_chans = bw_params->num_channels;
672	if (bw_params->dram_channel_width_bytes > 0)
673		dcn3_15_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
674
675	ASSERT(clk_table->num_entries);
676
677	/* Setup soc to always use max dispclk/dppclk to avoid odm-to-lower-voltage */
678	for (i = 0; i < clk_table->num_entries; ++i) {
679		if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
680			max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
681		if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
682			max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
683	}
684
685	for (i = 0; i < clk_table->num_entries; i++) {
686		dcn3_15_soc.clock_limits[i].state = i;
687
688		/* Clocks dependent on voltage level. */
689		dcn3_15_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
690		dcn3_15_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
691		dcn3_15_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
692		dcn3_15_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
693
694		/* These aren't actually read from smu, but rather set in clk_mgr defaults */
695		dcn3_15_soc.clock_limits[i].dtbclk_mhz = clk_table->entries[i].dtbclk_mhz;
696		dcn3_15_soc.clock_limits[i].phyclk_d18_mhz = clk_table->entries[i].phyclk_d18_mhz;
697		dcn3_15_soc.clock_limits[i].phyclk_mhz = clk_table->entries[i].phyclk_mhz;
698
699		/* Clocks independent of voltage level. */
700		dcn3_15_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
701		dcn3_15_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
702		dcn3_15_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3.0;
703	}
704	dcn3_15_soc.num_states = clk_table->num_entries;
705
706
707	/* Set vco to max_dispclk * 2 to make sure the highest dispclk is always available for dml calcs,
708	 * no impact outside of dml validation
709	 */
710	dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
711
712	if ((int)(dcn3_15_soc.dram_clock_change_latency_us * 1000)
713				!= dc->debug.dram_clock_change_latency_ns
714			&& dc->debug.dram_clock_change_latency_ns) {
715		dcn3_15_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
716	}
717
718	dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN315);
719}
720
721void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
722{
723	struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
724	struct clk_limit_table *clk_table = &bw_params->clk_table;
725	unsigned int i, closest_clk_lvl;
726	int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
727	int j;
728
729	dc_assert_fp_enabled();
730
731	memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits));
732
733	// Default clock levels are used for diags, which may lead to overclocking.
734	dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
735	dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count;
736	dcn3_16_soc.num_chans = bw_params->num_channels;
737
738	ASSERT(clk_table->num_entries);
739
740	/* Prepass to find max clocks independent of voltage level. */
741	for (i = 0; i < clk_table->num_entries; ++i) {
742		if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
743			max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
744		if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
745			max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
746	}
747
748	for (i = 0; i < clk_table->num_entries; i++) {
749		/* loop backwards*/
750		for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) {
751			if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <=
752			    clk_table->entries[i].dcfclk_mhz) {
753				closest_clk_lvl = j;
754				break;
755			}
756		}
757		// Ported from DCN315
758		if (clk_table->num_entries == 1) {
759			/*smu gives one DPM level, let's take the highest one*/
760			closest_clk_lvl = dcn3_16_soc.num_states - 1;
761		}
762
763		s[i].state = i;
764
765		/* Clocks dependent on voltage level. */
766		s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
767		if (clk_table->num_entries == 1 &&
768		    s[i].dcfclk_mhz <
769		    dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
770			/*SMU fix not released yet*/
771			s[i].dcfclk_mhz =
772				dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
773		}
774		s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
775		s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
776		s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
777			2 * clk_table->entries[i].wck_ratio;
778
779		/* Clocks independent of voltage level. */
780		s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
781			dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
782
783		s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
784			dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
785
786		s[i].dram_bw_per_chan_gbps =
787			dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
788		s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
789		s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
790		s[i].phyclk_d18_mhz =
791			dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
792		s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
793	}
794	if (clk_table->num_entries) {
795		dcn3_16_soc.num_states = clk_table->num_entries;
796	}
797
798	memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits));
799
800	if (max_dispclk_mhz) {
801		dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
802		dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
803	}
804	if ((int)(dcn3_16_soc.dram_clock_change_latency_us * 1000)
805				!= dc->debug.dram_clock_change_latency_ns
806			&& dc->debug.dram_clock_change_latency_ns) {
807		dcn3_16_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
808	}
809
810	dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31);
811}
812
813int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
814{
815	return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
816}
817
818int dcn_get_approx_det_segs_required_for_pstate(
819		struct _vcs_dpi_soc_bounding_box_st *soc,
820		int pix_clk_100hz, int bpp, int seg_size_kb)
821{
822	/* Roughly calculate required crb to hide latency. In practice there is slightly
823	 * more buffer available for latency hiding
824	 */
825	return (int)(soc->dram_clock_change_latency_us * pix_clk_100hz * bpp
826					/ 10240000 + seg_size_kb - 1) /	seg_size_kb;
827}
828