1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2020, The Linux Foundation. All rights reserved.
4 */
5#include <linux/kernel.h>
6#include <linux/sizes.h>
7#include <linux/videodev2.h>
8
9#include "hfi.h"
10#include "hfi_plat_bufs.h"
11#include "helpers.h"
12
#define MIN_INPUT_BUFFERS				4
#define MIN_ENC_OUTPUT_BUFFERS				4

#define NV12_UBWC_Y_TILE_WIDTH				32
#define NV12_UBWC_Y_TILE_HEIGHT				8
#define NV12_UBWC_UV_TILE_WIDTH				16
#define NV12_UBWC_UV_TILE_HEIGHT			8
#define TP10_UBWC_Y_TILE_WIDTH				48
#define TP10_UBWC_Y_TILE_HEIGHT				4
#define METADATA_STRIDE_MULTIPLE			64
#define METADATA_HEIGHT_MULTIPLE			16
/* Alignment applied to almost every partial size computed in this file */
#define HFI_DMA_ALIGNMENT				256

/* Per-unit multipliers used by the SIZE_*_LB_* line buffer formulas */
#define MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE		64
#define MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE		64
#define MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE		64
#define MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE		640
#define MAX_FE_NBR_DATA_CB_LINE_BUFFER_SIZE		320
#define MAX_FE_NBR_DATA_CR_LINE_BUFFER_SIZE		320

#define MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE		(128 / 8)
#define MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE		(128 / 8)
#define MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE		(128 / 8)

#define MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE		(64 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE		(32 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE		(16 * 2 * 3)

#define MAX_TILE_COLUMNS				32 /* 8K/256 */

#define VPP_CMD_MAX_SIZE				BIT(20)
#define NUM_HW_PIC_BUF					32
#define BIN_BUFFER_THRESHOLD				(1280 * 736)
#define H264D_MAX_SLICE					1800
/* sizeof(h264d_buftab_t) aligned to 256 */
#define SIZE_H264D_BUFTAB_T				256
/* sizeof(h264d_hw_pic_t) aligned to 32 */
#define SIZE_H264D_HW_PIC_T				BIT(11)
#define SIZE_H264D_BSE_CMD_PER_BUF			(32 * 4)
#define SIZE_H264D_VPP_CMD_PER_BUF			512

/*
 * Line Buffer definitions, One for Luma and 1/2 for each Chroma
 *
 * In the macros below "((x) + 15) >> 4" is x in 16-pixel units, rounded up,
 * and "((x) + 63) >> 6" is x in 64-pixel units, rounded up. Every macro
 * takes both (width, height) even when it only uses one of them.
 */
#define SIZE_H264D_LB_FE_TOP_DATA(width, height)	\
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * ALIGN((width), 16) * 3)

#define SIZE_H264D_LB_FE_TOP_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_FE_LEFT_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_SE_TOP_CTRL(width, height)	\
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_SE_LEFT_CTRL(width, height)	\
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_PE_TOP_DATA(width, height)	\
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_VSP_TOP(width, height)	(((((width) + 15) >> 4) << 7))

#define SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height)	\
	(ALIGN((height), 16) * 32)

#define SIZE_H264D_QP(width, height)	\
	((((width) + 63) >> 6) * (((height) + 63) >> 6) * 128)

#define SIZE_HW_PIC(size_per_buf)	(NUM_HW_PIC_BUF * (size_per_buf))

#define H264_CABAC_HDR_RATIO_HD_TOT	1
#define H264_CABAC_RES_RATIO_HD_TOT	3

/*
 * Some content needs more bin buffer, but limit the buffer
 * size for high resolution
 */
#define NUM_SLIST_BUF_H264		(256 + 32)
#define SIZE_SLIST_BUF_H264		512
#define LCU_MAX_SIZE_PELS		64
#define LCU_MIN_SIZE_PELS		16
#define SIZE_SEI_USERDATA		4096

#define H265D_MAX_SLICE			3600
#define SIZE_H265D_HW_PIC_T		SIZE_H264D_HW_PIC_T
#define SIZE_H265D_BSE_CMD_PER_BUF	(16 * sizeof(u32))
#define SIZE_H265D_VPP_CMD_PER_BUF	256

/*
 * H265D line buffers. Macro arguments are parenthesized at every use so
 * that these macros follow the same convention as the H264D ones above.
 */
#define SIZE_H265D_LB_FE_TOP_DATA(width, height)	\
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * (ALIGN((width), 64) + 8) * 2)

#define SIZE_H265D_LB_FE_TOP_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN((width), LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_FE_LEFT_CTRL(width, height)	\
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN((height), LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_SE_TOP_CTRL(width, height)	\
	((LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * (((width) + 15) >> 4))
114
115static inline u32 size_h265d_lb_se_left_ctrl(u32 width, u32 height)
116{
117	u32 x, y, z;
118
119	x = ((height + 16 - 1) / 8) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
120	y = ((height + 32 - 1) / 8) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
121	z = ((height + 64 - 1) / 8) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
122
123	return max3(x, y, z);
124}
125
/* width is parenthesized like the arguments of the other line buffer macros */
#define SIZE_H265D_LB_PE_TOP_DATA(width, height)	\
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE *	\
	(ALIGN((width), LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS))

/* 128 bytes per 64-pixel unit of width (TOP) or height (LEFT), rounded up */
#define SIZE_H265D_LB_VSP_TOP(width, height)	((((width) + 63) >> 6) * 128)

#define SIZE_H265D_LB_VSP_LEFT(width, height)	((((height) + 63) >> 6) * 128)

/* The next two simply reuse the H264D formulas */
#define SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height)	\
	SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height)

#define SIZE_H265D_QP(width, height)	SIZE_H264D_QP(width, height)

#define H265_CABAC_HDR_RATIO_HD_TOT	2
#define H265_CABAC_RES_RATIO_HD_TOT	2

/*
 * Some content needs more bin buffer, but limit the buffer size
 * for high resolution
 */
#define SIZE_SLIST_BUF_H265	BIT(10)
#define NUM_SLIST_BUF_H265	(80 + 20)
#define H265_NUM_TILE_COL	32
#define H265_NUM_TILE_ROW	128
#define H265_NUM_TILE		(H265_NUM_TILE_ROW * H265_NUM_TILE_COL + 1)
151
152static inline u32 size_vpxd_lb_fe_left_ctrl(u32 width, u32 height)
153{
154	u32 x, y, z;
155
156	x = ((height + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
157	y = ((height + 31) >> 5) * MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
158	z = ((height + 63) >> 6) * MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
159
160	return max3(x, y, z);
161}
162
/* width is parenthesized in both macros; height is accepted but unused */
#define SIZE_VPXD_LB_FE_TOP_CTRL(width, height)		\
	(((ALIGN((width), 64) + 8) * 10 * 2)) /* small line */
#define SIZE_VPXD_LB_SE_TOP_CTRL(width, height) \
	((((width) + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE)
167
168static inline u32 size_vpxd_lb_se_left_ctrl(u32 width, u32 height)
169{
170	u32 x, y, z;
171
172	x = ((height + 15) >> 4) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
173	y = ((height + 31) >> 5) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
174	z = ((height + 63) >> 6) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;
175
176	return max3(x, y, z);
177}
178
/*
 * VP8/VP9 decoder line buffer formulas. Arguments are parenthesized at
 * every use and each expansion is a single parenthesized expression, in
 * line with the H264D macros in this file.
 */
#define SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height)	\
	(ALIGN((ALIGN((height), 16) / (4 / 2)) * 64, 32))
#define SIZE_VP8D_LB_FE_TOP_DATA(width, height)			\
	((ALIGN((width), 16) + 8) * 10 * 2)
#define SIZE_VP9D_LB_FE_TOP_DATA(width, height)			\
	((ALIGN(ALIGN((width), 16), 64) + 8) * 10 * 2)
#define SIZE_VP8D_LB_PE_TOP_DATA(width, height)			\
	((ALIGN((width), 16) >> 4) * 64)
#define SIZE_VP9D_LB_PE_TOP_DATA(width, height)			\
	((ALIGN(ALIGN((width), 16), 64) >> 6) * 176)
#define SIZE_VP8D_LB_VSP_TOP(width, height)			\
	(((ALIGN((width), 16) >> 4) * 64 / 2) + 256)
#define SIZE_VP9D_LB_VSP_TOP(width, height)			\
	(((ALIGN(ALIGN((width), 16), 64) >> 6) * 64 * 8) + 256)

/* Fixed size, evaluated for an 8192x4320 frame in 64-pixel units */
#define HFI_IRIS2_VP9D_COMV_SIZE				\
	((((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8))

/* Factors applied to the VPx bin buffers: LVL * NUM / DEN per buffer */
#define VPX_DECODER_FRAME_CONCURENCY_LVL		2
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM	1
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN	2
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM	3
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN	2

#define VP8_NUM_FRAME_INFO_BUF			(5 + 1)
#define VP9_NUM_FRAME_INFO_BUF			32
#define VP8_NUM_PROBABILITY_TABLE_BUF		VP8_NUM_FRAME_INFO_BUF
#define VP9_NUM_PROBABILITY_TABLE_BUF		(VP9_NUM_FRAME_INFO_BUF + 4)
#define VP8_PROB_TABLE_SIZE			3840
#define VP9_PROB_TABLE_SIZE			3840

#define VP9_UDC_HEADER_BUF_SIZE			(3 * 128)
#define MAX_SUPERFRAME_HEADER_LEN		34
#define CCE_TILE_OFFSET_SIZE			ALIGN(32 * 4 * 4, 32)

#define QMATRIX_SIZE				(sizeof(u32) * 128 + 256)
#define MP2D_QPDUMP_SIZE			115200
#define HFI_IRIS2_ENC_PERSIST_SIZE		204800
#define HFI_MAX_COL_FRAME			6
#define HFI_VENUS_VENC_TRE_WB_BUFF_SIZE		(65 << 4) /* in Bytes */
#define HFI_VENUS_VENC_DB_LINE_BUFF_PER_MB	512
#define HFI_VENUS_VPPSG_MAX_REGISTERS		2048
#define HFI_VENUS_WIDTH_ALIGNMENT		128
#define HFI_VENUS_WIDTH_TEN_BIT_ALIGNMENT	192
#define HFI_VENUS_HEIGHT_ALIGNMENT		32

#define SYSTEM_LAL_TILE10	192
/* Macroblock (16x16) counts of a 1280x720 and a 4096x2304 frame */
#define NUM_MBS_720P		(((1280 + 15) >> 4) * ((720 + 15) >> 4))
#define NUM_MBS_4K		(((4096 + 15) >> 4) * ((2304 + 15) >> 4))
#define MB_SIZE_IN_PIXEL	(16 * 16)
#define HDR10PLUS_PAYLOAD_SIZE		1024
#define HDR10_HIST_EXTRADATA_SIZE	4096
231
232static u32 size_vpss_lb(u32 width, u32 height, u32 num_vpp_pipes)
233{
234	u32 vpss_4tap_top_buffer_size, vpss_div2_top_buffer_size;
235	u32 vpss_4tap_left_buffer_size, vpss_div2_left_buffer_size;
236	u32 opb_wr_top_line_luma_buf_size, opb_wr_top_line_chroma_buf_size;
237	u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size;
238	u32 macrotiling_size;
239	u32 size = 0;
240
241	vpss_4tap_top_buffer_size = 0;
242	vpss_div2_top_buffer_size = 0;
243	vpss_4tap_left_buffer_size = 0;
244	vpss_div2_left_buffer_size = 0;
245
246	macrotiling_size = 32;
247	opb_wr_top_line_luma_buf_size =
248		ALIGN(width, macrotiling_size) / macrotiling_size * 256;
249	opb_wr_top_line_luma_buf_size =
250		ALIGN(opb_wr_top_line_luma_buf_size, HFI_DMA_ALIGNMENT) +
251		(MAX_TILE_COLUMNS - 1) * 256;
252	opb_wr_top_line_luma_buf_size =
253		max(opb_wr_top_line_luma_buf_size, (32 * ALIGN(height, 16)));
254	opb_wr_top_line_chroma_buf_size = opb_wr_top_line_luma_buf_size;
255	opb_lb_wr_llb_y_buffer_size = ALIGN((ALIGN(height, 16) / 2) * 64, 32);
256	opb_lb_wr_llb_uv_buffer_size = opb_lb_wr_llb_y_buffer_size;
257	size = num_vpp_pipes *
258		2 * (vpss_4tap_top_buffer_size + vpss_div2_top_buffer_size) +
259		2 * (vpss_4tap_left_buffer_size + vpss_div2_left_buffer_size) +
260		opb_wr_top_line_luma_buf_size +
261		opb_wr_top_line_chroma_buf_size +
262		opb_lb_wr_llb_uv_buffer_size +
263		opb_lb_wr_llb_y_buffer_size;
264
265	return size;
266}
267
268static u32 size_h264d_hw_bin_buffer(u32 width, u32 height)
269{
270	u32 size_yuv, size_bin_hdr, size_bin_res;
271	u32 size = 0;
272	u32 product;
273
274	product = width * height;
275	size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
276		((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
277
278	size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT;
279	size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT;
280	size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
281	size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
282	size = size_bin_hdr + size_bin_res;
283
284	return size;
285}
286
287static u32 h264d_scratch_size(u32 width, u32 height, bool is_interlaced)
288{
289	u32 aligned_width = ALIGN(width, 16);
290	u32 aligned_height = ALIGN(height, 16);
291	u32 size = 0;
292
293	if (!is_interlaced)
294		size = size_h264d_hw_bin_buffer(aligned_width, aligned_height);
295
296	return size;
297}
298
299static u32 size_h265d_hw_bin_buffer(u32 width, u32 height)
300{
301	u32 size_yuv, size_bin_hdr, size_bin_res;
302	u32 size = 0;
303	u32 product;
304
305	product = width * height;
306	size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
307		((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
308	size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT;
309	size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT;
310	size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
311	size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
312	size = size_bin_hdr + size_bin_res;
313
314	return size;
315}
316
317static u32 h265d_scratch_size(u32 width, u32 height, bool is_interlaced)
318{
319	u32 aligned_width = ALIGN(width, 16);
320	u32 aligned_height = ALIGN(height, 16);
321	u32 size = 0;
322
323	if (!is_interlaced)
324		size = size_h265d_hw_bin_buffer(aligned_width, aligned_height);
325
326	return size;
327}
328
329static u32 vpxd_scratch_size(u32 width, u32 height, bool is_interlaced)
330{
331	u32 aligned_width = ALIGN(width, 16);
332	u32 aligned_height = ALIGN(height, 16);
333	u32 size_yuv = aligned_width * aligned_height * 3 / 2;
334	u32 size = 0;
335
336	if (!is_interlaced) {
337		u32 binbuffer1_size, binbufer2_size;
338
339		binbuffer1_size = max_t(u32, size_yuv,
340					((BIN_BUFFER_THRESHOLD * 3) >> 1));
341		binbuffer1_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
342				   VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM /
343				   VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN;
344		binbufer2_size = max_t(u32, size_yuv,
345				       ((BIN_BUFFER_THRESHOLD * 3) >> 1));
346		binbufer2_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
347				  VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM /
348				  VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN;
349		size = ALIGN(binbuffer1_size + binbufer2_size,
350			     HFI_DMA_ALIGNMENT);
351	}
352
353	return size;
354}
355
356static u32 mpeg2d_scratch_size(u32 width, u32 height, bool is_interlaced)
357{
358	return 0;
359}
360
361static u32 calculate_enc_output_frame_size(u32 width, u32 height, u32 rc_type)
362{
363	u32 aligned_width, aligned_height;
364	u32 mbs_per_frame;
365	u32 frame_size;
366
367	/*
368	 * Encoder output size calculation: 32 Align width/height
369	 * For resolution < 720p : YUVsize * 4
370	 * For resolution > 720p & <= 4K : YUVsize / 2
371	 * For resolution > 4k : YUVsize / 4
372	 * Initially frame_size = YUVsize * 2;
373	 */
374	aligned_width = ALIGN(width, 32);
375	aligned_height = ALIGN(height, 32);
376	mbs_per_frame = (ALIGN(aligned_height, 16) *
377			 ALIGN(aligned_width, 16)) / 256;
378	frame_size = width * height * 3;
379
380	if (mbs_per_frame < NUM_MBS_720P)
381		frame_size = frame_size << 1;
382	else if (mbs_per_frame <= NUM_MBS_4K)
383		frame_size = frame_size >> 2;
384	else
385		frame_size = frame_size >> 3;
386
387	if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ)
388		frame_size = frame_size << 1;
389
390	/*
391	 * In case of opaque color format bitdepth will be known
392	 * with first ETB, buffers allocated already with 8 bit
393	 * won't be sufficient for 10 bit
394	 * calculate size considering 10-bit by default
395	 * For 10-bit cases size = size * 1.25
396	 */
397	frame_size *= 5;
398	frame_size /= 4;
399
400	return ALIGN(frame_size, SZ_4K);
401}
402
403static u32 calculate_enc_scratch_size(u32 width, u32 height, u32 work_mode,
404				      u32 lcu_size, u32 num_vpp_pipes,
405				      u32 rc_type)
406{
407	u32 aligned_width, aligned_height, bitstream_size;
408	u32 total_bitbin_buffers, size_single_pipe, bitbin_size;
409	u32 sao_bin_buffer_size, padded_bin_size, size;
410
411	aligned_width = ALIGN(width, lcu_size);
412	aligned_height = ALIGN(height, lcu_size);
413	bitstream_size =
414		calculate_enc_output_frame_size(width, height, rc_type);
415
416	bitstream_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);
417
418	if (work_mode == VIDC_WORK_MODE_2) {
419		total_bitbin_buffers = 3;
420		bitbin_size = bitstream_size * 17 / 10;
421		bitbin_size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT);
422	} else {
423		total_bitbin_buffers = 1;
424		bitstream_size = aligned_width * aligned_height * 3;
425		bitbin_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);
426	}
427
428	if (num_vpp_pipes > 2)
429		size_single_pipe = bitbin_size / 2;
430	else
431		size_single_pipe = bitbin_size;
432
433	size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
434	sao_bin_buffer_size =
435		(64 * (((width + 32) * (height + 32)) >> 10)) + 384;
436	padded_bin_size = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
437	size_single_pipe = sao_bin_buffer_size + padded_bin_size;
438	size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
439	bitbin_size = size_single_pipe * num_vpp_pipes;
440	size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT) *
441		total_bitbin_buffers + 512;
442
443	return size;
444}
445
446static u32 h264e_scratch_size(u32 width, u32 height, u32 work_mode,
447			      u32 num_vpp_pipes, u32 rc_type)
448{
449	return calculate_enc_scratch_size(width, height, work_mode, 16,
450					  num_vpp_pipes, rc_type);
451}
452
453static u32 h265e_scratch_size(u32 width, u32 height, u32 work_mode,
454			      u32 num_vpp_pipes, u32 rc_type)
455{
456	return calculate_enc_scratch_size(width, height, work_mode, 32,
457					  num_vpp_pipes, rc_type);
458}
459
460static u32 vp8e_scratch_size(u32 width, u32 height, u32 work_mode,
461			     u32 num_vpp_pipes, u32 rc_type)
462{
463	return calculate_enc_scratch_size(width, height, work_mode, 16,
464					  num_vpp_pipes, rc_type);
465}
466
467static u32 hfi_iris2_h264d_comv_size(u32 width, u32 height,
468				     u32 yuv_buf_min_count)
469{
470	u32 frame_width_in_mbs = ((width + 15) >> 4);
471	u32 frame_height_in_mbs = ((height + 15) >> 4);
472	u32 col_mv_aligned_width = (frame_width_in_mbs << 7);
473	u32 col_zero_aligned_width = (frame_width_in_mbs << 2);
474	u32 col_zero_size = 0, size_colloc = 0, comv_size = 0;
475
476	col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16);
477	col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16);
478	col_zero_size =
479		col_zero_aligned_width * ((frame_height_in_mbs + 1) >> 1);
480	col_zero_size = ALIGN(col_zero_size, 64);
481	col_zero_size <<= 1;
482	col_zero_size = ALIGN(col_zero_size, 512);
483	size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1);
484	size_colloc = ALIGN(size_colloc, 64);
485	size_colloc <<= 1;
486	size_colloc = ALIGN(size_colloc, 512);
487	size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2);
488	comv_size = size_colloc * yuv_buf_min_count;
489	comv_size += 512;
490
491	return comv_size;
492}
493
494static u32 size_h264d_bse_cmd_buf(u32 height)
495{
496	u32 aligned_height = ALIGN(height, 32);
497
498	return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
499		     H264D_MAX_SLICE) * SIZE_H264D_BSE_CMD_PER_BUF;
500}
501
502static u32 size_h264d_vpp_cmd_buf(u32 height)
503{
504	u32 aligned_height = ALIGN(height, 32);
505	u32 size;
506
507	size = min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
508		     H264D_MAX_SLICE) * SIZE_H264D_VPP_CMD_PER_BUF;
509	if (size > VPP_CMD_MAX_SIZE)
510		size = VPP_CMD_MAX_SIZE;
511
512	return size;
513}
514
515static u32 hfi_iris2_h264d_non_comv_size(u32 width, u32 height,
516					 u32 num_vpp_pipes)
517{
518	u32 size_bse, size_vpp, size;
519
520	size_bse = size_h264d_bse_cmd_buf(height);
521	size_vpp = size_h264d_vpp_cmd_buf(height);
522	size =
523		ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
524		ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
525		ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
526		ALIGN(SIZE_H264D_LB_FE_TOP_DATA(width, height),
527		      HFI_DMA_ALIGNMENT) +
528		ALIGN(SIZE_H264D_LB_FE_TOP_CTRL(width, height),
529		      HFI_DMA_ALIGNMENT) +
530		ALIGN(SIZE_H264D_LB_FE_LEFT_CTRL(width, height),
531		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
532		ALIGN(SIZE_H264D_LB_SE_TOP_CTRL(width, height),
533		      HFI_DMA_ALIGNMENT) +
534		ALIGN(SIZE_H264D_LB_SE_LEFT_CTRL(width, height),
535		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
536		ALIGN(SIZE_H264D_LB_PE_TOP_DATA(width, height),
537		      HFI_DMA_ALIGNMENT) +
538		ALIGN(SIZE_H264D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
539		ALIGN(SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height),
540		      HFI_DMA_ALIGNMENT) * 2 +
541		ALIGN(SIZE_H264D_QP(width, height), HFI_DMA_ALIGNMENT);
542
543	return ALIGN(size, HFI_DMA_ALIGNMENT);
544}
545
546static u32 size_h265d_bse_cmd_buf(u32 width, u32 height)
547{
548	u32 size;
549
550	size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
551	       (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
552	       NUM_HW_PIC_BUF;
553	size = min_t(u32, size, H265D_MAX_SLICE + 1);
554	size = 2 * size * SIZE_H265D_BSE_CMD_PER_BUF;
555
556	return ALIGN(size, HFI_DMA_ALIGNMENT);
557}
558
559static u32 size_h265d_vpp_cmd_buf(u32 width, u32 height)
560{
561	u32 size;
562
563	size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
564	       (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
565	       NUM_HW_PIC_BUF;
566	size = min_t(u32, size, H265D_MAX_SLICE + 1);
567	size = ALIGN(size, 4);
568	size = 2 * size * SIZE_H265D_VPP_CMD_PER_BUF;
569	size = ALIGN(size, HFI_DMA_ALIGNMENT);
570	if (size > VPP_CMD_MAX_SIZE)
571		size = VPP_CMD_MAX_SIZE;
572
573	return size;
574}
575
576static u32 hfi_iris2_h265d_comv_size(u32 width, u32 height,
577				     u32 yuv_buf_count_min)
578{
579	u32 size;
580
581	size = ALIGN(((((width + 15) >> 4) * ((height + 15) >> 4)) << 8), 512);
582	size *= yuv_buf_count_min;
583	size += 512;
584
585	return size;
586}
587
588static u32 hfi_iris2_h265d_non_comv_size(u32 width, u32 height,
589					 u32 num_vpp_pipes)
590{
591	u32 size_bse, size_vpp, size;
592
593	size_bse = size_h265d_bse_cmd_buf(width, height);
594	size_vpp = size_h265d_vpp_cmd_buf(width, height);
595	size =
596		ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
597		ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
598		ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, HFI_DMA_ALIGNMENT) +
599		ALIGN(2 * sizeof(u16) *
600		(ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
601		(ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS),
602		       HFI_DMA_ALIGNMENT) +
603		ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
604		ALIGN(SIZE_H265D_LB_FE_TOP_DATA(width, height),
605		      HFI_DMA_ALIGNMENT) +
606		ALIGN(SIZE_H265D_LB_FE_TOP_CTRL(width, height),
607		      HFI_DMA_ALIGNMENT) +
608		ALIGN(SIZE_H265D_LB_FE_LEFT_CTRL(width, height),
609		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
610		ALIGN(size_h265d_lb_se_left_ctrl(width, height),
611		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
612		ALIGN(SIZE_H265D_LB_SE_TOP_CTRL(width, height),
613		      HFI_DMA_ALIGNMENT) +
614		ALIGN(SIZE_H265D_LB_PE_TOP_DATA(width, height),
615		      HFI_DMA_ALIGNMENT) +
616		ALIGN(SIZE_H265D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
617		ALIGN(SIZE_H265D_LB_VSP_LEFT(width, height),
618		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
619		ALIGN(SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height),
620		      HFI_DMA_ALIGNMENT)
621			* 4 +
622		ALIGN(SIZE_H265D_QP(width, height), HFI_DMA_ALIGNMENT);
623
624	return ALIGN(size, HFI_DMA_ALIGNMENT);
625}
626
627static u32 hfi_iris2_vp8d_comv_size(u32 width, u32 height,
628				    u32 yuv_min_buf_count)
629{
630	return (((width + 15) >> 4) * ((height + 15) >> 4) * 8 * 2);
631}
632
633static u32 h264d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
634			       bool split_mode_enabled, u32 num_vpp_pipes)
635{
636	u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;
637
638	co_mv_size = hfi_iris2_h264d_comv_size(width, height, min_buf_count);
639	nonco_mv_size = hfi_iris2_h264d_non_comv_size(width, height,
640						      num_vpp_pipes);
641	if (split_mode_enabled)
642		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
643
644	return co_mv_size + nonco_mv_size + vpss_lb_size;
645}
646
647static u32 h265d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
648			       bool split_mode_enabled, u32 num_vpp_pipes)
649{
650	u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;
651
652	co_mv_size = hfi_iris2_h265d_comv_size(width, height, min_buf_count);
653	nonco_mv_size = hfi_iris2_h265d_non_comv_size(width, height,
654						      num_vpp_pipes);
655	if (split_mode_enabled)
656		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
657
658	return co_mv_size + nonco_mv_size + vpss_lb_size +
659		HDR10_HIST_EXTRADATA_SIZE;
660}
661
662static u32 vp8d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
663			      bool split_mode_enabled, u32 num_vpp_pipes)
664{
665	u32 vpss_lb_size = 0, size;
666
667	size = hfi_iris2_vp8d_comv_size(width, height, 0);
668	size += ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
669		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
670		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
671		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
672		ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
673		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
674		      HFI_DMA_ALIGNMENT) +
675		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
676			  HFI_DMA_ALIGNMENT) +
677		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
678		      HFI_DMA_ALIGNMENT) +
679		ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
680		      HFI_DMA_ALIGNMENT) +
681		ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
682		      HFI_DMA_ALIGNMENT);
683	if (split_mode_enabled)
684		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
685
686	size += vpss_lb_size;
687
688	return size;
689}
690
691static u32 vp9d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
692			      bool split_mode_enabled, u32 num_vpp_pipes)
693{
694	u32 vpss_lb_size = 0;
695	u32 size;
696
697	size =
698		ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
699		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
700		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
701		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
702		ALIGN(SIZE_VP9D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
703		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
704		      HFI_DMA_ALIGNMENT) +
705		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
706			  HFI_DMA_ALIGNMENT) +
707		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
708		      HFI_DMA_ALIGNMENT) +
709		ALIGN(SIZE_VP9D_LB_PE_TOP_DATA(width, height),
710		      HFI_DMA_ALIGNMENT) +
711		ALIGN(SIZE_VP9D_LB_FE_TOP_DATA(width, height),
712		      HFI_DMA_ALIGNMENT);
713
714	if (split_mode_enabled)
715		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
716
717	size += vpss_lb_size + HDR10_HIST_EXTRADATA_SIZE;
718
719	return size;
720}
721
722static u32 mpeg2d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
723				bool split_mode_enabled, u32 num_vpp_pipes)
724{
725	u32 vpss_lb_size = 0;
726	u32 size;
727
728	size =
729		ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
730		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
731		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
732		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
733		ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
734		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
735		      HFI_DMA_ALIGNMENT) +
736		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
737			  HFI_DMA_ALIGNMENT) +
738		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
739		      HFI_DMA_ALIGNMENT) +
740		ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
741		      HFI_DMA_ALIGNMENT) +
742		ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
743		      HFI_DMA_ALIGNMENT);
744
745	if (split_mode_enabled)
746		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);
747
748	size += vpss_lb_size;
749
750	return size;
751}
752
753static u32
754calculate_enc_scratch1_size(u32 width, u32 height, u32 lcu_size, u32 num_ref,
755			    bool ten_bit, u32 num_vpp_pipes, bool is_h265)
756{
757	u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size;
758	u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE;
759	u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size;
760	u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size;
761	u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size;
762	u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size;
763	u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize;
764	u32 h265e_lcubitmap_bufsize, se_stats_bufsize;
765	u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize;
766	u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size;
767	u32 width_lcu_num, height_lcu_num, width_coded, height_coded;
768	u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao;
769	u32 size, bit_depth, num_lcu_mb;
770	u32 vpss_line_buffer_size_1;
771
772	width_lcu_num = (width + lcu_size - 1) / lcu_size;
773	height_lcu_num = (height + lcu_size - 1) / lcu_size;
774	frame_num_lcu = width_lcu_num * height_lcu_num;
775	width_coded = width_lcu_num * lcu_size;
776	height_coded = height_lcu_num * lcu_size;
777	num_lcu_mb = (height_coded / lcu_size) *
778		     ((width_coded + lcu_size * 8) / lcu_size);
779	slice_info_bufsize = 256 + (frame_num_lcu << 4);
780	slice_info_bufsize = ALIGN(slice_info_bufsize, HFI_DMA_ALIGNMENT);
781	line_buf_ctrl_size = ALIGN(width_coded, HFI_DMA_ALIGNMENT);
782	line_buf_ctrl_size_buffid2 = ALIGN(width_coded, HFI_DMA_ALIGNMENT);
783
784	bit_depth = ten_bit ? 10 : 8;
785	line_buf_data_size =
786		(((((bit_depth * width_coded + 1024) +
787		(HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 1) +
788		(((((bit_depth * width_coded + 1024) >> 1) +
789		(HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 2));
790
791	leftline_buf_ctrl_size = is_h265 ?
792		((height_coded + 32) / 32 * 4 * 16) :
793		((height_coded + 15) / 16 * 5 * 16);
794
795	if (num_vpp_pipes > 1) {
796		leftline_buf_ctrl_size += 512;
797		leftline_buf_ctrl_size =
798			ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes;
799	}
800
801	leftline_buf_ctrl_size =
802		ALIGN(leftline_buf_ctrl_size, HFI_DMA_ALIGNMENT);
803	leftline_buf_recon_pix_size = (((ten_bit + 1) * 2 *
804		(height_coded) + HFI_DMA_ALIGNMENT) +
805		(HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
806		(~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1;
807
808	topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) :
809		(HFI_DMA_ALIGNMENT + 16 * (width_coded >> 4));
810	topline_buf_ctrl_size_FE =
811		ALIGN(topline_buf_ctrl_size_FE, HFI_DMA_ALIGNMENT);
812	leftline_buf_ctrl_size_FE =
813		(((HFI_DMA_ALIGNMENT + 64 * (height_coded >> 4)) +
814		(HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
815		(~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1) *
816		num_vpp_pipes;
817	leftline_buf_meta_recony = (HFI_DMA_ALIGNMENT + 64 *
818		((height_coded) / (8 * (ten_bit ? 4 : 8))));
819	leftline_buf_meta_recony =
820		ALIGN(leftline_buf_meta_recony, HFI_DMA_ALIGNMENT);
821	leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes;
822	linebuf_meta_recon_uv = (HFI_DMA_ALIGNMENT + 64 *
823		((height_coded) / (4 * (ten_bit ? 4 : 8))));
824	linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, HFI_DMA_ALIGNMENT);
825	linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes;
826	line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded);
827	line_buf_recon_pix_size =
828		ALIGN(line_buf_recon_pix_size, HFI_DMA_ALIGNMENT);
829	slice_cmd_buffer_size = ALIGN(20480, HFI_DMA_ALIGNMENT);
830	sps_pps_slice_hdr = 2048 + 4096;
831	col_mv_buf_size = is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) :
832		(3 * 16 * (width_lcu_num * height_lcu_num + 32));
833	col_mv_buf_size =
834		ALIGN(col_mv_buf_size, HFI_DMA_ALIGNMENT) * (num_ref + 1);
835	h265e_colrcbuf_size =
836		(((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num);
837	if (num_vpp_pipes > 1)
838		h265e_colrcbuf_size =
839			ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
840			num_vpp_pipes;
841
842	h265e_colrcbuf_size = ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
843				HFI_MAX_COL_FRAME;
844	h265e_framerc_bufsize = (is_h265) ? (256 + 16 *
845		(14 + (((height_coded >> 5) + 7) >> 3))) :
846		(256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3)));
847	h265e_framerc_bufsize *= 6;   /* multiply by max numtilescol */
848	if (num_vpp_pipes > 1)
849		h265e_framerc_bufsize =
850			ALIGN(h265e_framerc_bufsize, HFI_DMA_ALIGNMENT) *
851			num_vpp_pipes;
852
853	h265e_framerc_bufsize = ALIGN(h265e_framerc_bufsize, 512) *
854				HFI_MAX_COL_FRAME;
855	h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu;
856	h265e_lcubitcnt_bufsize =
857		ALIGN(h265e_lcubitcnt_bufsize, HFI_DMA_ALIGNMENT);
858	h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3);
859	h265e_lcubitmap_bufsize =
860		ALIGN(h265e_lcubitmap_bufsize, HFI_DMA_ALIGNMENT);
861	line_buf_sde_size = 256 + 16 * (width_coded >> 4);
862	line_buf_sde_size = ALIGN(line_buf_sde_size, HFI_DMA_ALIGNMENT);
863	if ((width_coded * height_coded) > (4096 * 2160))
864		se_stats_bufsize = 0;
865	else if ((width_coded * height_coded) > (1920 * 1088))
866		se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256);
867	else
868		se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256);
869
870	se_stats_bufsize = ALIGN(se_stats_bufsize, HFI_DMA_ALIGNMENT) * 2;
871	bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6;
872	bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4;
873	vpp_reg_buffer_size =
874		(((HFI_VENUS_VPPSG_MAX_REGISTERS << 3) + 31) & (~31)) * 10;
875	lambda_lut_size = 256 * 11;
876	override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3);
877	override_buffer_size =
878		ALIGN(override_buffer_size, HFI_DMA_ALIGNMENT) * 2;
879	ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3;
880	vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64;
881	vpss_line_buf =
882		(((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) *
883		16) + vpss_line_buffer_size_1;
884	topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5);
885	topline_bufsize_fe_1stg_sao =
886		ALIGN(topline_bufsize_fe_1stg_sao, HFI_DMA_ALIGNMENT);
887
888	size =
889		line_buf_ctrl_size + line_buf_data_size +
890		line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size +
891		vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE +
892		leftline_buf_ctrl_size_FE + line_buf_recon_pix_size +
893		leftline_buf_recon_pix_size +
894		leftline_buf_meta_recony + linebuf_meta_recon_uv +
895		h265e_colrcbuf_size + h265e_framerc_bufsize +
896		h265e_lcubitcnt_bufsize + h265e_lcubitmap_bufsize +
897		line_buf_sde_size +
898		topline_bufsize_fe_1stg_sao + override_buffer_size +
899		bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr +
900		slice_cmd_buffer_size + bse_slice_cmd_buffer_size +
901		ir_buffer_size + slice_info_bufsize + lambda_lut_size +
902		se_stats_bufsize + 1024;
903
904	return size;
905}
906
907static u32 h264e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
908			       u32 num_vpp_pipes)
909{
910	return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
911					   num_vpp_pipes, false);
912}
913
914static u32 h265e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
915			       u32 num_vpp_pipes)
916{
917	return calculate_enc_scratch1_size(width, height, 32, num_ref, ten_bit,
918					   num_vpp_pipes, true);
919}
920
921static u32 vp8e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
922			      u32 num_vpp_pipes)
923{
924	return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
925					   1, false);
926}
927
928static u32 ubwc_metadata_plane_stride(u32 width, u32 metadata_stride_multi,
929				      u32 tile_width_pels)
930{
931	return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels),
932			metadata_stride_multi);
933}
934
935static u32 ubwc_metadata_plane_bufheight(u32 height, u32 metadata_height_multi,
936					 u32 tile_height_pels)
937{
938	return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels),
939			metadata_height_multi);
940}
941
942static u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride,
943					   u32 metadata_buf_height)
944{
945	return ALIGN(metadata_stride * metadata_buf_height, SZ_4K);
946}
947
948static u32 enc_scratch2_size(u32 width, u32 height, u32 num_ref, bool ten_bit)
949{
950	u32 aligned_width, aligned_height, chroma_height, ref_buf_height;
951	u32 luma_size, chroma_size;
952	u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c;
953	u32 ref_luma_stride_bytes, ref_chroma_height_bytes;
954	u32 ref_buf_size, ref_stride;
955	u32 size;
956
957	if (!ten_bit) {
958		aligned_height = ALIGN(height, HFI_VENUS_HEIGHT_ALIGNMENT);
959		chroma_height = height >> 1;
960		chroma_height = ALIGN(chroma_height,
961				      HFI_VENUS_HEIGHT_ALIGNMENT);
962		aligned_width = ALIGN(width, HFI_VENUS_WIDTH_ALIGNMENT);
963		metadata_stride =
964			ubwc_metadata_plane_stride(width, 64,
965						   NV12_UBWC_Y_TILE_WIDTH);
966		meta_buf_height =
967			ubwc_metadata_plane_bufheight(height, 16,
968						      NV12_UBWC_Y_TILE_HEIGHT);
969		meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
970							      meta_buf_height);
971		meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
972							      meta_buf_height);
973		size = (aligned_height + chroma_height) * aligned_width +
974			meta_size_y + meta_size_c;
975		size = (size * (num_ref + 3)) + 4096;
976	} else {
977		ref_buf_height = (height + (HFI_VENUS_HEIGHT_ALIGNMENT - 1))
978					& (~(HFI_VENUS_HEIGHT_ALIGNMENT - 1));
979		ref_luma_stride_bytes =
980			((width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) *
981			SYSTEM_LAL_TILE10;
982		ref_stride = 4 * (ref_luma_stride_bytes / 3);
983		ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1));
984		luma_size = ref_buf_height * ref_stride;
985		ref_chroma_height_bytes = (((height + 1) >> 1) +
986			(32 - 1)) & (~(32 - 1));
987		chroma_size = ref_stride * ref_chroma_height_bytes;
988		luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
989		chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1));
990		ref_buf_size = luma_size + chroma_size;
991		metadata_stride =
992			ubwc_metadata_plane_stride(width,
993						   METADATA_STRIDE_MULTIPLE,
994						   TP10_UBWC_Y_TILE_WIDTH);
995		meta_buf_height =
996			ubwc_metadata_plane_bufheight(height,
997						      METADATA_HEIGHT_MULTIPLE,
998						      TP10_UBWC_Y_TILE_HEIGHT);
999		meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride,
1000							      meta_buf_height);
1001		meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride,
1002							      meta_buf_height);
1003		size = ref_buf_size + meta_size_y + meta_size_c;
1004		size = (size * (num_ref + 3)) + 4096;
1005	}
1006
1007	return size;
1008}
1009
1010static u32 enc_persist_size(void)
1011{
1012	return HFI_IRIS2_ENC_PERSIST_SIZE;
1013}
1014
1015static u32 h264d_persist1_size(void)
1016{
1017	return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264
1018		     + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT);
1019}
1020
1021static u32 h265d_persist1_size(void)
1022{
1023	return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_TILE
1024			* sizeof(u32) + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT);
1025}
1026
1027static u32 vp8d_persist1_size(void)
1028{
1029	return ALIGN(VP8_NUM_PROBABILITY_TABLE_BUF * VP8_PROB_TABLE_SIZE,
1030			HFI_DMA_ALIGNMENT);
1031}
1032
1033static u32 vp9d_persist1_size(void)
1034{
1035	return
1036		ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE,
1037		      HFI_DMA_ALIGNMENT) +
1038		ALIGN(HFI_IRIS2_VP9D_COMV_SIZE, HFI_DMA_ALIGNMENT) +
1039		ALIGN(MAX_SUPERFRAME_HEADER_LEN, HFI_DMA_ALIGNMENT) +
1040		ALIGN(VP9_UDC_HEADER_BUF_SIZE, HFI_DMA_ALIGNMENT) +
1041		ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE,
1042		      HFI_DMA_ALIGNMENT);
1043}
1044
1045static u32 mpeg2d_persist1_size(void)
1046{
1047	return QMATRIX_SIZE + MP2D_QPDUMP_SIZE;
1048}
1049
/*
 * Per-codec size callbacks for the decoder's internal buffers.
 * bufreq_dec() picks one instance based on the codec and calls the member
 * matching the requested buffer type.
 */
struct dec_bufsize_ops {
	/* Size of HFI_BUFFER_INTERNAL_SCRATCH(version). */
	u32 (*scratch)(u32 width, u32 height, bool is_interlaced);
	/*
	 * Size of HFI_BUFFER_INTERNAL_SCRATCH_1(version); bufreq_dec() passes
	 * VB2_MAX_FRAME as min_buf_count and the secondary-output flag as
	 * split_mode_enabled.
	 */
	u32 (*scratch1)(u32 width, u32 height, u32 min_buf_count,
			bool split_mode_enabled, u32 num_vpp_pipes);
	/* Size of HFI_BUFFER_INTERNAL_PERSIST_1; takes no stream parameters. */
	u32 (*persist1)(void);
};
1056
/*
 * Per-codec size callbacks for the encoder's internal buffers.
 * bufreq_enc() picks one instance based on the codec and calls the member
 * matching the requested buffer type.
 */
struct enc_bufsize_ops {
	/* Size of HFI_BUFFER_INTERNAL_SCRATCH(version). */
	u32 (*scratch)(u32 width, u32 height, u32 work_mode, u32 num_vpp_pipes,
		       u32 rc_type);
	/* Size of HFI_BUFFER_INTERNAL_SCRATCH_1(version). */
	u32 (*scratch1)(u32 width, u32 height, u32 num_ref, bool ten_bit,
			u32 num_vpp_pipes);
	/* Size of HFI_BUFFER_INTERNAL_SCRATCH_2(version). */
	u32 (*scratch2)(u32 width, u32 height, u32 num_ref, bool ten_bit);
	/* Size of HFI_BUFFER_INTERNAL_PERSIST; takes no stream parameters. */
	u32 (*persist)(void);
};
1065
/* Decoder size callbacks selected by bufreq_dec() for V4L2_PIX_FMT_H264. */
static struct dec_bufsize_ops dec_h264_ops = {
	.scratch = h264d_scratch_size,
	.scratch1 = h264d_scratch1_size,
	.persist1 = h264d_persist1_size,
};
1071
/* Decoder size callbacks selected by bufreq_dec() for V4L2_PIX_FMT_HEVC. */
static struct dec_bufsize_ops dec_h265_ops = {
	.scratch = h265d_scratch_size,
	.scratch1 = h265d_scratch1_size,
	.persist1 = h265d_persist1_size,
};
1077
/*
 * Decoder size callbacks selected by bufreq_dec() for V4L2_PIX_FMT_VP8.
 * The scratch callback is shared with VP9.
 */
static struct dec_bufsize_ops dec_vp8_ops = {
	.scratch = vpxd_scratch_size,
	.scratch1 = vp8d_scratch1_size,
	.persist1 = vp8d_persist1_size,
};
1083
/*
 * Decoder size callbacks selected by bufreq_dec() for V4L2_PIX_FMT_VP9.
 * The scratch callback is shared with VP8.
 */
static struct dec_bufsize_ops dec_vp9_ops = {
	.scratch = vpxd_scratch_size,
	.scratch1 = vp9d_scratch1_size,
	.persist1 = vp9d_persist1_size,
};
1089
/* Decoder size callbacks selected by bufreq_dec() for V4L2_PIX_FMT_MPEG2. */
static struct dec_bufsize_ops dec_mpeg2_ops = {
	.scratch = mpeg2d_scratch_size,
	.scratch1 = mpeg2d_scratch1_size,
	.persist1 = mpeg2d_persist1_size,
};
1095
/*
 * Encoder size callbacks selected by bufreq_enc() for V4L2_PIX_FMT_H264.
 * scratch2 and persist are common to every encoder table.
 */
static struct enc_bufsize_ops enc_h264_ops = {
	.scratch = h264e_scratch_size,
	.scratch1 = h264e_scratch1_size,
	.scratch2 = enc_scratch2_size,
	.persist = enc_persist_size,
};
1102
/*
 * Encoder size callbacks selected by bufreq_enc() for V4L2_PIX_FMT_HEVC.
 * scratch2 and persist are common to every encoder table.
 */
static struct enc_bufsize_ops enc_h265_ops = {
	.scratch = h265e_scratch_size,
	.scratch1 = h265e_scratch1_size,
	.scratch2 = enc_scratch2_size,
	.persist = enc_persist_size,
};
1109
/*
 * Encoder size callbacks selected by bufreq_enc() for V4L2_PIX_FMT_VP8.
 * scratch2 and persist are common to every encoder table.
 */
static struct enc_bufsize_ops enc_vp8_ops = {
	.scratch = vp8e_scratch_size,
	.scratch1 = vp8e_scratch1_size,
	.scratch2 = enc_scratch2_size,
	.persist = enc_persist_size,
};
1116
1117static u32
1118calculate_dec_input_frame_size(u32 width, u32 height, u32 codec,
1119			       u32 max_mbs_per_frame, u32 buffer_size_limit)
1120{
1121	u32 frame_size, num_mbs;
1122	u32 div_factor = 1;
1123	u32 base_res_mbs = NUM_MBS_4K;
1124
1125	/*
1126	 * Decoder input size calculation:
1127	 * If clip is 8k buffer size is calculated for 8k : 8k mbs/4
1128	 * For 8k cases we expect width/height to be set always.
1129	 * In all other cases size is calculated for 4k:
1130	 * 4k mbs for VP8/VP9 and 4k/2 for remaining codecs
1131	 */
1132	num_mbs = (ALIGN(height, 16) * ALIGN(width, 16)) / 256;
1133	if (num_mbs > NUM_MBS_4K) {
1134		div_factor = 4;
1135		base_res_mbs = max_mbs_per_frame;
1136	} else {
1137		base_res_mbs = NUM_MBS_4K;
1138		if (codec == V4L2_PIX_FMT_VP9)
1139			div_factor = 1;
1140		else
1141			div_factor = 2;
1142	}
1143
1144	frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor;
1145
1146	/* multiply by 10/8 (1.25) to get size for 10 bit case */
1147	if (codec == V4L2_PIX_FMT_VP9 || codec == V4L2_PIX_FMT_HEVC)
1148		frame_size = frame_size + (frame_size >> 2);
1149
1150	if (buffer_size_limit && buffer_size_limit < frame_size)
1151		frame_size = buffer_size_limit;
1152
1153	return ALIGN(frame_size, SZ_4K);
1154}
1155
1156static int output_buffer_count(u32 session_type, u32 codec)
1157{
1158	u32 output_min_count;
1159
1160	if (session_type == VIDC_SESSION_TYPE_DEC) {
1161		switch (codec) {
1162		case V4L2_PIX_FMT_MPEG2:
1163		case V4L2_PIX_FMT_VP8:
1164			output_min_count = 6;
1165			break;
1166		case V4L2_PIX_FMT_VP9:
1167			output_min_count = 11;
1168			break;
1169		case V4L2_PIX_FMT_H264:
1170		case V4L2_PIX_FMT_HEVC:
1171		default:
1172			output_min_count = 18;
1173			break;
1174		}
1175	} else {
1176		output_min_count = MIN_ENC_OUTPUT_BUFFERS;
1177	}
1178
1179	return output_min_count;
1180}
1181
1182static int bufreq_dec(struct hfi_plat_buffers_params *params, u32 buftype,
1183		      struct hfi_buffer_requirements *bufreq)
1184{
1185	enum hfi_version version = params->version;
1186	u32 codec = params->codec;
1187	u32 width = params->width, height = params->height, out_min_count;
1188	u32 out_width = params->out_width, out_height = params->out_height;
1189	struct dec_bufsize_ops *dec_ops;
1190	bool is_secondary_output = params->dec.is_secondary_output;
1191	bool is_interlaced = params->dec.is_interlaced;
1192	u32 max_mbs_per_frame = params->dec.max_mbs_per_frame;
1193	u32 buffer_size_limit = params->dec.buffer_size_limit;
1194	u32 num_vpp_pipes = params->num_vpp_pipes;
1195
1196	switch (codec) {
1197	case V4L2_PIX_FMT_H264:
1198		dec_ops = &dec_h264_ops;
1199		break;
1200	case V4L2_PIX_FMT_HEVC:
1201		dec_ops = &dec_h265_ops;
1202		break;
1203	case V4L2_PIX_FMT_VP8:
1204		dec_ops = &dec_vp8_ops;
1205		break;
1206	case V4L2_PIX_FMT_VP9:
1207		dec_ops = &dec_vp9_ops;
1208		break;
1209	case V4L2_PIX_FMT_MPEG2:
1210		dec_ops = &dec_mpeg2_ops;
1211		break;
1212	default:
1213		return -EINVAL;
1214	}
1215
1216	out_min_count = output_buffer_count(VIDC_SESSION_TYPE_DEC, codec);
1217	/* Max of driver and FW count */
1218	out_min_count = max(out_min_count, hfi_bufreq_get_count_min(bufreq, version));
1219
1220	bufreq->type = buftype;
1221	bufreq->region_size = 0;
1222	bufreq->count_actual = 1;
1223	hfi_bufreq_set_count_min(bufreq, version, 1);
1224	hfi_bufreq_set_hold_count(bufreq, version, 1);
1225	bufreq->contiguous = 1;
1226	bufreq->alignment = 256;
1227
1228	if (buftype == HFI_BUFFER_INPUT) {
1229		hfi_bufreq_set_count_min(bufreq, version, MIN_INPUT_BUFFERS);
1230		bufreq->size =
1231			calculate_dec_input_frame_size(width, height, codec,
1232						       max_mbs_per_frame,
1233						       buffer_size_limit);
1234	} else if (buftype == HFI_BUFFER_OUTPUT || buftype == HFI_BUFFER_OUTPUT2) {
1235		hfi_bufreq_set_count_min(bufreq, version, out_min_count);
1236		bufreq->size =
1237			venus_helper_get_framesz_raw(params->hfi_color_fmt,
1238						     out_width, out_height);
1239		if (buftype == HFI_BUFFER_OUTPUT &&
1240		    params->dec.is_secondary_output)
1241			bufreq->size =
1242				venus_helper_get_framesz_raw(params->hfi_dpb_color_fmt,
1243							     out_width, out_height);
1244	} else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) {
1245		bufreq->size = dec_ops->scratch(width, height, is_interlaced);
1246	} else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) {
1247		bufreq->size = dec_ops->scratch1(width, height, VB2_MAX_FRAME,
1248						 is_secondary_output,
1249						 num_vpp_pipes);
1250	} else if (buftype == HFI_BUFFER_INTERNAL_PERSIST_1) {
1251		bufreq->size = dec_ops->persist1();
1252	} else {
1253		bufreq->size = 0;
1254	}
1255
1256	return 0;
1257}
1258
1259static int bufreq_enc(struct hfi_plat_buffers_params *params, u32 buftype,
1260		      struct hfi_buffer_requirements *bufreq)
1261{
1262	enum hfi_version version = params->version;
1263	struct enc_bufsize_ops *enc_ops;
1264	u32 width = params->width;
1265	u32 height = params->height;
1266	bool is_tenbit = params->enc.is_tenbit;
1267	u32 num_bframes = params->enc.num_b_frames;
1268	u32 codec = params->codec;
1269	u32 work_mode = params->enc.work_mode;
1270	u32 rc_type = params->enc.rc_type;
1271	u32 num_vpp_pipes = params->num_vpp_pipes;
1272	u32 num_ref, count_min;
1273
1274	switch (codec) {
1275	case V4L2_PIX_FMT_H264:
1276		enc_ops = &enc_h264_ops;
1277		break;
1278	case V4L2_PIX_FMT_HEVC:
1279		enc_ops = &enc_h265_ops;
1280		break;
1281	case V4L2_PIX_FMT_VP8:
1282		enc_ops = &enc_vp8_ops;
1283		break;
1284	default:
1285		return -EINVAL;
1286	}
1287
1288	num_ref = num_bframes > 0 ? num_bframes + 1 : 1;
1289
1290	bufreq->type = buftype;
1291	bufreq->region_size = 0;
1292	bufreq->count_actual = 1;
1293	hfi_bufreq_set_count_min(bufreq, version, 1);
1294	hfi_bufreq_set_hold_count(bufreq, version, 1);
1295	bufreq->contiguous = 1;
1296	bufreq->alignment = 256;
1297
1298	if (buftype == HFI_BUFFER_INPUT) {
1299		hfi_bufreq_set_count_min(bufreq, version, MIN_INPUT_BUFFERS);
1300		bufreq->size =
1301			venus_helper_get_framesz_raw(params->hfi_color_fmt,
1302						     width, height);
1303	} else if (buftype == HFI_BUFFER_OUTPUT ||
1304		   buftype == HFI_BUFFER_OUTPUT2) {
1305		count_min = output_buffer_count(VIDC_SESSION_TYPE_ENC, codec);
1306		hfi_bufreq_set_count_min(bufreq, version, count_min);
1307		bufreq->size = calculate_enc_output_frame_size(width, height,
1308							       rc_type);
1309	} else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) {
1310		bufreq->size = enc_ops->scratch(width, height, work_mode,
1311						num_vpp_pipes, rc_type);
1312	} else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) {
1313		bufreq->size = enc_ops->scratch1(width, height, num_ref,
1314						 is_tenbit, num_vpp_pipes);
1315	} else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_2(version)) {
1316		bufreq->size = enc_ops->scratch2(width, height, num_ref,
1317						 is_tenbit);
1318	} else if (buftype == HFI_BUFFER_INTERNAL_PERSIST) {
1319		bufreq->size = enc_ops->persist();
1320	} else {
1321		bufreq->size = 0;
1322	}
1323
1324	return 0;
1325}
1326
1327int hfi_plat_bufreq_v6(struct hfi_plat_buffers_params *params, u32 session_type,
1328		       u32 buftype, struct hfi_buffer_requirements *bufreq)
1329{
1330	if (session_type == VIDC_SESSION_TYPE_DEC)
1331		return bufreq_dec(params, buftype, bufreq);
1332	else
1333		return bufreq_enc(params, buftype, bufreq);
1334}
1335