1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2023 MediaTek Inc.
4 * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com>
5 */
6
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <media/videobuf2-dma-contig.h>
10
11#include "../mtk_vcodec_dec.h"
12#include "../../common/mtk_vcodec_intr.h"
13#include "../vdec_drv_base.h"
14#include "../vdec_drv_if.h"
15#include "../vdec_vpu_if.h"
16
/* Number of frame-buffer slots tracked per instance. */
#define AV1_MAX_FRAME_BUF_COUNT		(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
#define AV1_TILE_BUF_SIZE		64
#define AV1_SCALE_SUBPEL_BITS		10
/* Reference scale factors are fixed point with AV1_REF_SCALE_SHIFT fraction bits. */
#define AV1_REF_SCALE_SHIFT		14
#define AV1_REF_NO_SCALE		BIT(AV1_REF_SCALE_SHIFT)
/* Negative constants are parenthesized so they expand safely in any expression. */
#define AV1_REF_INVALID_SCALE		(-1)
#define AV1_CDF_TABLE_BUFFER_SIZE	16384
#define AV1_PRIMARY_REF_NONE		7

/* Marks "no slot" / "no reference" in slot and reference index fields. */
#define AV1_INVALID_IDX			(-1)
27
/*
 * Divide @value by 2^@n, rounding to nearest: half of the divisor
 * (BIT(n) >> 1) is added before the right shift. Each argument is
 * evaluated exactly once through the _n/_value temporaries.
 * Use AV1_DIV_ROUND_UP_POW2_SIGNED() for values that may be negative.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n  = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})
34
/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2(): the magnitude of @value is
 * rounded and the sign is restored afterwards, so negative and positive
 * inputs round symmetrically (half away from zero).
 * The temporaries are named _n_/_value_ on purpose: they are passed into
 * AV1_DIV_ROUND_UP_POW2(), whose own temporaries are _n/_value, and the
 * names must differ to avoid a self-referencing initializer.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_  = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
42
/*
 * Flag test helpers: each evaluates to 1 when the given bit(s) are set in
 * (x)->flags and to 0 otherwise. The named variants paste @name onto the
 * matching V4L2_AV1_*_FLAG_ prefix and defer to BIT_FLAG().
 */
#define BIT_FLAG(x, bit)		(((x)->flags & (bit)) != 0)
#define SEGMENTATION_FLAG(x, name)	BIT_FLAG(x, V4L2_AV1_SEGMENTATION_FLAG_##name)
#define QUANT_FLAG(x, name)		BIT_FLAG(x, V4L2_AV1_QUANTIZATION_FLAG_##name)
#define SEQUENCE_FLAG(x, name)		BIT_FLAG(x, V4L2_AV1_SEQUENCE_FLAG_##name)
#define FH_FLAG(x, name)		BIT_FLAG(x, V4L2_AV1_FRAME_FLAG_##name)
48
/* Lower and upper bound used for q values. */
#define MINQ					0
#define MAXQ					255

/* div_lut[] geometry: DIV_LUT_NUM + 1 entries. */
#define DIV_LUT_PREC_BITS			14
#define DIV_LUT_BITS				8
#define DIV_LUT_NUM				BIT(DIV_LUT_BITS)

/* Warp model precision constants. */
#define WARP_PARAM_REDUCE_BITS			6
#define WARPEDMODEL_PREC_BITS			16

#define SEG_LVL_ALT_Q				0
#define SECONDARY_FILTER_STRENGTH_NUM_BITS	2
60
/*
 * Reciprocal lookup table with DIV_LUT_NUM + 1 entries.
 * Entry i holds (1 << (DIV_LUT_PREC_BITS + DIV_LUT_BITS)) / (DIV_LUT_NUM + i)
 * rounded to nearest: the first entry is 1 << DIV_LUT_PREC_BITS (16384) and
 * the last one is half of that (8192).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
87
/**
 * struct vdec_av1_slice_init_vsi - VSI used to initialize the instance
 * @architecture:	architecture type
 * @reserved:		reserved
 * @core_vsi:		core VSI reference (presumably its firmware address -
 *			TODO confirm)
 * @cdf_table_addr:	firmware address of the CDF table; it is mapped with
 *			mtk_vcodec_fw_map_dm_addr() before being copied
 * @cdf_table_size:	CDF table size in bytes
 * @iq_table_addr:	firmware address of the IQ table; it is mapped with
 *			mtk_vcodec_fw_map_dm_addr() before being copied
 * @iq_table_size:	IQ table size in bytes
 * @vsi_size:		size of the shared VSI structure
 *
 * Do not reorder or resize members: the structure is filled in by the
 * firmware side.
 */
struct vdec_av1_slice_init_vsi {
	u32 architecture;
	u32 reserved;
	u64 core_vsi;
	u64 cdf_table_addr;
	u32 cdf_table_size;
	u64 iq_table_addr;
	u32 iq_table_size;
	u32 vsi_size;
};
109
/**
 * struct vdec_av1_slice_mem - memory address and size
 * @buf:		64-bit view of @dma_addr; keeps the first union at least
 *			64 bits wide whatever the width of dma_addr_t
 * @dma_addr:		buffer address
 * @size:		buffer size
 * @dma_addr_end:	buffer end address (alternative use of the second union)
 * @padding:		64-bit member keeping the second union at least 64 bits
 *			wide
 */
struct vdec_av1_slice_mem {
	/* address slot */
	union {
		u64 buf;
		dma_addr_t dma_addr;
	};
	/* size or end-address slot */
	union {
		size_t size;
		dma_addr_t dma_addr_end;
		u64 padding;
	};
};
129
/**
 * struct vdec_av1_slice_state - decoding state
 * @err:	error type reported for the decode
 * @full:	whether the transcoded buffer is full
 * @timeout:	whether the decode timed out
 * @perf:	performance enable
 * @crc:	hardware checksum words (16 entries)
 * @out_size:	hardware output size
 */
struct vdec_av1_slice_state {
	int err;
	u32 full;
	u32 timeout;
	u32 perf;
	u32 crc[16];
	u32 out_size;
};
147
/**
 * enum vdec_av1_slice_resolution_level - resolution level
 * @AV1_RES_NONE:	no resolution level
 * @AV1_RES_FHD:	FHD level
 * @AV1_RES_4K:		4K level
 * @AV1_RES_8K:		8K level
 */
enum vdec_av1_slice_resolution_level {
	AV1_RES_NONE = 0,
	AV1_RES_FHD = 1,
	AV1_RES_4K = 2,
	AV1_RES_8K = 3,
};
157
/**
 * enum vdec_av1_slice_frame_type - av1 frame type
 * @AV1_KEY_FRAME:		key frame
 * @AV1_INTER_FRAME:		inter frame
 * @AV1_INTRA_ONLY_FRAME:	intra-only frame
 * @AV1_SWITCH_FRAME:		switch frame
 * @AV1_FRAME_TYPES:		number of frame types, not a frame type itself
 */
enum vdec_av1_slice_frame_type {
	AV1_KEY_FRAME = 0,
	AV1_INTER_FRAME = 1,
	AV1_INTRA_ONLY_FRAME = 2,
	AV1_SWITCH_FRAME = 3,
	AV1_FRAME_TYPES = 4,
};
168
/**
 * enum vdec_av1_slice_reference_mode - reference mode type
 * @AV1_SINGLE_REFERENCE:	single reference
 * @AV1_COMPOUND_REFERENCE:	compound reference
 * @AV1_REFERENCE_MODE_SELECT:	reference mode select
 * @AV1_REFERENCE_MODES:	number of reference modes, not a mode itself
 */
enum vdec_av1_slice_reference_mode {
	AV1_SINGLE_REFERENCE = 0,
	AV1_COMPOUND_REFERENCE = 1,
	AV1_REFERENCE_MODE_SELECT = 2,
	AV1_REFERENCE_MODES = 3,
};
178
/**
 * struct vdec_av1_slice_tile_group - info for each tile
 * @num_tiles:			number of tiles
 * @tile_size:			input size of each tile
 * @tile_start_offset:		offset of each tile within the input buffer
 *
 * Both arrays have room for V4L2_AV1_MAX_TILE_COUNT tiles.
 */
struct vdec_av1_slice_tile_group {
	u32 num_tiles;
	u32 tile_size[V4L2_AV1_MAX_TILE_COUNT];
	u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT];
};
190
/**
 * struct vdec_av1_slice_scale_factors - scale info for each ref frame
 * @is_scaled:  whether the frame is scaled
 * @x_scale:    frame width scale coefficient
 * @y_scale:    frame height scale coefficient
 * @x_step:     width step for @x_scale
 * @y_step:     height step for @y_scale
 */
struct vdec_av1_slice_scale_factors {
	u8 is_scaled;
	int x_scale;
	int y_scale;
	int x_step;
	int y_step;
};
206
/**
 * struct vdec_av1_slice_frame_refs - ref frame info
 * @ref_fb_idx:         reference slot index
 * @ref_map_idx:        reference frame index
 * @scale_factors:      scale factors of this reference frame
 */
struct vdec_av1_slice_frame_refs {
	int ref_fb_idx;
	int ref_map_idx;
	struct vdec_av1_slice_scale_factors scale_factors;
};
218
/**
 * struct vdec_av1_slice_gm - AV1 Global Motion parameters
 * @wmtype:     type of global motion transform used
 * @wmmat:      global motion parameters (gm_params), 8 entries
 * @alpha:      alpha info
 * @beta:       beta info
 * @gamma:      gamma info
 * @delta:      delta info
 * @invalid:    whether the parameters are invalid
 */
struct vdec_av1_slice_gm {
	int wmtype;
	int wmmat[8];
	short alpha;
	short beta;
	short gamma;
	short delta;
	char invalid;
};
238
/**
 * struct vdec_av1_slice_sm - AV1 Skip Mode parameters
 * @skip_mode_allowed:  whether skip mode is allowed
 * @skip_mode_present:  specifies whether the skip_mode syntax element will
 *                      be present
 * @skip_mode_frame:    specifies the two frames to use for compound
 *                      prediction
 */
struct vdec_av1_slice_sm {
	u8 skip_mode_allowed;
	u8 skip_mode_present;
	int skip_mode_frame[2];
};
250
/**
 * struct vdec_av1_slice_seg - AV1 Segmentation params
 * @segmentation_enabled:         whether this frame makes use of the
 *                                segmentation tool
 * @segmentation_update_map:      the segmentation map is updated while
 *                                decoding this frame
 * @segmentation_temporal_update: the segmentation map is coded relative to
 *                                the existing segmentation map
 * @segmentation_update_data:     new parameters are about to be specified
 *                                for each segment
 * @feature_data:                 feature data for each segment feature
 * @feature_enabled_mask:         per-segment mask telling whether the
 *                                corresponding feature value is coded
 * @segid_preskip:                the segment id will be read before the
 *                                skip syntax element
 * @last_active_segid:            the highest numbered segment id that has
 *                                some enabled feature
 */
struct vdec_av1_slice_seg {
	u8 segmentation_enabled;
	u8 segmentation_update_map;
	u8 segmentation_temporal_update;
	u8 segmentation_update_data;
	int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX];
	u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS];
	int segid_preskip;
	int last_active_segid;
};
272
/**
 * struct vdec_av1_slice_delta_q_lf - AV1 quantizer and loop filter delta parameters
 * @delta_q_present:    specifies whether quantizer index delta values are
 *                      present
 * @delta_q_res:        specifies the left shift which should be applied to
 *                      decoded quantizer index delta values
 * @delta_lf_present:   specifies whether loop filter delta values are present
 * @delta_lf_res:       specifies the left shift which should be applied to
 *                      decoded loop filter delta values
 * @delta_lf_multi:     specifies that separate loop filter deltas are sent
 *                      for horizontal luma edges, vertical luma edges, the
 *                      U edges, and the V edges
 */
struct vdec_av1_slice_delta_q_lf {
	u8 delta_q_present;
	u8 delta_q_res;
	u8 delta_lf_present;
	u8 delta_lf_res;
	u8 delta_lf_multi;
};
290
/**
 * struct vdec_av1_slice_quantization - AV1 Quantization params
 * @base_q_idx:         indicates the base frame qindex. This is used for Y AC
 *                      coefficients and as the base value for the other
 *                      quantizers
 * @qindex:             qindex of each segment
 * @delta_qydc:         indicates the Y DC quantizer relative to base_q_idx
 * @delta_qudc:         indicates the U DC quantizer relative to base_q_idx
 * @delta_quac:         indicates the U AC quantizer relative to base_q_idx
 * @delta_qvdc:         indicates the V DC quantizer relative to base_q_idx
 * @delta_qvac:         indicates the V AC quantizer relative to base_q_idx
 * @using_qmatrix:      specifies that the quantizer matrix will be used to
 *                      compute quantizers
 * @qm_y:               specifies the level in the quantizer matrix that
 *                      should be used for luma plane decoding
 * @qm_u:               specifies the level in the quantizer matrix that
 *                      should be used for chroma U plane decoding
 * @qm_v:               specifies the level in the quantizer matrix that
 *                      should be used for chroma V plane decoding
 */
struct vdec_av1_slice_quantization {
	int base_q_idx;
	int qindex[V4L2_AV1_MAX_SEGMENTS];
	int delta_qydc;
	int delta_qudc;
	int delta_quac;
	int delta_qvdc;
	int delta_qvac;
	u8 using_qmatrix;
	u8 qm_y;
	u8 qm_u;
	u8 qm_v;
};
323
/**
 * struct vdec_av1_slice_lr - AV1 Loop Restoration parameters
 * @use_lr:                     whether to use loop restoration
 * @use_chroma_lr:              whether to use chroma loop restoration
 * @frame_restoration_type:     specifies the type of restoration used for
 *                              each plane
 * @loop_restoration_size:      specifies the size of loop restoration units,
 *                              in units of samples in the current plane
 */
struct vdec_av1_slice_lr {
	u8 use_lr;
	u8 use_chroma_lr;
	u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX];
	u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX];
};
338
/**
 * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters
 * @loop_filter_level:          an array containing loop filter strength values
 * @loop_filter_ref_deltas:     contains the adjustment needed for the filter
 *                              level based on the chosen reference frame
 * @loop_filter_mode_deltas:    contains the adjustment needed for the filter
 *                              level based on the chosen mode
 * @loop_filter_sharpness:      indicates the sharpness level. The
 *                              loop_filter_level and loop_filter_sharpness
 *                              together determine when a block edge is
 *                              filtered, and by how much the filtering can
 *                              change the sample values
 * @loop_filter_delta_enabled:  the filter level depends on the mode and
 *                              reference frame used to predict a block
 */
struct vdec_av1_slice_loop_filter {
	u8 loop_filter_level[4];
	int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	int loop_filter_mode_deltas[2];
	u8 loop_filter_sharpness;
	u8 loop_filter_delta_enabled;
};
360
/**
 * struct vdec_av1_slice_cdef - AV1 CDEF parameters
 * @cdef_damping:       controls the amount of damping in the deringing filter
 * @cdef_y_strength:    specifies the strength of the primary and secondary
 *                      filter (Y), 8 entries
 * @cdef_uv_strength:   specifies the strength of the primary and secondary
 *                      filter (UV), 8 entries
 * @cdef_bits:          specifies the number of bits needed to specify which
 *                      CDEF filter to apply
 */
struct vdec_av1_slice_cdef {
	u8 cdef_damping;
	u8 cdef_y_strength[8];
	u8 cdef_uv_strength[8];
	u8 cdef_bits;
};
375
/**
 * struct vdec_av1_slice_mfmv - AV1 mfmv parameters
 * @mfmv_valid_ref:     mfmv_valid_ref, 3 entries
 * @mfmv_dir:           mfmv_dir, 3 entries
 * @mfmv_ref_to_cur:    mfmv_ref_to_cur, 3 entries
 * @mfmv_ref_frame_idx: mfmv_ref_frame_idx, 3 entries
 * @mfmv_count:         mfmv_count (presumably the number of array entries
 *                      in use - TODO confirm)
 */
struct vdec_av1_slice_mfmv {
	u32 mfmv_valid_ref[3];
	u32 mfmv_dir[3];
	int mfmv_ref_to_cur[3];
	int mfmv_ref_frame_idx[3];
	int mfmv_count;
};
391
/**
 * struct vdec_av1_slice_tile - AV1 Tile info
 * @tile_cols:                  specifies the number of tiles across the frame
 * @tile_rows:                  specifies the number of tiles down the frame
 * @mi_col_starts:              an array specifying the start column of each
 *                              tile column (V4L2_AV1_MAX_TILE_COLS + 1 entries)
 * @mi_row_starts:              an array specifying the start row of each
 *                              tile row (V4L2_AV1_MAX_TILE_ROWS + 1 entries)
 * @context_update_tile_id:     specifies which tile to use for the CDF update
 * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
 *                              or the tile sizes are coded
 */
struct vdec_av1_slice_tile {
	u8 tile_cols;
	u8 tile_rows;
	int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1];
	int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1];
	u8 context_update_tile_id;
	u8 uniform_tile_spacing_flag;
};
410
/**
 * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU
 * @use_ref_frame_mvs:          use_ref_frame_mvs flag
 * @order_hint:                 specifies OrderHintBits least significant bits
 *                              of the expected output order for this frame
 * @gm:                         global motion params, one entry per
 *                              V4L2_AV1_TOTAL_REFS_PER_FRAME index
 * @upscaled_width:             the upscaled width
 * @frame_width:                frame's width
 * @frame_height:               frame's height
 * @reduced_tx_set:             frame is restricted to a reduced subset of the
 *                              full set of transform types
 * @tx_mode:                    specifies how the transform size is determined
 * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
 *                              or the tile sizes are coded
 * @interpolation_filter:       specifies the filter selection used for
 *                              performing inter prediction
 * @allow_warped_motion:        motion_mode may be present or not
 * @is_motion_mode_switchable:  equal to 0 specifies that only the SIMPLE
 *                              motion mode will be used
 * @reference_mode:             frame reference mode selected
 * @allow_high_precision_mv:    specifies that motion vectors are specified to
 *                              quarter pel precision or to eighth pel precision
 * @allow_intra_bc:             indicates that intra block copy may be used in
 *                              this frame
 * @force_integer_mv:           specifies motion vectors will always be
 *                              integers or can contain fractional bits
 * @allow_screen_content_tools: intra blocks may use palette encoding
 * @error_resilient_mode:       error resilient mode is enabled/disabled
 * @frame_type:                 specifies the AV1 frame type
 * @primary_ref_frame:          specifies which reference frame contains the
 *                              CDF values and other state that should be
 *                              loaded at the start of the frame
 * @disable_frame_end_update_cdf: indicates whether the end of frame CDF
 *                              update is disabled or enabled
 * @disable_cdf_update:         specifies whether the CDF update in the symbol
 *                              decoding process should be disabled
 * @skip_mode:                  av1 skip mode parameters
 * @seg:                        av1 segmentation parameters
 * @delta_q_lf:                 av1 quantizer and loop filter delta parameters
 * @quant:                      av1 Quantization params
 * @lr:                         av1 Loop Restoration parameters
 * @superres_denom:             the denominator for the upscaling ratio
 * @loop_filter:                av1 Loop filter parameters
 * @cdef:                       av1 CDEF parameters
 * @mfmv:                       av1 mfmv parameters
 * @tile:                       av1 Tile info
 * @frame_is_intra:             intra frame
 * @loss_less_array:            lossless flag of each segment
 * @coded_loss_less:            coded lossless
 * @mi_rows:                    frame size in mi units, rows
 * @mi_cols:                    frame size in mi units, columns
 */
struct vdec_av1_slice_uncompressed_header {
	u8 use_ref_frame_mvs;
	int order_hint;
	struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 frame_width;
	u32 frame_height;
	u8 reduced_tx_set;
	u8 tx_mode;
	u8 uniform_tile_spacing_flag;
	u8 interpolation_filter;
	u8 allow_warped_motion;
	u8 is_motion_mode_switchable;
	u8 reference_mode;
	u8 allow_high_precision_mv;
	u8 allow_intra_bc;
	u8 force_integer_mv;
	u8 allow_screen_content_tools;
	u8 error_resilient_mode;
	u8 frame_type;
	u8 primary_ref_frame;
	u8 disable_frame_end_update_cdf;
	u32 disable_cdf_update;
	struct vdec_av1_slice_sm skip_mode;
	struct vdec_av1_slice_seg seg;
	struct vdec_av1_slice_delta_q_lf delta_q_lf;
	struct vdec_av1_slice_quantization quant;
	struct vdec_av1_slice_lr lr;
	u32 superres_denom;
	struct vdec_av1_slice_loop_filter loop_filter;
	struct vdec_av1_slice_cdef cdef;
	struct vdec_av1_slice_mfmv mfmv;
	struct vdec_av1_slice_tile tile;
	u8 frame_is_intra;
	u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS];
	u8 coded_loss_less;
	u32 mi_rows;
	u32 mi_cols;
};
497
/**
 * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU
 * @bitdepth:                   the bitdepth to use for the sequence
 * @enable_superres:            specifies whether the use_superres syntax
 *                              element may be present
 * @enable_filter_intra:        specifies whether the use_filter_intra syntax
 *                              element may be present
 * @enable_intra_edge_filter:   whether the intra edge filtering process
 *                              should be enabled
 * @enable_interintra_compound: specifies that the mode info for inter blocks
 *                              may contain the syntax element interintra
 * @enable_masked_compound:     specifies that the mode info for inter blocks
 *                              may contain the syntax element compound_type
 * @enable_dual_filter:         the inter prediction filter type may be
 *                              specified independently
 * @enable_jnt_comp:            the distance weights process may be used for
 *                              inter prediction
 * @mono_chrome:                indicates the video does not contain U and V
 *                              color planes
 * @enable_order_hint:          tools based on the values of order hints may
 *                              be used
 * @order_hint_bits:            the number of bits used for the order_hint
 *                              field at each frame
 * @use_128x128_superblock:     indicates superblocks contain 128*128 luma
 *                              samples
 * @subsampling_x:              the chroma subsampling format (x direction)
 * @subsampling_y:              the chroma subsampling format (y direction)
 * @max_frame_width:            the maximum frame width for the frames
 *                              represented by the sequence
 * @max_frame_height:           the maximum frame height for the frames
 *                              represented by the sequence
 */
struct vdec_av1_slice_seq_header {
	u8 bitdepth;
	u8 enable_superres;
	u8 enable_filter_intra;
	u8 enable_intra_edge_filter;
	u8 enable_interintra_compound;
	u8 enable_masked_compound;
	u8 enable_dual_filter;
	u8 enable_jnt_comp;
	u8 mono_chrome;
	u8 enable_order_hint;
	u8 order_hint_bits;
	u8 use_128x128_superblock;
	u8 subsampling_x;
	u8 subsampling_y;
	u32 max_frame_width;
	u32 max_frame_height;
};
537
/**
 * struct vdec_av1_slice_frame - Represents current Frame info
 * @uh:                         uncompressed header info
 * @seq:                        sequence header info
 * @large_scale_tile:           whether large scale tile mode is used
 * @cur_ts:                     current frame timestamp; stored as the slot
 *                              timestamp when a new slot is taken
 * @prev_fb_idx:                previous slot id
 * @ref_frame_sign_bias:        ref_frame sign bias array
 * @order_hints:                ref_frame order hint array
 * @ref_frame_valid:            valid ref_frame array
 * @ref_frame_map:              maps each reference to its slot index, or
 *                              AV1_INVALID_IDX when no slot matches
 * @frame_refs:                 ref_frame info
 */
struct vdec_av1_slice_frame {
	struct vdec_av1_slice_uncompressed_header uh;
	struct vdec_av1_slice_seq_header seq;
	u8 large_scale_tile;
	u64 cur_ts;
	int prev_fb_idx;
	u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME];
	int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME];
};
563
/**
 * struct vdec_av1_slice_work_buffer - work buffer for lat
 * @mv_addr:    mv buffer memory info
 * @cdf_addr:   cdf buffer memory info
 * @segid_addr: segment id buffer memory info
 */
struct vdec_av1_slice_work_buffer {
	struct vdec_av1_slice_mem mv_addr;
	struct vdec_av1_slice_mem cdf_addr;
	struct vdec_av1_slice_mem segid_addr;
};
575
/**
 * struct vdec_av1_slice_frame_info - frame info for each slot
 * @frame_type:         frame type
 * @frame_is_intra:     is intra frame
 * @order_hint:         order hint
 * @order_hints:        reference frame order hints
 * @upscaled_width:     upscaled width
 * @pic_pitch:          buffer pitch
 * @frame_width:        frame width
 * @frame_height:       frame height
 * @mi_rows:            rows in mode info
 * @mi_cols:            cols in mode info
 * @ref_count:          reference count of the slot; 0 means the slot is free
 */
struct vdec_av1_slice_frame_info {
	u8 frame_type;
	u8 frame_is_intra;
	int order_hint;
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 pic_pitch;
	u32 frame_width;
	u32 frame_height;
	u32 mi_rows;
	u32 mi_cols;
	int ref_count;
};
603
/**
 * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance
 * @frame_info: frame info for each slot
 * @timestamp:  timestamp of the frame held by each slot
 *
 * Both arrays are indexed by slot id and hold AV1_MAX_FRAME_BUF_COUNT entries.
 */
struct vdec_av1_slice_slot {
	struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT];
	u64 timestamp[AV1_MAX_FRAME_BUF_COUNT];
};
613
/**
 * struct vdec_av1_slice_fb - frame buffer for decoding
 * @y:  y (luma) buffer address info
 * @c:  c (chroma) buffer address info
 */
struct vdec_av1_slice_fb {
	struct vdec_av1_slice_mem y;
	struct vdec_av1_slice_mem c;
};
623
/**
 * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP
 * @bs:			input buffer info
 * @work_buffer:	working buffers for hw, one set per frame buffer slot
 * @cdf_table:		cdf_table buffer
 * @cdf_tmp:		cdf temp buffer
 * @rd_mv:		mv buffer for lat output, core input
 * @ube:		ube buffer
 * @trans:		transcoded buffer
 * @err_map:		err map buffer
 * @row_info:		row info buffer
 * @fb:			current y/c buffer
 * @ref:		ref y/c buffers
 * @iq_table:		iq table buffer
 * @tile:		tile buffer
 * @slots:		slots info for each frame
 * @slot_id:		current frame slot id
 * @frame:		current frame info
 * @state:		status after decode done
 * @cur_lst_tile_id:	tile id for large scale
 *
 * Do not reorder or resize members: the layout is shared with the MicroP.
 */
struct vdec_av1_slice_vsi {
	/* lat */
	struct vdec_av1_slice_mem bs;
	struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT];
	struct vdec_av1_slice_mem cdf_table;
	struct vdec_av1_slice_mem cdf_tmp;
	/* LAT stage's output, Core stage's input */
	struct vdec_av1_slice_mem rd_mv;
	struct vdec_av1_slice_mem ube;
	struct vdec_av1_slice_mem trans;
	struct vdec_av1_slice_mem err_map;
	struct vdec_av1_slice_mem row_info;
	/* core */
	struct vdec_av1_slice_fb fb;
	struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME];
	struct vdec_av1_slice_mem iq_table;
	/* shared by lat and core */
	struct vdec_av1_slice_mem tile;
	struct vdec_av1_slice_slot slots;
	s8 slot_id;
	struct vdec_av1_slice_frame frame;
	struct vdec_av1_slice_state state;
	u32 cur_lst_tile_id;
};
669
/**
 * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi,
 *                             passed from lat to core
 * @vsi:        local vsi, copied to/from the remote vsi before/after decoding
 * @ref_idx:    reference buffer timestamps
 * @seq:        picture sequence
 */
struct vdec_av1_slice_pfc {
	struct vdec_av1_slice_vsi vsi;
	u64 ref_idx[V4L2_AV1_REFS_PER_FRAME];
	int seq;
};
682
/**
 * struct vdec_av1_slice_instance - represent one av1 instance
 * @ctx:                pointer to codec's context
 * @vpu:                VPU instance
 * @iq_table:           iq table buffer
 * @cdf_table:          cdf table buffer
 * @mv:                 mv working buffers, one per frame buffer slot
 * @cdf:                cdf working buffers, one per frame buffer slot
 * @seg:                segmentation working buffers, one per frame buffer slot
 * @cdf_temp:           cdf temp buffer
 * @tile:               tile buffer
 * @slots:              slots info; copied into the vsi when a slot is set up
 * @tile_group:         tile_group entry
 * @level:              level of current resolution
 * @width:              width of last picture
 * @height:             height of last picture
 * @frame_type:         frame_type of last picture
 * @irq_enabled:        irq to Main CPU or MicroP
 * @inneracing_mode:    is inneracing mode
 * @init_vsi:           vsi view used to initialize the AV1 instance; shares
 *                      storage with @vsi (anonymous union)
 * @vsi:                vsi used for decoding/flush ...
 * @core_vsi:           vsi used for Core stage
 * @seq:                global picture sequence
 */
struct vdec_av1_slice_instance {
	struct mtk_vcodec_dec_ctx *ctx;
	struct vdec_vpu_inst vpu;

	struct mtk_vcodec_mem iq_table;
	struct mtk_vcodec_mem cdf_table;

	struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT];
	struct mtk_vcodec_mem cdf_temp;
	struct mtk_vcodec_mem tile;
	struct vdec_av1_slice_slot slots;
	struct vdec_av1_slice_tile_group tile_group;

	/* for resolution change and get_pic_info */
	enum vdec_av1_slice_resolution_level level;
	u32 width;
	u32 height;

	u32 frame_type;
	u32 irq_enabled;
	u32 inneracing_mode;

	/* MicroP vsi: the same pointer, viewed as init vsi or decode vsi */
	union {
		struct vdec_av1_slice_init_vsi *init_vsi;
		struct vdec_av1_slice_vsi *vsi;
	};
	struct vdec_av1_slice_vsi *core_vsi;
	int seq;
};
739
/* Forward declaration so that code above the definition can reference it. */
static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf);
741
/*
 * Return the bit index of the most significant set bit of @n.
 * Zero is special-cased to 0 because __builtin_clz(0) is undefined.
 */
static inline int vdec_av1_slice_get_msb(u32 n)
{
	return n ? (31 ^ __builtin_clz(n)) : 0;
}
748
/*
 * Compare the current frame size against a reference frame size.
 *
 * Return: true when, in both dimensions, twice the current size is at least
 * the reference size and the current size is at most 16 times the reference
 * size; false otherwise.
 */
static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height,
					     u32 this_width, u32 this_height)
{
	/* current frame less than half of the reference */
	if ((this_width << 1) < ref_width || (this_height << 1) < ref_height)
		return false;

	/* current frame more than 16 times the reference */
	if (this_width > (ref_width << 4) || this_height > (ref_height << 4))
		return false;

	return true;
}
757
758static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
759{
760	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
761
762	if (!ctrl)
763		return ERR_PTR(-EINVAL);
764
765	return ctrl->p_cur.p;
766}
767
768static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance)
769{
770	u8 *remote_cdf_table;
771	struct mtk_vcodec_dec_ctx *ctx;
772	struct vdec_av1_slice_init_vsi *vsi;
773	int ret;
774
775	ctx = instance->ctx;
776	vsi = instance->vpu.vsi;
777	remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
778						     (u32)vsi->cdf_table_addr);
779	if (IS_ERR(remote_cdf_table)) {
780		mtk_vdec_err(ctx, "failed to map cdf table\n");
781		return PTR_ERR(remote_cdf_table);
782	}
783
784	mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table);
785
786	if (instance->cdf_table.va)
787		mtk_vcodec_mem_free(ctx, &instance->cdf_table);
788	instance->cdf_table.size = vsi->cdf_table_size;
789
790	ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table);
791	if (ret)
792		return ret;
793
794	memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size);
795
796	return 0;
797}
798
799static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance)
800{
801	u8 *remote_iq_table;
802	struct mtk_vcodec_dec_ctx *ctx;
803	struct vdec_av1_slice_init_vsi *vsi;
804	int ret;
805
806	ctx = instance->ctx;
807	vsi = instance->vpu.vsi;
808	remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
809						    (u32)vsi->iq_table_addr);
810	if (IS_ERR(remote_iq_table)) {
811		mtk_vdec_err(ctx, "failed to map iq table\n");
812		return PTR_ERR(remote_iq_table);
813	}
814
815	mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table);
816
817	if (instance->iq_table.va)
818		mtk_vcodec_mem_free(ctx, &instance->iq_table);
819	instance->iq_table.size = vsi->iq_table_size;
820
821	ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table);
822	if (ret)
823		return ret;
824
825	memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size);
826
827	return 0;
828}
829
830static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi)
831{
832	struct vdec_av1_slice_slot *slots = &vsi->slots;
833	int new_slot_idx = AV1_INVALID_IDX;
834	int i;
835
836	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
837		if (slots->frame_info[i].ref_count == 0) {
838			new_slot_idx = i;
839			break;
840		}
841	}
842
843	if (new_slot_idx != AV1_INVALID_IDX) {
844		slots->frame_info[new_slot_idx].ref_count++;
845		slots->timestamp[new_slot_idx] = vsi->frame.cur_ts;
846	}
847
848	return new_slot_idx;
849}
850
851static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info)
852{
853	memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info));
854}
855
856static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx)
857{
858	struct vdec_av1_slice_frame_info *frame_info = slots->frame_info;
859
860	frame_info[fb_idx].ref_count--;
861	if (frame_info[fb_idx].ref_count < 0) {
862		frame_info[fb_idx].ref_count = 0;
863		pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n",
864		       __func__, fb_idx);
865	}
866
867	vdec_av1_slice_clear_fb(&frame_info[fb_idx]);
868}
869
870static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots,
871					 struct vdec_av1_slice_frame *frame,
872					 struct v4l2_ctrl_av1_frame *ctrl_fh)
873{
874	int slot_id, ref_id;
875
876	for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++)
877		frame->ref_frame_map[ref_id] = AV1_INVALID_IDX;
878
879	for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) {
880		u64 timestamp = slots->timestamp[slot_id];
881		bool ref_used = false;
882
883		/* ignored unused slots */
884		if (slots->frame_info[slot_id].ref_count == 0)
885			continue;
886
887		for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
888			if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) {
889				frame->ref_frame_map[ref_id] = slot_id;
890				ref_used = true;
891			}
892		}
893
894		if (!ref_used)
895			vdec_av1_slice_decrease_ref_count(slots, slot_id);
896	}
897}
898
899static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance,
900				      struct vdec_av1_slice_vsi *vsi,
901				      struct v4l2_ctrl_av1_frame *ctrl_fh)
902{
903	struct vdec_av1_slice_frame_info *cur_frame_info;
904	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
905	int ref_id;
906
907	memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots));
908	vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh);
909	vsi->slot_id = vdec_av1_slice_get_new_slot(vsi);
910
911	if (vsi->slot_id == AV1_INVALID_IDX) {
912		mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n");
913		vsi->slot_id = 0;
914	}
915	cur_frame_info = &vsi->slots.frame_info[vsi->slot_id];
916	cur_frame_info->frame_type = uh->frame_type;
917	cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) ||
918					  (uh->frame_type == AV1_KEY_FRAME));
919	cur_frame_info->order_hint = uh->order_hint;
920	cur_frame_info->upscaled_width = uh->upscaled_width;
921	cur_frame_info->pic_pitch = 0;
922	cur_frame_info->frame_width = uh->frame_width;
923	cur_frame_info->frame_height = uh->frame_height;
924	cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
925	cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
926
927	/* ensure current frame is properly mapped if referenced */
928	for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
929		u64 timestamp = vsi->slots.timestamp[vsi->slot_id];
930
931		if (ctrl_fh->reference_frame_ts[ref_id] == timestamp)
932			vsi->frame.ref_frame_map[ref_id] = vsi->slot_id;
933	}
934}
935
936static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance,
937					       struct vdec_av1_slice_vsi *vsi)
938{
939	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
940	enum vdec_av1_slice_resolution_level level;
941	u32 max_sb_w, max_sb_h, max_w, max_h, w, h;
942	int i, ret;
943
944	w = vsi->frame.uh.frame_width;
945	h = vsi->frame.uh.frame_height;
946
947	if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT)
948		/* 8K */
949		return -EINVAL;
950
951	if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
952		/* 4K */
953		level = AV1_RES_4K;
954		max_w = VCODEC_DEC_4K_CODED_WIDTH;
955		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
956	} else {
957		/* FHD */
958		level = AV1_RES_FHD;
959		max_w = MTK_VDEC_MAX_W;
960		max_h = MTK_VDEC_MAX_H;
961	}
962
963	if (level == instance->level)
964		return 0;
965
966	mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u",
967		       instance->level, level, w, h);
968
969	max_sb_w = DIV_ROUND_UP(max_w, 128);
970	max_sb_h = DIV_ROUND_UP(max_h, 128);
971
972	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
973		if (instance->mv[i].va)
974			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
975		instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K;
976		ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]);
977		if (ret)
978			goto err;
979
980		if (instance->seg[i].va)
981			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
982		instance->seg[i].size = max_sb_w * max_sb_h * 512;
983		ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]);
984		if (ret)
985			goto err;
986
987		if (instance->cdf[i].va)
988			mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
989		instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE;
990		ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]);
991		if (ret)
992			goto err;
993	}
994
995	if (!instance->cdf_temp.va) {
996		instance->cdf_temp.size = (SZ_1K * 16 * 100);
997		ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp);
998		if (ret)
999			goto err;
1000		vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1001		vsi->cdf_tmp.size = instance->cdf_temp.size;
1002	}
1003
1004	if (instance->tile.va)
1005		mtk_vcodec_mem_free(ctx, &instance->tile);
1006
1007	instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT;
1008	ret = mtk_vcodec_mem_alloc(ctx, &instance->tile);
1009	if (ret)
1010		goto err;
1011
1012	instance->level = level;
1013	return 0;
1014
1015err:
1016	instance->level = AV1_RES_NONE;
1017	return ret;
1018}
1019
1020static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance)
1021{
1022	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1023	int i;
1024
1025	for (i = 0; i < ARRAY_SIZE(instance->mv); i++)
1026		mtk_vcodec_mem_free(ctx, &instance->mv[i]);
1027
1028	for (i = 0; i < ARRAY_SIZE(instance->seg); i++)
1029		mtk_vcodec_mem_free(ctx, &instance->seg[i]);
1030
1031	for (i = 0; i < ARRAY_SIZE(instance->cdf); i++)
1032		mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
1033
1034	mtk_vcodec_mem_free(ctx, &instance->tile);
1035	mtk_vcodec_mem_free(ctx, &instance->cdf_temp);
1036	mtk_vcodec_mem_free(ctx, &instance->cdf_table);
1037	mtk_vcodec_mem_free(ctx, &instance->iq_table);
1038
1039	instance->level = AV1_RES_NONE;
1040}
1041
1042static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi,
1043						  struct vdec_av1_slice_vsi *remote_vsi)
1044{
1045	memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
1046	memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
1047}
1048
1049static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi,
1050						struct vdec_av1_slice_vsi *remote_vsi)
1051{
1052	memcpy(remote_vsi, vsi, sizeof(*vsi));
1053}
1054
1055static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance,
1056						 struct vdec_av1_slice_vsi *vsi,
1057						 struct vdec_lat_buf *lat_buf)
1058{
1059	struct vb2_v4l2_buffer *src;
1060	struct vb2_v4l2_buffer *dst;
1061
1062	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
1063	if (!src)
1064		return -EINVAL;
1065
1066	lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
1067	dst = &lat_buf->ts_info;
1068	v4l2_m2m_buf_copy_metadata(src, dst, true);
1069	vsi->frame.cur_ts = dst->vb2_buf.timestamp;
1070
1071	return 0;
1072}
1073
1074static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift)
1075{
1076	int f;
1077	int e;
1078
1079	*shift = vdec_av1_slice_get_msb(D);
1080	/* e is obtained from D after resetting the most significant 1 bit. */
1081	e = D - ((u32)1 << *shift);
1082	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
1083	if (*shift > DIV_LUT_BITS)
1084		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
1085	else
1086		f = e << (DIV_LUT_BITS - *shift);
1087	if (f > DIV_LUT_NUM)
1088		return -1;
1089	*shift += DIV_LUT_PREC_BITS;
1090	/* Use f as lookup into the precomputed table of multipliers */
1091	return div_lut[f];
1092}
1093
/*
 * Derive the alpha/beta/gamma/delta shear parameters of one global-motion
 * entry from its warp matrix (wmmat).
 *
 * Nothing is written when wmmat[2] is zero or negative, so the caller's
 * values for the four parameters are left as they were.
 *
 * NOTE(review): vdec_av1_slice_resolve_divisor_32() can return -1 without
 * adding DIV_LUT_PREC_BITS to the shift; that case is not detected here and
 * the value is used as the multiplier - confirm this is intended.
 */
static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params)
{
	const int *mat = gm_params->wmmat;
	short shift;
	short y;
	long long gv, dv;

	if (gm_params->wmmat[2] <= 0)
		return;

	/* alpha and beta come straight from the matrix, limited to 16 bits */
	gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/*
	 * y is the table multiplier for dividing by |mat[2]|, carrying the sign
	 * of mat[2]; shift is the matching right shift. mat[2] is positive here
	 * because of the early return above.
	 */
	y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	/* gamma: mat[4] scaled by 1 << WARPEDMODEL_PREC_BITS, times y, rounded down by shift */
	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
	gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift),
				     S16_MIN, S16_MAX);

	/* delta: mat[5] minus (mat[3] * mat[4] * y >> shift) minus 1 << WARPEDMODEL_PREC_BITS */
	dv = ((long long)mat[3] * mat[4]) * y;
	gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) -
				     (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);

	/* round each parameter to a multiple of 1 << WARP_PARAM_REDUCE_BITS */
	gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) *
							(1 << WARP_PARAM_REDUCE_BITS);
	gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) *
						       (1 << WARP_PARAM_REDUCE_BITS);
	gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) *
							(1 << WARP_PARAM_REDUCE_BITS);
	gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) *
							(1 << WARP_PARAM_REDUCE_BITS);
}
1126
1127static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm,
1128				    struct v4l2_av1_global_motion *ctrl_gm)
1129{
1130	u32 i, j;
1131
1132	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1133		gm[i].wmtype = ctrl_gm->type[i];
1134		for (j = 0; j < 6; j++)
1135			gm[i].wmmat[j] = ctrl_gm->params[i][j];
1136
1137		gm[i].invalid = !!(ctrl_gm->invalid & BIT(i));
1138		gm[i].alpha = 0;
1139		gm[i].beta = 0;
1140		gm[i].gamma = 0;
1141		gm[i].delta = 0;
1142		if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE)
1143			vdec_av1_slice_get_shear_params(&gm[i]);
1144	}
1145}
1146
1147static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg,
1148				     struct v4l2_av1_segmentation *ctrl_seg)
1149{
1150	u32 i, j;
1151
1152	seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED);
1153	seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP);
1154	seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE);
1155	seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA);
1156	seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP);
1157	seg->last_active_segid = ctrl_seg->last_active_seg_id;
1158
1159	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1160		seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i];
1161		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++)
1162			seg->feature_data[i][j] = ctrl_seg->feature_data[i][j];
1163	}
1164}
1165
1166static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant,
1167				       struct v4l2_av1_quantization *ctrl_quant)
1168{
1169	quant->base_q_idx = ctrl_quant->base_q_idx;
1170	quant->delta_qydc = ctrl_quant->delta_q_y_dc;
1171	quant->delta_qudc = ctrl_quant->delta_q_u_dc;
1172	quant->delta_quac = ctrl_quant->delta_q_u_ac;
1173	quant->delta_qvdc = ctrl_quant->delta_q_v_dc;
1174	quant->delta_qvac = ctrl_quant->delta_q_v_ac;
1175	quant->qm_y = ctrl_quant->qm_y;
1176	quant->qm_u = ctrl_quant->qm_u;
1177	quant->qm_v = ctrl_quant->qm_v;
1178	quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX);
1179}
1180
1181static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh,
1182				     int segmentation_id)
1183{
1184	struct vdec_av1_slice_seg *seg = &uh->seg;
1185	struct vdec_av1_slice_quantization *quant = &uh->quant;
1186	int data = 0, qindex = 0;
1187
1188	if (seg->segmentation_enabled &&
1189	    (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) {
1190		data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q];
1191		qindex = quant->base_q_idx + data;
1192		return clamp_val(qindex, 0, MAXQ);
1193	}
1194
1195	return quant->base_q_idx;
1196}
1197
1198static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr,
1199				    struct v4l2_av1_loop_restoration  *ctrl_lr)
1200{
1201	int i;
1202
1203	lr->use_lr = 0;
1204	lr->use_chroma_lr = 0;
1205	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1206		lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i];
1207		lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i];
1208		if (lr->frame_restoration_type[i]) {
1209			lr->use_lr = 1;
1210			if (i > 0)
1211				lr->use_chroma_lr = 1;
1212		}
1213	}
1214}
1215
1216static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf,
1217				    struct v4l2_av1_loop_filter *ctrl_lf)
1218{
1219	int i;
1220
1221	for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++)
1222		lf->loop_filter_level[i] = ctrl_lf->level[i];
1223
1224	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
1225		lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i];
1226
1227	for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++)
1228		lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i];
1229
1230	lf->loop_filter_sharpness = ctrl_lf->sharpness;
1231	lf->loop_filter_delta_enabled =
1232		   BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED);
1233}
1234
1235static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef,
1236				      struct v4l2_av1_cdef *ctrl_cdef)
1237{
1238	int i;
1239
1240	cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3;
1241	cdef->cdef_bits = ctrl_cdef->bits;
1242
1243	for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) {
1244		if (ctrl_cdef->y_sec_strength[i] == 4)
1245			ctrl_cdef->y_sec_strength[i] -= 1;
1246
1247		if (ctrl_cdef->uv_sec_strength[i] == 4)
1248			ctrl_cdef->uv_sec_strength[i] -= 1;
1249
1250		cdef->cdef_y_strength[i] =
1251			ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1252			ctrl_cdef->y_sec_strength[i];
1253		cdef->cdef_uv_strength[i] =
1254			ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1255			ctrl_cdef->uv_sec_strength[i];
1256	}
1257}
1258
1259static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq,
1260				     struct v4l2_ctrl_av1_sequence *ctrl_seq)
1261{
1262	seq->bitdepth = ctrl_seq->bit_depth;
1263	seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1;
1264	seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1;
1265	seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES);
1266	seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA);
1267	seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER);
1268	seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND);
1269	seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND);
1270	seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER);
1271	seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP);
1272	seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME);
1273	seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT);
1274	seq->order_hint_bits = ctrl_seq->order_hint_bits;
1275	seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK);
1276	seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X);
1277	seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y);
1278}
1279
1280static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame,
1281				      struct v4l2_av1_tile_info *ctrl_tile)
1282{
1283	struct vdec_av1_slice_seq_header *seq = &frame->seq;
1284	struct vdec_av1_slice_tile *tile = &frame->uh.tile;
1285	u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4;
1286	int i;
1287
1288	tile->tile_cols = ctrl_tile->tile_cols;
1289	tile->tile_rows = ctrl_tile->tile_rows;
1290	tile->context_update_tile_id = ctrl_tile->context_update_tile_id;
1291	tile->uniform_tile_spacing_flag =
1292		BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
1293
1294	for (i = 0; i < tile->tile_cols + 1; i++)
1295		tile->mi_col_starts[i] =
1296			ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1297
1298	for (i = 0; i < tile->tile_rows + 1; i++)
1299		tile->mi_row_starts[i] =
1300			ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1301}
1302
/*
 * Translate the frame control (@ctrl_fh) into the driver's uncompressed
 * header, frame->uh.
 *
 * @instance is not referenced in the body.
 *
 * The statement order matters in a few places: frame_is_intra is derived from
 * frame_type, the per-segment qindex loop reads the segmentation and
 * quantization data filled in by the two setup calls before it, and the tile
 * setup at the end reads frame->seq, which this function does not write.
 */
static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance,
				    struct vdec_av1_slice_frame *frame,
				    struct v4l2_ctrl_av1_frame *ctrl_fh)
{
	struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
	int i;

	uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS);
	uh->order_hint = ctrl_fh->order_hint;
	vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion);
	uh->upscaled_width = ctrl_fh->upscaled_width;
	uh->frame_width = ctrl_fh->frame_width_minus_1 + 1;
	uh->frame_height = ctrl_fh->frame_height_minus_1 + 1;
	/* size rounded up to a multiple of 8, expressed in units of 4 */
	uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
	uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
	uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET);
	uh->tx_mode = ctrl_fh->tx_mode;
	uh->uniform_tile_spacing_flag =
		BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
	uh->interpolation_filter = ctrl_fh->interpolation_filter;
	uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION);
	uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE);
	uh->frame_type = ctrl_fh->frame_type;
	uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME ||
			      uh->frame_type == V4L2_AV1_KEY_FRAME);

	/* reference selection is only honoured for non-intra frames */
	if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT))
		uh->reference_mode = AV1_REFERENCE_MODE_SELECT;
	else
		uh->reference_mode = AV1_SINGLE_REFERENCE;

	uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV);
	uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC);
	uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV);
	uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS);
	uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE);
	uh->primary_ref_frame = ctrl_fh->primary_ref_frame;
	uh->disable_frame_end_update_cdf =
			FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF);
	uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE);
	uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT);
	/* skip-mode frames are stored relative to V4L2_AV1_REF_LAST_FRAME */
	uh->skip_mode.skip_mode_frame[0] =
		ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME;
	uh->skip_mode.skip_mode_frame[1] =
		ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME;
	/* allowed whenever the first skip-mode frame in the control is non-zero */
	uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0;

	vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation);
	uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT);
	/*
	 * NOTE(review): delta_q_res is used as a shift count without a range
	 * check in this function - confirm it is bounded before it gets here.
	 * delta_lf_res below is copied without a shift.
	 */
	uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res;
	uh->delta_q_lf.delta_lf_present =
		BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
	uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res;
	uh->delta_q_lf.delta_lf_multi =
		BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI);
	vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization);

	/*
	 * A segment is lossless when its qindex and all five quantizer deltas
	 * are zero; coded_loss_less stays 1 only if every segment is lossless.
	 */
	uh->coded_loss_less = 1;
	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
		uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i);
		uh->loss_less_array[i] =
			(uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 &&
			uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 &&
			uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0);

		if (!uh->loss_less_array[i])
			uh->coded_loss_less = 0;
	}

	vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration);
	uh->superres_denom = ctrl_fh->superres_denom;
	vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter);
	vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef);
	vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info);
}
1378
1379static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance,
1380					   struct vdec_av1_slice_vsi *vsi)
1381{
1382	struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge;
1383	struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
1384	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1385	struct vdec_av1_slice_tile *tile = &uh->tile;
1386	struct v4l2_ctrl *ctrl;
1387	u32 tge_size;
1388	int i;
1389
1390	ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
1391	if (!ctrl)
1392		return -EINVAL;
1393
1394	tge_size = ctrl->elems;
1395	ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p;
1396
1397	tile_group->num_tiles = tile->tile_cols * tile->tile_rows;
1398
1399	if (tile_group->num_tiles != tge_size ||
1400	    tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) {
1401		mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n",
1402			     tge_size, tile_group->num_tiles);
1403		return -EINVAL;
1404	}
1405
1406	for (i = 0; i < tge_size; i++) {
1407		if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols +
1408		    ctrl_tge[i].tile_col) {
1409			mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n",
1410				     i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col,
1411				     vsi->frame.uh.tile.tile_rows);
1412			return -EINVAL;
1413		}
1414		tile_group->tile_size[i] = ctrl_tge[i].tile_size;
1415		tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset;
1416	}
1417
1418	return 0;
1419}
1420
1421static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi)
1422{
1423	memset(&vsi->state, 0, sizeof(vsi->state));
1424}
1425
1426static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref,
1427					       struct vdec_av1_slice_frame_info *ref_frame_info,
1428					       struct vdec_av1_slice_uncompressed_header *uh)
1429{
1430	struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors;
1431	u32 ref_upscaled_width = ref_frame_info->upscaled_width;
1432	u32 ref_frame_height = ref_frame_info->frame_height;
1433	u32 frame_width = uh->frame_width;
1434	u32 frame_height = uh->frame_height;
1435
1436	if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height,
1437				       frame_width, frame_height)) {
1438		scale_factors->x_scale = -1;
1439		scale_factors->y_scale = -1;
1440		scale_factors->is_scaled = 0;
1441		return;
1442	}
1443
1444	scale_factors->x_scale =
1445		((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width;
1446	scale_factors->y_scale =
1447		((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height;
1448	scale_factors->is_scaled =
1449		(scale_factors->x_scale != AV1_REF_INVALID_SCALE) &&
1450		(scale_factors->y_scale != AV1_REF_INVALID_SCALE) &&
1451		(scale_factors->x_scale != AV1_REF_NO_SCALE ||
1452		 scale_factors->y_scale != AV1_REF_NO_SCALE);
1453	scale_factors->x_step =
1454		AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale,
1455				      AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1456	scale_factors->y_step =
1457		AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale,
1458				      AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1459}
1460
/*
 * Return 1 when order hint @a lies after order hint @b, 0 otherwise.
 *
 * The difference a - b is interpreted as a signed number of
 * @order_hint_bits bits, so the comparison wraps around at
 * 1 << order_hint_bits.
 *
 * 0 is returned when order hints are disabled, and also when
 * @order_hint_bits is outside 1..31: the mask 1 << (order_hint_bits - 1)
 * cannot be formed in an int for such a width, so the shift is never
 * attempted.
 */
static unsigned char vdec_av1_slice_get_sign_bias(int a,
						  int b,
						  u8 enable_order_hint,
						  u8 order_hint_bits)
{
	int diff;
	int m;

	if (!enable_order_hint)
		return 0;

	if (order_hint_bits < 1 || order_hint_bits > 31)
		return 0;

	m = 1 << (order_hint_bits - 1);

	/* sign-extend the low order_hint_bits bits of the difference */
	diff = a - b;
	diff = (diff & (m - 1)) - (diff & m);

	return diff > 0 ? 1 : 0;
}
1482
1483static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc,
1484				     struct v4l2_ctrl_av1_frame *ctrl_fh)
1485{
1486	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1487	struct vdec_av1_slice_frame *frame = &vsi->frame;
1488	struct vdec_av1_slice_slot *slots = &vsi->slots;
1489	struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
1490	struct vdec_av1_slice_seq_header *seq = &frame->seq;
1491	struct vdec_av1_slice_frame_info *cur_frame_info =
1492		&slots->frame_info[vsi->slot_id];
1493	struct vdec_av1_slice_frame_info *frame_info;
1494	int i, slot_id;
1495
1496	if (uh->frame_is_intra)
1497		return;
1498
1499	for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1500		int ref_idx = ctrl_fh->ref_frame_idx[i];
1501
1502		pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx];
1503		slot_id = frame->ref_frame_map[ref_idx];
1504		frame_info = &slots->frame_info[slot_id];
1505		if (slot_id == AV1_INVALID_IDX) {
1506			pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i,
1507			       ctrl_fh->reference_frame_ts[ref_idx]);
1508			frame->order_hints[i] = 0;
1509			frame->ref_frame_valid[i] = 0;
1510			continue;
1511		}
1512
1513		frame->frame_refs[i].ref_fb_idx = slot_id;
1514		vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i],
1515						   frame_info, uh);
1516		if (!seq->enable_order_hint)
1517			frame->ref_frame_sign_bias[i + 1] = 0;
1518		else
1519			frame->ref_frame_sign_bias[i + 1] =
1520				vdec_av1_slice_get_sign_bias(frame_info->order_hint,
1521							     uh->order_hint,
1522							     seq->enable_order_hint,
1523							     seq->order_hint_bits);
1524
1525		frame->order_hints[i] = ctrl_fh->order_hints[i + 1];
1526		cur_frame_info->order_hints[i] = frame->order_hints[i];
1527		frame->ref_frame_valid[i] = 1;
1528	}
1529}
1530
1531static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi)
1532{
1533	struct vdec_av1_slice_frame *frame = &vsi->frame;
1534
1535	if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE)
1536		frame->prev_fb_idx = AV1_INVALID_IDX;
1537	else
1538		frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx;
1539}
1540
1541static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance,
1542						       struct vdec_av1_slice_frame *frame)
1543{
1544	frame->large_scale_tile = 0;
1545}
1546
1547static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance,
1548				    struct vdec_av1_slice_pfc *pfc)
1549{
1550	struct v4l2_ctrl_av1_frame *ctrl_fh;
1551	struct v4l2_ctrl_av1_sequence *ctrl_seq;
1552	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1553	int ret = 0;
1554
1555	/* frame header */
1556	ctrl_fh = (struct v4l2_ctrl_av1_frame *)
1557		  vdec_av1_get_ctrl_ptr(instance->ctx,
1558					V4L2_CID_STATELESS_AV1_FRAME);
1559	if (IS_ERR(ctrl_fh))
1560		return PTR_ERR(ctrl_fh);
1561
1562	ctrl_seq = (struct v4l2_ctrl_av1_sequence *)
1563		   vdec_av1_get_ctrl_ptr(instance->ctx,
1564					 V4L2_CID_STATELESS_AV1_SEQUENCE);
1565	if (IS_ERR(ctrl_seq))
1566		return PTR_ERR(ctrl_seq);
1567
1568	/* setup vsi information */
1569	vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq);
1570	vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh);
1571	vdec_av1_slice_setup_operating_mode(instance, &vsi->frame);
1572
1573	vdec_av1_slice_setup_state(vsi);
1574	vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh);
1575	vdec_av1_slice_setup_ref(pfc, ctrl_fh);
1576	vdec_av1_slice_get_previous(vsi);
1577
1578	pfc->seq = instance->seq;
1579	instance->seq++;
1580
1581	return ret;
1582}
1583
1584static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance,
1585					    struct vdec_av1_slice_vsi *vsi,
1586					    struct mtk_vcodec_mem *bs,
1587					    struct vdec_lat_buf *lat_buf)
1588{
1589	struct vdec_av1_slice_work_buffer *work_buffer;
1590	int i;
1591
1592	vsi->bs.dma_addr = bs->dma_addr;
1593	vsi->bs.size = bs->size;
1594
1595	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1596	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1597	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1598	/* used to store trans end */
1599	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1600	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1601	vsi->err_map.size = lat_buf->wdma_err_addr.size;
1602	vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr;
1603	vsi->rd_mv.size = lat_buf->rd_mv_addr.size;
1604
1605	vsi->row_info.buf = 0;
1606	vsi->row_info.size = 0;
1607
1608	work_buffer = vsi->work_buffer;
1609
1610	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
1611		work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr;
1612		work_buffer[i].mv_addr.size = instance->mv[i].size;
1613		work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr;
1614		work_buffer[i].segid_addr.size = instance->seg[i].size;
1615		work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr;
1616		work_buffer[i].cdf_addr.size = instance->cdf[i].size;
1617	}
1618
1619	vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1620	vsi->cdf_tmp.size = instance->cdf_temp.size;
1621
1622	vsi->tile.buf = instance->tile.dma_addr;
1623	vsi->tile.size = instance->tile.size;
1624	memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles);
1625
1626	vsi->cdf_table.buf = instance->cdf_table.dma_addr;
1627	vsi->cdf_table.size = instance->cdf_table.size;
1628	vsi->iq_table.buf = instance->iq_table.dma_addr;
1629	vsi->iq_table.size = instance->iq_table.size;
1630}
1631
1632static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance,
1633					    struct vdec_av1_slice_vsi *vsi)
1634{
1635	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1636	struct mtk_vcodec_mem *buf;
1637
1638	/* reset segment buffer */
1639	if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) {
1640		mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id);
1641		if (vsi->slot_id != AV1_INVALID_IDX) {
1642			buf = &instance->seg[vsi->slot_id];
1643			memset(buf->va, 0, buf->size);
1644		}
1645	}
1646}
1647
/*
 * Build one descriptor per tile in instance->tile.
 *
 * Each descriptor occupies AV1_TILE_BUF_SIZE bytes and is written as 32-bit
 * words. This function fills in words 0 to 4:
 *   word 0: tile size shifted left by 3
 *   word 1: tile address bits 4-31, with address bits 32-35 in bits 0-3
 *   word 2: tile address bits 0-3 shifted left by 3
 *   word 3: tile height minus 1 (9 bits, from bit 7) and tile width minus 1
 *           (6 bits, from bit 0), both in the units of mi_row_starts and
 *           mi_col_starts
 *   word 4: bit 16 always set, bit 18 set when cdf update is not disabled,
 *           bit 17 set for the context-update tile when the frame-end cdf
 *           update is not disabled
 * Words 5 to 7 are printed by the debug output but are not written here.
 *
 * The tile address is the bitstream DMA address plus the tile's start offset.
 */
static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance,
					     struct vdec_av1_slice_vsi *vsi,
					     struct mtk_vcodec_mem *bs)
{
	struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
	struct vdec_av1_slice_tile *tile = &uh->tile;
	u32 tile_num, tile_row, tile_col;
	u32 allow_update_cdf = 0;
	u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0;
	int tile_info_base;
	u64 tile_buf_pa;
	u32 *tile_info_buf = instance->tile.va;
	u64 pa = (u64)bs->dma_addr;

	if (uh->disable_cdf_update == 0)
		allow_update_cdf = 1;

	for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) {
		/* index of the descriptor's first 32-bit word (4 bytes each) */
		tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2;
		/* tiles are numbered in raster order */
		tile_row = tile_num / tile->tile_cols;
		tile_col = tile_num % tile->tile_cols;
		tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3);
		tile_buf_pa = pa + tile_group->tile_start_offset[tile_num];

		/* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position
		 * and clear original for hw requirement.
		 */
		tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) |
			((tile_buf_pa & 0xF00000000ull) >> 32);
		tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3;

		/* tile extent minus one, from consecutive start positions */
		sb_boundary_x_m1 =
			(tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) &
			0x3f;
		sb_boundary_y_m1 =
			(tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) &
			0x1ff;

		tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1;
		tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16));

		/* flag the tile whose context is kept at the end of the frame */
		if (tile_num == tile->context_update_tile_id &&
		    uh->disable_frame_end_update_cdf == 0)
			tile_info_buf[tile_info_base + 4] |= (1 << 17);

		mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n",
			       tile_num, tile_row, tile_col, tile_info_base);
		mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
			       tile_info_buf[tile_info_base + 0],
			       tile_info_buf[tile_info_base + 1],
			       tile_info_buf[tile_info_base + 2],
			       tile_info_buf[tile_info_base + 3]);
		mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
			       tile_info_buf[tile_info_base + 4],
			       tile_info_buf[tile_info_base + 5],
			       tile_info_buf[tile_info_base + 6],
			       tile_info_buf[tile_info_base + 7]);
	}
}
1709
1710static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance,
1711				    struct mtk_vcodec_mem *bs,
1712				    struct vdec_lat_buf *lat_buf,
1713				    struct vdec_av1_slice_pfc *pfc)
1714{
1715	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1716	int ret;
1717
1718	ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf);
1719	if (ret)
1720		return ret;
1721
1722	ret = vdec_av1_slice_setup_pfc(instance, pfc);
1723	if (ret)
1724		return ret;
1725
1726	ret = vdec_av1_slice_setup_tile_group(instance, vsi);
1727	if (ret)
1728		return ret;
1729
1730	ret = vdec_av1_slice_alloc_working_buffer(instance, vsi);
1731	if (ret)
1732		return ret;
1733
1734	vdec_av1_slice_setup_seg_buffer(instance, vsi);
1735	vdec_av1_slice_setup_tile_buffer(instance, vsi, bs);
1736	vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1737
1738	return 0;
1739}
1740
1741static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance,
1742				     struct vdec_lat_buf *lat_buf,
1743				     struct vdec_av1_slice_pfc *pfc)
1744{
1745	struct vdec_av1_slice_vsi *vsi;
1746
1747	vsi = &pfc->vsi;
1748	mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n",
1749		       pfc->seq, vsi->state.crc[0], vsi->state.out_size);
1750
1751	/* buffer full, need to re-decode */
1752	if (vsi->state.full) {
1753		/* buffer not enough */
1754		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size)
1755			return -ENOMEM;
1756		return -EAGAIN;
1757	}
1758
1759	instance->width = vsi->frame.uh.upscaled_width;
1760	instance->height = vsi->frame.uh.frame_height;
1761	instance->frame_type = vsi->frame.uh.frame_type;
1762
1763	return 0;
1764}
1765
1766static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance,
1767						struct vdec_lat_buf *lat_buf)
1768{
1769	struct vb2_v4l2_buffer *dst;
1770
1771	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1772	if (!dst)
1773		return -EINVAL;
1774
1775	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1776
1777	return 0;
1778}
1779
1780static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance,
1781					    struct vdec_av1_slice_pfc *pfc,
1782					    struct vdec_av1_slice_vsi *vsi,
1783					    struct vdec_fb *fb,
1784					    struct vdec_lat_buf *lat_buf)
1785{
1786	struct vb2_buffer *vb;
1787	struct vb2_queue *vq;
1788	int w, h, plane, size;
1789	int i;
1790
1791	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1792	w = vsi->frame.uh.upscaled_width;
1793	h = vsi->frame.uh.frame_height;
1794	size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64);
1795
1796	/* frame buffer */
1797	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1798	if (plane == 1)
1799		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1800	else
1801		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1802
1803	/* reference buffers */
1804	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1805	if (!vq)
1806		return -EINVAL;
1807
1808	/* get current output buffer */
1809	vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1810	if (!vb)
1811		return -EINVAL;
1812
1813	/* get buffer address from vb2buf */
1814	for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1815		struct vdec_av1_slice_fb *vref = &vsi->ref[i];
1816
1817		vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1818		if (!vb) {
1819			memset(vref, 0, sizeof(*vref));
1820			continue;
1821		}
1822
1823		vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
1824		if (plane == 1)
1825			vref->c.dma_addr = vref->y.dma_addr + size;
1826		else
1827			vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1);
1828	}
1829	vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr;
1830	vsi->tile.size = lat_buf->tile_addr.size;
1831
1832	return 0;
1833}
1834
1835static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance,
1836				     struct vdec_fb *fb,
1837				     struct vdec_lat_buf *lat_buf,
1838				     struct vdec_av1_slice_pfc *pfc)
1839{
1840	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1841	int ret;
1842
1843	ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf);
1844	if (ret)
1845		return ret;
1846
1847	ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1848	if (ret)
1849		return ret;
1850
1851	return 0;
1852}
1853
1854static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance,
1855				      struct vdec_lat_buf *lat_buf,
1856				      struct vdec_av1_slice_pfc *pfc)
1857{
1858	struct vdec_av1_slice_vsi *vsi = instance->core_vsi;
1859
1860	mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n",
1861		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1862		       vsi->state.crc[2], vsi->state.crc[3]);
1863	mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n",
1864		       pfc->seq, vsi->state.crc[8], vsi->state.crc[9],
1865		       vsi->state.crc[10], vsi->state.crc[11]);
1866
1867	return 0;
1868}
1869
1870static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx)
1871{
1872	struct vdec_av1_slice_instance *instance;
1873	struct vdec_av1_slice_init_vsi *vsi;
1874	int ret;
1875
1876	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1877	if (!instance)
1878		return -ENOMEM;
1879
1880	instance->ctx = ctx;
1881	instance->vpu.id = SCP_IPI_VDEC_LAT;
1882	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1883	instance->vpu.ctx = ctx;
1884	instance->vpu.codec_type = ctx->current_codec;
1885
1886	ret = vpu_dec_init(&instance->vpu);
1887	if (ret) {
1888		mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
1889		goto error_vpu_init;
1890	}
1891
1892	/* init vsi and global flags */
1893	vsi = instance->vpu.vsi;
1894	if (!vsi) {
1895		mtk_vdec_err(ctx, "failed to get AV1 vsi\n");
1896		ret = -EINVAL;
1897		goto error_vsi;
1898	}
1899	instance->init_vsi = vsi;
1900	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi);
1901
1902	if (!instance->core_vsi) {
1903		mtk_vdec_err(ctx, "failed to get AV1 core vsi\n");
1904		ret = -EINVAL;
1905		goto error_vsi;
1906	}
1907
1908	if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi))
1909		mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n",
1910			     vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi));
1911
1912	instance->irq_enabled = 1;
1913	instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability);
1914
1915	mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n",
1916		       vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode);
1917
1918	ret = vdec_av1_slice_init_cdf_table(instance);
1919	if (ret)
1920		goto error_vsi;
1921
1922	ret = vdec_av1_slice_init_iq_table(instance);
1923	if (ret)
1924		goto error_vsi;
1925
1926	ctx->drv_handle = instance;
1927
1928	return 0;
1929error_vsi:
1930	vpu_dec_deinit(&instance->vpu);
1931error_vpu_init:
1932	kfree(instance);
1933
1934	return ret;
1935}
1936
1937static void vdec_av1_slice_deinit(void *h_vdec)
1938{
1939	struct vdec_av1_slice_instance *instance = h_vdec;
1940
1941	if (!instance)
1942		return;
1943	mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec);
1944	vpu_dec_deinit(&instance->vpu);
1945	vdec_av1_slice_free_working_buffer(instance);
1946	vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1947	kfree(instance);
1948}
1949
1950static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
1951				struct vdec_fb *fb, bool *res_chg)
1952{
1953	struct vdec_av1_slice_instance *instance = h_vdec;
1954	int i;
1955
1956	mtk_vdec_debug(instance->ctx, "flush ...\n");
1957
1958	vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
1959
1960	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++)
1961		vdec_av1_slice_clear_fb(&instance->slots.frame_info[i]);
1962
1963	return vpu_dec_reset(&instance->vpu);
1964}
1965
1966static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance)
1967{
1968	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1969	u32 data[3];
1970
1971	mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1972
1973	data[0] = ctx->picinfo.pic_w;
1974	data[1] = ctx->picinfo.pic_h;
1975	data[2] = ctx->capture_fourcc;
1976	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1977
1978	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
1979	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
1980	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1981	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1982}
1983
1984static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance,
1985					       u32 *dpb_sz)
1986{
1987	/* refer av1 specification */
1988	*dpb_sz = V4L2_AV1_TOTAL_REFS_PER_FRAME + 1;
1989}
1990
1991static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance,
1992					 struct v4l2_rect *cr)
1993{
1994	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1995
1996	cr->left = 0;
1997	cr->top = 0;
1998	cr->width = ctx->picinfo.pic_w;
1999	cr->height = ctx->picinfo.pic_h;
2000
2001	mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n",
2002		       cr->left, cr->top, cr->width, cr->height);
2003}
2004
2005static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
2006{
2007	struct vdec_av1_slice_instance *instance = h_vdec;
2008
2009	switch (type) {
2010	case GET_PARAM_PIC_INFO:
2011		vdec_av1_slice_get_pic_info(instance);
2012		break;
2013	case GET_PARAM_DPB_SIZE:
2014		vdec_av1_slice_get_dpb_size(instance, out);
2015		break;
2016	case GET_PARAM_CROP_INFO:
2017		vdec_av1_slice_get_crop_info(instance, out);
2018		break;
2019	default:
2020		mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
2021		return -EINVAL;
2022	}
2023
2024	return 0;
2025}
2026
2027static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2028				     struct vdec_fb *fb, bool *res_chg)
2029{
2030	struct vdec_av1_slice_instance *instance = h_vdec;
2031	struct vdec_lat_buf *lat_buf;
2032	struct vdec_av1_slice_pfc *pfc;
2033	struct vdec_av1_slice_vsi *vsi;
2034	struct mtk_vcodec_dec_ctx *ctx;
2035	int ret;
2036
2037	if (!instance || !instance->ctx)
2038		return -EINVAL;
2039
2040	ctx = instance->ctx;
2041	/* init msgQ for the first time */
2042	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
2043				vdec_av1_slice_core_decode, sizeof(*pfc))) {
2044		mtk_vdec_err(ctx, "failed to init AV1 msg queue\n");
2045		return -ENOMEM;
2046	}
2047
2048	/* bs NULL means flush decoder */
2049	if (!bs)
2050		return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg);
2051
2052	lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
2053	if (!lat_buf) {
2054		mtk_vdec_err(ctx, "failed to get AV1 lat buf\n");
2055		return -EAGAIN;
2056	}
2057	pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data;
2058	if (!pfc) {
2059		ret = -EINVAL;
2060		goto err_free_fb_out;
2061	}
2062	vsi = &pfc->vsi;
2063
2064	ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc);
2065	if (ret) {
2066		mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret);
2067		goto err_free_fb_out;
2068	}
2069
2070	vdec_av1_slice_vsi_to_remote(vsi, instance->vsi);
2071	ret = vpu_dec_start(&instance->vpu, NULL, 0);
2072	if (ret) {
2073		mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret);
2074		goto err_free_fb_out;
2075	}
2076	if (instance->inneracing_mode)
2077		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
2078
2079	if (instance->irq_enabled) {
2080		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2081						   WAIT_INTR_TIMEOUT_MS,
2082						   MTK_VDEC_LAT0);
2083		/* update remote vsi if decode timeout */
2084		if (ret) {
2085			mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret);
2086			WRITE_ONCE(instance->vsi->state.timeout, 1);
2087		}
2088		vpu_dec_end(&instance->vpu);
2089	}
2090
2091	vdec_av1_slice_vsi_from_remote(vsi, instance->vsi);
2092	ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc);
2093
2094	/* LAT trans full, re-decode */
2095	if (ret == -EAGAIN) {
2096		mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq);
2097		if (!instance->inneracing_mode)
2098			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
2099		return 0;
2100	}
2101
2102	/* LAT trans full, no more UBE or decode timeout */
2103	if (ret == -ENOMEM || vsi->state.timeout) {
2104		mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq);
2105		if (!instance->inneracing_mode)
2106			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
2107		return -EBUSY;
2108	}
2109	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
2110	mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n",
2111		       &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end);
2112
2113	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
2114
2115	if (!instance->inneracing_mode)
2116		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
2117	memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots));
2118
2119	return 0;
2120
2121err_free_fb_out:
2122	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
2123
2124	if (pfc)
2125		mtk_vdec_err(ctx, "slice dec number: %d err: %d", pfc->seq, ret);
2126
2127	return ret;
2128}
2129
2130static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf)
2131{
2132	struct vdec_av1_slice_instance *instance;
2133	struct vdec_av1_slice_pfc *pfc;
2134	struct mtk_vcodec_dec_ctx *ctx = NULL;
2135	struct vdec_fb *fb = NULL;
2136	int ret = -EINVAL;
2137
2138	if (!lat_buf)
2139		return -EINVAL;
2140
2141	pfc = lat_buf->private_data;
2142	ctx = lat_buf->ctx;
2143	if (!pfc || !ctx)
2144		return -EINVAL;
2145
2146	instance = ctx->drv_handle;
2147	if (!instance)
2148		goto err;
2149
2150	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2151	if (!fb) {
2152		ret = -EBUSY;
2153		goto err;
2154	}
2155
2156	ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc);
2157	if (ret) {
2158		mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n");
2159		goto err;
2160	}
2161	vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2162	ret = vpu_dec_core(&instance->vpu);
2163	if (ret) {
2164		mtk_vdec_err(ctx, "vpu_dec_core\n");
2165		goto err;
2166	}
2167
2168	if (instance->irq_enabled) {
2169		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2170						   WAIT_INTR_TIMEOUT_MS,
2171						   MTK_VDEC_CORE);
2172		/* update remote vsi if decode timeout */
2173		if (ret) {
2174			mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq);
2175			WRITE_ONCE(instance->vsi->state.timeout, 1);
2176		}
2177		vpu_dec_core_end(&instance->vpu);
2178	}
2179
2180	ret = vdec_av1_slice_update_core(instance, lat_buf, pfc);
2181	if (ret) {
2182		mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n");
2183		goto err;
2184	}
2185
2186	mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n",
2187		       &instance->core_vsi->trans.dma_addr_end);
2188	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end);
2189
2190	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2191
2192	return 0;
2193
2194err:
2195	/* always update read pointer */
2196	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2197
2198	if (fb)
2199		ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2200
2201	return ret;
2202}
2203
2204const struct vdec_common_if vdec_av1_slice_lat_if = {
2205	.init		= vdec_av1_slice_init,
2206	.decode		= vdec_av1_slice_lat_decode,
2207	.get_param	= vdec_av1_slice_get_param,
2208	.deinit		= vdec_av1_slice_deinit,
2209};
2210