1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (c) 2023 MediaTek Inc. 4 * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com> 5 */ 6 7#include <linux/module.h> 8#include <linux/slab.h> 9#include <media/videobuf2-dma-contig.h> 10 11#include "../mtk_vcodec_dec.h" 12#include "../../common/mtk_vcodec_intr.h" 13#include "../vdec_drv_base.h" 14#include "../vdec_drv_if.h" 15#include "../vdec_vpu_if.h" 16 17#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1) 18#define AV1_TILE_BUF_SIZE 64 19#define AV1_SCALE_SUBPEL_BITS 10 20#define AV1_REF_SCALE_SHIFT 14 21#define AV1_REF_NO_SCALE BIT(AV1_REF_SCALE_SHIFT) 22#define AV1_REF_INVALID_SCALE -1 23#define AV1_CDF_TABLE_BUFFER_SIZE 16384 24#define AV1_PRIMARY_REF_NONE 7 25 26#define AV1_INVALID_IDX -1 27 28#define AV1_DIV_ROUND_UP_POW2(value, n) \ 29({ \ 30 typeof(n) _n = n; \ 31 typeof(value) _value = value; \ 32 (_value + (BIT(_n) >> 1)) >> _n; \ 33}) 34 35#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \ 36({ \ 37 typeof(n) _n_ = n; \ 38 typeof(value) _value_ = value; \ 39 (((_value_) < 0) ? 
-AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \ 40 : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \ 41}) 42 43#define BIT_FLAG(x, bit) (!!((x)->flags & (bit))) 44#define SEGMENTATION_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name)) 45#define QUANT_FLAG(x, name) (!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name)) 46#define SEQUENCE_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name)) 47#define FH_FLAG(x, name) (!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name)) 48 49#define MINQ 0 50#define MAXQ 255 51 52#define DIV_LUT_PREC_BITS 14 53#define DIV_LUT_BITS 8 54#define DIV_LUT_NUM BIT(DIV_LUT_BITS) 55#define WARP_PARAM_REDUCE_BITS 6 56#define WARPEDMODEL_PREC_BITS 16 57 58#define SEG_LVL_ALT_Q 0 59#define SECONDARY_FILTER_STRENGTH_NUM_BITS 2 60 61static const short div_lut[DIV_LUT_NUM + 1] = { 62 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768, 63 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142, 64 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564, 65 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028, 66 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530, 67 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066, 68 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633, 69 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228, 70 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848, 71 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491, 72 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155, 73 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838, 74 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538, 75 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255, 76 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 
10034, 10010, 9986, 77 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732, 78 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489, 79 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259, 80 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039, 81 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830, 82 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630, 83 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439, 84 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257, 85 8240, 8224, 8208, 8192, 86}; 87 88/** 89 * struct vdec_av1_slice_init_vsi - VSI used to initialize instance 90 * @architecture: architecture type 91 * @reserved: reserved 92 * @core_vsi: for core vsi 93 * @cdf_table_addr: cdf table addr 94 * @cdf_table_size: cdf table size 95 * @iq_table_addr: iq table addr 96 * @iq_table_size: iq table size 97 * @vsi_size: share vsi structure size 98 */ 99struct vdec_av1_slice_init_vsi { 100 u32 architecture; 101 u32 reserved; 102 u64 core_vsi; 103 u64 cdf_table_addr; 104 u32 cdf_table_size; 105 u64 iq_table_addr; 106 u32 iq_table_size; 107 u32 vsi_size; 108}; 109 110/** 111 * struct vdec_av1_slice_mem - memory address and size 112 * @buf: dma_addr padding 113 * @dma_addr: buffer address 114 * @size: buffer size 115 * @dma_addr_end: buffer end address 116 * @padding: for padding 117 */ 118struct vdec_av1_slice_mem { 119 union { 120 u64 buf; 121 dma_addr_t dma_addr; 122 }; 123 union { 124 size_t size; 125 dma_addr_t dma_addr_end; 126 u64 padding; 127 }; 128}; 129 130/** 131 * struct vdec_av1_slice_state - decoding state 132 * @err : err type for decode 133 * @full : transcoded buffer is full or not 134 * @timeout : decode timeout or not 135 * @perf : performance enable 136 * @crc : hw checksum 137 * @out_size : hw output size 138 */ 139struct vdec_av1_slice_state { 140 int err; 141 u32 full; 142 u32 timeout; 143 u32 perf; 144 u32 crc[16]; 145 u32 out_size; 146}; 147 
/*
 * enum vdec_av1_slice_resolution_level - resolution level
 */
enum vdec_av1_slice_resolution_level {
	AV1_RES_NONE,
	AV1_RES_FHD,
	AV1_RES_4K,
	AV1_RES_8K,
};

/*
 * enum vdec_av1_slice_frame_type - av1 frame type
 */
enum vdec_av1_slice_frame_type {
	AV1_KEY_FRAME = 0,
	AV1_INTER_FRAME,
	AV1_INTRA_ONLY_FRAME,
	AV1_SWITCH_FRAME,
	AV1_FRAME_TYPES,
};

/*
 * enum vdec_av1_slice_reference_mode - reference mode type
 */
enum vdec_av1_slice_reference_mode {
	AV1_SINGLE_REFERENCE = 0,
	AV1_COMPOUND_REFERENCE,
	AV1_REFERENCE_MODE_SELECT,
	AV1_REFERENCE_MODES,
};

/**
 * struct vdec_av1_slice_tile_group - info for each tile
 * @num_tiles:		tile number
 * @tile_size:		input size for each tile
 * @tile_start_offset:	tile offset to input buffer
 */
struct vdec_av1_slice_tile_group {
	u32 num_tiles;
	u32 tile_size[V4L2_AV1_MAX_TILE_COUNT];
	u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT];
};

/**
 * struct vdec_av1_slice_scale_factors - scale info for each ref frame
 * @is_scaled:	frame is scaled or not
 * @x_scale:	frame width scale coefficient
 * @y_scale:	frame height scale coefficient
 * @x_step:	width step for x_scale
 * @y_step:	height step for y_scale
 */
struct vdec_av1_slice_scale_factors {
	u8 is_scaled;
	int x_scale;
	int y_scale;
	int x_step;
	int y_step;
};

/**
 * struct vdec_av1_slice_frame_refs - ref frame info
 * @ref_fb_idx:		ref slot index
 * @ref_map_idx:	ref frame index
 * @scale_factors:	scale factors for each ref frame
 */
struct vdec_av1_slice_frame_refs {
	int ref_fb_idx;
	int ref_map_idx;
	struct vdec_av1_slice_scale_factors scale_factors;
};

/**
 * struct vdec_av1_slice_gm - AV1 Global Motion parameters
 * @wmtype:	The type of global motion transform used
 * @wmmat:	gm_params
 * @alpha:	alpha info
 * @beta:	beta info
 * @gamma:	gamma info
 * @delta:	delta info
 * @invalid:	is invalid or not
 */
struct vdec_av1_slice_gm {
	int wmtype;
	int wmmat[8];
	short alpha;
	short beta;
	short gamma;
	short delta;
	char invalid;
};

/**
 * struct vdec_av1_slice_sm - AV1 Skip Mode parameters
 * @skip_mode_allowed:	Skip Mode is allowed or not
 * @skip_mode_present:	specifies whether skip_mode will be present or not
 * @skip_mode_frame:	specifies the frames to use for compound prediction
 */
struct vdec_av1_slice_sm {
	u8 skip_mode_allowed;
	u8 skip_mode_present;
	int skip_mode_frame[2];
};

/**
 * struct vdec_av1_slice_seg - AV1 Segmentation params
 * @segmentation_enabled:        this frame makes use of the segmentation tool or not
 * @segmentation_update_map:     segmentation map is updated during the decoding of this frame
 * @segmentation_temporal_update: segmentation map is coded relative to the existing
 *                               segmentation map
 * @segmentation_update_data:    new parameters are about to be specified for each segment
 * @feature_data:                specifies the feature data for a segment feature
 * @feature_enabled_mask:        the corresponding feature value is coded or not.
 * @segid_preskip:               segment id will be read before the skip syntax element.
 * @last_active_segid:           the highest numbered segment id that has some enabled feature
 */
struct vdec_av1_slice_seg {
	u8 segmentation_enabled;
	u8 segmentation_update_map;
	u8 segmentation_temporal_update;
	u8 segmentation_update_data;
	int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX];
	u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS];
	int segid_preskip;
	int last_active_segid;
};

/**
 * struct vdec_av1_slice_delta_q_lf - AV1 Loop Filter delta parameters
 * @delta_q_present:    specifies whether quantizer index delta values are present
 * @delta_q_res:        specifies the left shift which should be applied to decoded
 *                      quantizer index delta values
 * @delta_lf_present:   specifies whether loop filter delta values are present
 * @delta_lf_res:       specifies the left shift which should be applied to decoded
 *                      loop filter delta values
 * @delta_lf_multi:     specifies that separate loop filter deltas are sent for horizontal
 *                      luma edges, vertical luma edges, the u edges, and the v edges.
 */
struct vdec_av1_slice_delta_q_lf {
	u8 delta_q_present;
	u8 delta_q_res;
	u8 delta_lf_present;
	u8 delta_lf_res;
	u8 delta_lf_multi;
};

/**
 * struct vdec_av1_slice_quantization - AV1 Quantization params
 * @base_q_idx:         indicates the base frame qindex. This is used for Y AC
 *                      coefficients and as the base value for the other quantizers.
 * @qindex:             qindex
 * @delta_qydc:         indicates the Y DC quantizer relative to base_q_idx
 * @delta_qudc:         indicates the U DC quantizer relative to base_q_idx.
 * @delta_quac:         indicates the U AC quantizer relative to base_q_idx
 * @delta_qvdc:         indicates the V DC quantizer relative to base_q_idx
 * @delta_qvac:         indicates the V AC quantizer relative to base_q_idx
 * @using_qmatrix:      specifies that the quantizer matrix will be used to
 *                      compute quantizers
 * @qm_y:               specifies the level in the quantizer matrix that should
 *                      be used for luma plane decoding
 * @qm_u:               specifies the level in the quantizer matrix that should
 *                      be used for chroma U plane decoding.
 * @qm_v:               specifies the level in the quantizer matrix that should be
 *                      used for chroma V plane decoding
 */
struct vdec_av1_slice_quantization {
	int base_q_idx;
	int qindex[V4L2_AV1_MAX_SEGMENTS];
	int delta_qydc;
	int delta_qudc;
	int delta_quac;
	int delta_qvdc;
	int delta_qvac;
	u8 using_qmatrix;
	u8 qm_y;
	u8 qm_u;
	u8 qm_v;
};

/**
 * struct vdec_av1_slice_lr - AV1 Loop Restoration parameters
 * @use_lr:                     whether to use loop restoration
 * @use_chroma_lr:              whether to use chroma loop restoration
 * @frame_restoration_type:     specifies the type of restoration used for each plane
 * @loop_restoration_size:      specifies the size of loop restoration units in units
 *                              of samples in the current plane
 */
struct vdec_av1_slice_lr {
	u8 use_lr;
	u8 use_chroma_lr;
	u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX];
	u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX];
};

/**
 * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters
 * @loop_filter_level:          an array containing loop filter strength values.
 * @loop_filter_ref_deltas:     contains the adjustment needed for the filter
 *                              level based on the chosen reference frame
 * @loop_filter_mode_deltas:    contains the adjustment needed for the filter
 *                              level based on the chosen mode
 * @loop_filter_sharpness:      indicates the sharpness level. The loop_filter_level
 *                              and loop_filter_sharpness together determine when
 *                              a block edge is filtered, and by how much the
 *                              filtering can change the sample values
 * @loop_filter_delta_enabled:  filter level depends on the mode and reference
 *                              frame used to predict a block
 */
struct vdec_av1_slice_loop_filter {
	u8 loop_filter_level[4];
	int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	int loop_filter_mode_deltas[2];
	u8 loop_filter_sharpness;
	u8 loop_filter_delta_enabled;
};

/**
 * struct vdec_av1_slice_cdef - AV1 CDEF parameters
 * @cdef_damping:       controls the amount of damping in the deringing filter
 * @cdef_y_strength:    specifies the strength of the primary filter and secondary filter
 * @cdef_uv_strength:   specifies the strength of the primary filter and secondary filter
 * @cdef_bits:          specifies the number of bits needed to specify which
 *                      CDEF filter to apply
 */
struct vdec_av1_slice_cdef {
	u8 cdef_damping;
	u8 cdef_y_strength[8];
	u8 cdef_uv_strength[8];
	u8 cdef_bits;
};

/**
 * struct vdec_av1_slice_mfmv - AV1 mfmv parameters
 * @mfmv_valid_ref:     mfmv_valid_ref
 * @mfmv_dir:           mfmv_dir
 * @mfmv_ref_to_cur:    mfmv_ref_to_cur
 * @mfmv_ref_frame_idx: mfmv_ref_frame_idx
 * @mfmv_count:         mfmv_count
 */
struct vdec_av1_slice_mfmv {
	u32 mfmv_valid_ref[3];
	u32 mfmv_dir[3];
	int mfmv_ref_to_cur[3];
	int mfmv_ref_frame_idx[3];
	int mfmv_count;
};

/**
 * struct vdec_av1_slice_tile - AV1 Tile info
 * @tile_cols:                  specifies the number of tiles across the frame
 * @tile_rows:                  specifies the number of tiles down the frame
 * @mi_col_starts:              an array specifying the start column
 * @mi_row_starts:              an array specifying the start row
 * @context_update_tile_id:     specifies which tile to use for the CDF update
 * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
 *                              or the tile sizes are coded
 */
struct vdec_av1_slice_tile {
	u8 tile_cols;
	u8 tile_rows;
	int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1];
	int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1];
	u8 context_update_tile_id;
	u8 uniform_tile_spacing_flag;
};

/**
 * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU
 * @use_ref_frame_mvs:          use_ref_frame_mvs flag
 * @order_hint:                 specifies OrderHintBits least significant bits of the expected
 *                              output order for this frame
 * @gm:                         global motion param
 * @upscaled_width:             the upscaled width
 * @frame_width:                frame's width
 * @frame_height:               frame's height
 * @reduced_tx_set:             frame is restricted to a reduced subset of the full
 *                              set of transform types
 * @tx_mode:                    specifies how the transform size is determined
 * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
 *                              or the tile sizes are coded
 * @interpolation_filter:       specifies the filter selection used for performing inter prediction
 * @allow_warped_motion:        motion_mode may be present or not
 * @is_motion_mode_switchable:  equal to 0 specifies that only the SIMPLE motion mode will be used
 * @reference_mode:             frame reference mode selected
 * @allow_high_precision_mv:    specifies that motion vectors are specified to
 *                              quarter pel precision or to eighth pel precision
 * @allow_intra_bc:             indicates that intra block copy may be used in this frame
 * @force_integer_mv:           specifies motion vectors will always be integers or
 *                              can contain fractional bits
 * @allow_screen_content_tools: intra blocks may use palette encoding
 * @error_resilient_mode:       error resilient mode is enabled/disabled
 * @frame_type:                 specifies the AV1 frame type
 * @primary_ref_frame:          specifies which reference frame contains the CDF values
 *                              and other state that should be loaded at the start of the frame
 * @disable_frame_end_update_cdf: indicates whether the end of frame CDF update is
 *                              disabled or enabled
 * @disable_cdf_update:         specifies whether the CDF update in the symbol
 *                              decoding process should be disabled
 * @skip_mode:                  av1 skip mode parameters
 * @seg:                        av1 segmentation parameters
 * @delta_q_lf:                 av1 delta loop filter
 * @quant:                      av1 Quantization params
 * @lr:                         av1 Loop Restoration parameters
 * @superres_denom:             the denominator for the upscaling ratio
 * @loop_filter:                av1 Loop filter parameters
 * @cdef:                       av1 CDEF parameters
 * @mfmv:                       av1 mfmv parameters
 * @tile:                       av1 Tile info
 * @frame_is_intra:             intra frame
 * @loss_less_array:            loss less array
 * @coded_loss_less:            coded loss less
 * @mi_rows:                    size of mi unit in rows
 * @mi_cols:                    size of mi unit in cols
 */
struct vdec_av1_slice_uncompressed_header {
	u8 use_ref_frame_mvs;
	int order_hint;
	struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 frame_width;
	u32 frame_height;
	u8 reduced_tx_set;
	u8 tx_mode;
	u8 uniform_tile_spacing_flag;
	u8 interpolation_filter;
	u8 allow_warped_motion;
	u8 is_motion_mode_switchable;
	u8 reference_mode;
	u8 allow_high_precision_mv;
	u8 allow_intra_bc;
	u8 force_integer_mv;
	u8 allow_screen_content_tools;
	u8 error_resilient_mode;
	u8 frame_type;
	u8 primary_ref_frame;
	u8 disable_frame_end_update_cdf;
	u32 disable_cdf_update;
	struct vdec_av1_slice_sm skip_mode;
	struct vdec_av1_slice_seg seg;
	struct vdec_av1_slice_delta_q_lf delta_q_lf;
	struct vdec_av1_slice_quantization quant;
	struct vdec_av1_slice_lr lr;
	u32 superres_denom;
	struct vdec_av1_slice_loop_filter loop_filter;
	struct vdec_av1_slice_cdef cdef;
	struct vdec_av1_slice_mfmv mfmv;
	struct vdec_av1_slice_tile tile;
	u8 frame_is_intra;
	u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS];
	u8 coded_loss_less;
	u32 mi_rows;
	u32 mi_cols;
};

/**
 * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU
 * @bitdepth:                   the bitdepth to use for the sequence
 * @enable_superres:            specifies whether the use_superres syntax element may be present
 * @enable_filter_intra:        specifies the use_filter_intra syntax element may be present
 * @enable_intra_edge_filter:   whether the intra edge filtering process should be enabled
 * @enable_interintra_compound: specifies the mode info for inter blocks may
 *                              contain the syntax element interintra
 * @enable_masked_compound:     specifies the mode info for inter blocks may
 *                              contain the syntax element compound_type
 * @enable_dual_filter:         the inter prediction filter type may be specified independently
 * @enable_jnt_comp:            distance weights process may be used for inter prediction
 * @mono_chrome:                indicates the video does not contain U and V color planes
 * @enable_order_hint:          tools based on the values of order hints may be used
 * @order_hint_bits:            the number of bits used for the order_hint field at each frame
 * @use_128x128_superblock:     indicates superblocks contain 128*128 luma samples
 * @subsampling_x:              the chroma subsampling format
 * @subsampling_y:              the chroma subsampling format
 * @max_frame_width:            the maximum frame width for the frames represented by sequence
 * @max_frame_height:           the maximum frame height for the frames represented by sequence
 */
struct vdec_av1_slice_seq_header {
	u8 bitdepth;
	u8 enable_superres;
	u8 enable_filter_intra;
	u8 enable_intra_edge_filter;
	u8 enable_interintra_compound;
	u8 enable_masked_compound;
	u8 enable_dual_filter;
	u8 enable_jnt_comp;
	u8 mono_chrome;
	u8 enable_order_hint;
	u8 order_hint_bits;
	u8 use_128x128_superblock;
	u8 subsampling_x;
	u8 subsampling_y;
	u32 max_frame_width;
	u32 max_frame_height;
};

/**
 * struct vdec_av1_slice_frame - Represents current Frame info
 * @uh:                         uncompressed header info
 * @seq:                        sequence header info
 * @large_scale_tile:           is large scale mode
 * @cur_ts:                     current frame timestamp
 * @prev_fb_idx:                prev slot id
 * @ref_frame_sign_bias:        arrays for ref_frame sign bias
 * @order_hints:                arrays for ref_frame order hint
 * @ref_frame_valid:            arrays for valid ref_frame
 * @ref_frame_map:              map to slot frame info
 * @frame_refs:                 ref_frame info
 */
struct vdec_av1_slice_frame {
	struct vdec_av1_slice_uncompressed_header uh;
	struct vdec_av1_slice_seq_header seq;
	u8 large_scale_tile;
	u64 cur_ts;
	int prev_fb_idx;
	u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME];
	int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME];
	struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME];
};

/**
 * struct vdec_av1_slice_work_buffer - work buffer for lat
 * @mv_addr:    mv buffer memory info
 * @cdf_addr:   cdf buffer memory info
 * @segid_addr: segid buffer memory info
 */
struct vdec_av1_slice_work_buffer {
	struct vdec_av1_slice_mem mv_addr;
	struct vdec_av1_slice_mem cdf_addr;
	struct vdec_av1_slice_mem segid_addr;
};

/**
 * struct vdec_av1_slice_frame_info - frame info for each slot
 * @frame_type:         frame type
 * @frame_is_intra:     is intra frame
 * @order_hint:         order hint
 * @order_hints:        reference frame order hint
 * @upscaled_width:     upscale width
 * @pic_pitch:          buffer pitch
 * @frame_width:        frame width
 * @frame_height:       frame height
 * @mi_rows:            rows in mode info
 * @mi_cols:            cols in mode info
 * @ref_count:          mark to reference frame counts
 */
struct vdec_av1_slice_frame_info {
	u8 frame_type;
	u8 frame_is_intra;
	int order_hint;
	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
	u32 upscaled_width;
	u32 pic_pitch;
	u32 frame_width;
	u32 frame_height;
	u32 mi_rows;
	u32 mi_cols;
	int ref_count;
};

/**
 * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance
 * @frame_info: frame info for each slot
 * @timestamp:  time stamp info
 */
struct vdec_av1_slice_slot {
	struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT];
	u64 timestamp[AV1_MAX_FRAME_BUF_COUNT];
};

/**
 * struct vdec_av1_slice_fb - frame buffer for decoding
 * @y:  current y buffer address info
 * @c:  current c buffer address info
 */
struct vdec_av1_slice_fb {
	struct vdec_av1_slice_mem y;
	struct vdec_av1_slice_mem c;
};

/**
 * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP
 * @bs:			input buffer info
 * @work_buffer:	working buffer for hw
 * @cdf_table:		cdf_table buffer
 * @cdf_tmp:		cdf temp buffer
 * @rd_mv:		mv buffer for lat output, core input
 * @ube:		ube buffer
 * @trans:		transcoded buffer
 * @err_map:		err map buffer
 * @row_info:		row info buffer
 * @fb:			current y/c buffer
 * @ref:		ref y/c buffer
 * @iq_table:		iq table buffer
 * @tile:		tile buffer
 * @slots:		slots info for each frame
 * @slot_id:		current frame slot id
 * @frame:		current frame info
 * @state:		status after decode done
 * @cur_lst_tile_id:	tile id for large scale
 */
struct vdec_av1_slice_vsi {
	/* lat */
	struct vdec_av1_slice_mem bs;
	struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT];
	struct vdec_av1_slice_mem cdf_table;
	struct vdec_av1_slice_mem cdf_tmp;
	/* LAT stage's output, Core stage's input */
	struct vdec_av1_slice_mem rd_mv;
	struct vdec_av1_slice_mem ube;
	struct vdec_av1_slice_mem trans;
	struct vdec_av1_slice_mem err_map;
	struct vdec_av1_slice_mem row_info;
	/* core */
	struct vdec_av1_slice_fb fb;
	struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME];
	struct vdec_av1_slice_mem iq_table;
	/* shared by lat and core */
	struct vdec_av1_slice_mem tile;
	struct vdec_av1_slice_slot slots;
	s8 slot_id;
	struct vdec_av1_slice_frame frame;
	struct vdec_av1_slice_state state;
	u32 cur_lst_tile_id;
};

/**
 * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi.
 *                             pass it from lat to core
 * @vsi:        local vsi. copy to/from remote vsi before/after decoding
 * @ref_idx:    reference buffer timestamp
 * @seq:        picture sequence
 */
struct vdec_av1_slice_pfc {
	struct vdec_av1_slice_vsi vsi;
	u64 ref_idx[V4L2_AV1_REFS_PER_FRAME];
	int seq;
};

/**
 * struct vdec_av1_slice_instance - represent one av1 instance
 * @ctx:                pointer to codec's context
 * @vpu:                VPU instance
 * @iq_table:           iq table buffer
 * @cdf_table:          cdf table buffer
 * @mv:                 mv working buffer
 * @cdf:                cdf working buffer
 * @seg:                segmentation working buffer
 * @cdf_temp:           cdf temp buffer
 * @tile:               tile buffer
 * @slots:              slots info
 * @tile_group:         tile_group entry
 * @level:              level of current resolution
 * @width:              width of last picture
 * @height:             height of last picture
 * @frame_type:         frame_type of last picture
 * @irq_enabled:        irq to Main CPU or MicroP
 * @inneracing_mode:    is inneracing mode
 * @init_vsi:           vsi used for initialized AV1 instance
 * @vsi:                vsi used for decoding/flush ...
704 * @core_vsi: vsi used for Core stage 705 * @seq: global picture sequence 706 */ 707struct vdec_av1_slice_instance { 708 struct mtk_vcodec_dec_ctx *ctx; 709 struct vdec_vpu_inst vpu; 710 711 struct mtk_vcodec_mem iq_table; 712 struct mtk_vcodec_mem cdf_table; 713 714 struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT]; 715 struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT]; 716 struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT]; 717 struct mtk_vcodec_mem cdf_temp; 718 struct mtk_vcodec_mem tile; 719 struct vdec_av1_slice_slot slots; 720 struct vdec_av1_slice_tile_group tile_group; 721 722 /* for resolution change and get_pic_info */ 723 enum vdec_av1_slice_resolution_level level; 724 u32 width; 725 u32 height; 726 727 u32 frame_type; 728 u32 irq_enabled; 729 u32 inneracing_mode; 730 731 /* MicroP vsi */ 732 union { 733 struct vdec_av1_slice_init_vsi *init_vsi; 734 struct vdec_av1_slice_vsi *vsi; 735 }; 736 struct vdec_av1_slice_vsi *core_vsi; 737 int seq; 738}; 739 740static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf); 741 742static inline int vdec_av1_slice_get_msb(u32 n) 743{ 744 if (n == 0) 745 return 0; 746 return 31 ^ __builtin_clz(n); 747} 748 749static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height, 750 u32 this_width, u32 this_height) 751{ 752 return ((this_width << 1) >= ref_width) && 753 ((this_height << 1) >= ref_height) && 754 (this_width <= (ref_width << 4)) && 755 (this_height <= (ref_height << 4)); 756} 757 758static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) 759{ 760 struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); 761 762 if (!ctrl) 763 return ERR_PTR(-EINVAL); 764 765 return ctrl->p_cur.p; 766} 767 768static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance) 769{ 770 u8 *remote_cdf_table; 771 struct mtk_vcodec_dec_ctx *ctx; 772 struct vdec_av1_slice_init_vsi *vsi; 773 int ret; 774 775 ctx = instance->ctx; 776 vsi = instance->vpu.vsi; 777 
remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 778 (u32)vsi->cdf_table_addr); 779 if (IS_ERR(remote_cdf_table)) { 780 mtk_vdec_err(ctx, "failed to map cdf table\n"); 781 return PTR_ERR(remote_cdf_table); 782 } 783 784 mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table); 785 786 if (instance->cdf_table.va) 787 mtk_vcodec_mem_free(ctx, &instance->cdf_table); 788 instance->cdf_table.size = vsi->cdf_table_size; 789 790 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table); 791 if (ret) 792 return ret; 793 794 memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size); 795 796 return 0; 797} 798 799static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance) 800{ 801 u8 *remote_iq_table; 802 struct mtk_vcodec_dec_ctx *ctx; 803 struct vdec_av1_slice_init_vsi *vsi; 804 int ret; 805 806 ctx = instance->ctx; 807 vsi = instance->vpu.vsi; 808 remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 809 (u32)vsi->iq_table_addr); 810 if (IS_ERR(remote_iq_table)) { 811 mtk_vdec_err(ctx, "failed to map iq table\n"); 812 return PTR_ERR(remote_iq_table); 813 } 814 815 mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table); 816 817 if (instance->iq_table.va) 818 mtk_vcodec_mem_free(ctx, &instance->iq_table); 819 instance->iq_table.size = vsi->iq_table_size; 820 821 ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table); 822 if (ret) 823 return ret; 824 825 memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size); 826 827 return 0; 828} 829 830static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi) 831{ 832 struct vdec_av1_slice_slot *slots = &vsi->slots; 833 int new_slot_idx = AV1_INVALID_IDX; 834 int i; 835 836 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 837 if (slots->frame_info[i].ref_count == 0) { 838 new_slot_idx = i; 839 break; 840 } 841 } 842 843 if (new_slot_idx != AV1_INVALID_IDX) { 844 slots->frame_info[new_slot_idx].ref_count++; 845 slots->timestamp[new_slot_idx] = 
vsi->frame.cur_ts; 846 } 847 848 return new_slot_idx; 849} 850 851static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info) 852{ 853 memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info)); 854} 855 856static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx) 857{ 858 struct vdec_av1_slice_frame_info *frame_info = slots->frame_info; 859 860 frame_info[fb_idx].ref_count--; 861 if (frame_info[fb_idx].ref_count < 0) { 862 frame_info[fb_idx].ref_count = 0; 863 pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n", 864 __func__, fb_idx); 865 } 866 867 vdec_av1_slice_clear_fb(&frame_info[fb_idx]); 868} 869 870static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots, 871 struct vdec_av1_slice_frame *frame, 872 struct v4l2_ctrl_av1_frame *ctrl_fh) 873{ 874 int slot_id, ref_id; 875 876 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) 877 frame->ref_frame_map[ref_id] = AV1_INVALID_IDX; 878 879 for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) { 880 u64 timestamp = slots->timestamp[slot_id]; 881 bool ref_used = false; 882 883 /* ignored unused slots */ 884 if (slots->frame_info[slot_id].ref_count == 0) 885 continue; 886 887 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { 888 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) { 889 frame->ref_frame_map[ref_id] = slot_id; 890 ref_used = true; 891 } 892 } 893 894 if (!ref_used) 895 vdec_av1_slice_decrease_ref_count(slots, slot_id); 896 } 897} 898 899static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance, 900 struct vdec_av1_slice_vsi *vsi, 901 struct v4l2_ctrl_av1_frame *ctrl_fh) 902{ 903 struct vdec_av1_slice_frame_info *cur_frame_info; 904 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 905 int ref_id; 906 907 memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots)); 908 
vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh); 909 vsi->slot_id = vdec_av1_slice_get_new_slot(vsi); 910 911 if (vsi->slot_id == AV1_INVALID_IDX) { 912 mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n"); 913 vsi->slot_id = 0; 914 } 915 cur_frame_info = &vsi->slots.frame_info[vsi->slot_id]; 916 cur_frame_info->frame_type = uh->frame_type; 917 cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) || 918 (uh->frame_type == AV1_KEY_FRAME)); 919 cur_frame_info->order_hint = uh->order_hint; 920 cur_frame_info->upscaled_width = uh->upscaled_width; 921 cur_frame_info->pic_pitch = 0; 922 cur_frame_info->frame_width = uh->frame_width; 923 cur_frame_info->frame_height = uh->frame_height; 924 cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1; 925 cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1; 926 927 /* ensure current frame is properly mapped if referenced */ 928 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { 929 u64 timestamp = vsi->slots.timestamp[vsi->slot_id]; 930 931 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) 932 vsi->frame.ref_frame_map[ref_id] = vsi->slot_id; 933 } 934} 935 936static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance, 937 struct vdec_av1_slice_vsi *vsi) 938{ 939 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 940 enum vdec_av1_slice_resolution_level level; 941 u32 max_sb_w, max_sb_h, max_w, max_h, w, h; 942 int i, ret; 943 944 w = vsi->frame.uh.frame_width; 945 h = vsi->frame.uh.frame_height; 946 947 if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT) 948 /* 8K */ 949 return -EINVAL; 950 951 if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { 952 /* 4K */ 953 level = AV1_RES_4K; 954 max_w = VCODEC_DEC_4K_CODED_WIDTH; 955 max_h = VCODEC_DEC_4K_CODED_HEIGHT; 956 } else { 957 /* FHD */ 958 level = AV1_RES_FHD; 959 max_w = MTK_VDEC_MAX_W; 960 max_h = MTK_VDEC_MAX_H; 961 } 962 963 if (level == 
instance->level) 964 return 0; 965 966 mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u", 967 instance->level, level, w, h); 968 969 max_sb_w = DIV_ROUND_UP(max_w, 128); 970 max_sb_h = DIV_ROUND_UP(max_h, 128); 971 972 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 973 if (instance->mv[i].va) 974 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 975 instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K; 976 ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]); 977 if (ret) 978 goto err; 979 980 if (instance->seg[i].va) 981 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 982 instance->seg[i].size = max_sb_w * max_sb_h * 512; 983 ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]); 984 if (ret) 985 goto err; 986 987 if (instance->cdf[i].va) 988 mtk_vcodec_mem_free(ctx, &instance->cdf[i]); 989 instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE; 990 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]); 991 if (ret) 992 goto err; 993 } 994 995 if (!instance->cdf_temp.va) { 996 instance->cdf_temp.size = (SZ_1K * 16 * 100); 997 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp); 998 if (ret) 999 goto err; 1000 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; 1001 vsi->cdf_tmp.size = instance->cdf_temp.size; 1002 } 1003 1004 if (instance->tile.va) 1005 mtk_vcodec_mem_free(ctx, &instance->tile); 1006 1007 instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT; 1008 ret = mtk_vcodec_mem_alloc(ctx, &instance->tile); 1009 if (ret) 1010 goto err; 1011 1012 instance->level = level; 1013 return 0; 1014 1015err: 1016 instance->level = AV1_RES_NONE; 1017 return ret; 1018} 1019 1020static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance) 1021{ 1022 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1023 int i; 1024 1025 for (i = 0; i < ARRAY_SIZE(instance->mv); i++) 1026 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 1027 1028 for (i = 0; i < ARRAY_SIZE(instance->seg); i++) 1029 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 1030 1031 for (i = 0; 
i < ARRAY_SIZE(instance->cdf); i++) 1032 mtk_vcodec_mem_free(ctx, &instance->cdf[i]); 1033 1034 mtk_vcodec_mem_free(ctx, &instance->tile); 1035 mtk_vcodec_mem_free(ctx, &instance->cdf_temp); 1036 mtk_vcodec_mem_free(ctx, &instance->cdf_table); 1037 mtk_vcodec_mem_free(ctx, &instance->iq_table); 1038 1039 instance->level = AV1_RES_NONE; 1040} 1041 1042static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi, 1043 struct vdec_av1_slice_vsi *remote_vsi) 1044{ 1045 memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); 1046 memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); 1047} 1048 1049static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi, 1050 struct vdec_av1_slice_vsi *remote_vsi) 1051{ 1052 memcpy(remote_vsi, vsi, sizeof(*vsi)); 1053} 1054 1055static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance, 1056 struct vdec_av1_slice_vsi *vsi, 1057 struct vdec_lat_buf *lat_buf) 1058{ 1059 struct vb2_v4l2_buffer *src; 1060 struct vb2_v4l2_buffer *dst; 1061 1062 src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); 1063 if (!src) 1064 return -EINVAL; 1065 1066 lat_buf->src_buf_req = src->vb2_buf.req_obj.req; 1067 dst = &lat_buf->ts_info; 1068 v4l2_m2m_buf_copy_metadata(src, dst, true); 1069 vsi->frame.cur_ts = dst->vb2_buf.timestamp; 1070 1071 return 0; 1072} 1073 1074static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift) 1075{ 1076 int f; 1077 int e; 1078 1079 *shift = vdec_av1_slice_get_msb(D); 1080 /* e is obtained from D after resetting the most significant 1 bit. 
*/ 1081 e = D - ((u32)1 << *shift); 1082 /* Get the most significant DIV_LUT_BITS (8) bits of e into f */ 1083 if (*shift > DIV_LUT_BITS) 1084 f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS); 1085 else 1086 f = e << (DIV_LUT_BITS - *shift); 1087 if (f > DIV_LUT_NUM) 1088 return -1; 1089 *shift += DIV_LUT_PREC_BITS; 1090 /* Use f as lookup into the precomputed table of multipliers */ 1091 return div_lut[f]; 1092} 1093 1094static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params) 1095{ 1096 const int *mat = gm_params->wmmat; 1097 short shift; 1098 short y; 1099 long long gv, dv; 1100 1101 if (gm_params->wmmat[2] <= 0) 1102 return; 1103 1104 gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); 1105 gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX); 1106 1107 y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1); 1108 1109 gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y; 1110 gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), 1111 S16_MIN, S16_MAX); 1112 1113 dv = ((long long)mat[3] * mat[4]) * y; 1114 gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - 1115 (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); 1116 1117 gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) * 1118 (1 << WARP_PARAM_REDUCE_BITS); 1119 gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) * 1120 (1 << WARP_PARAM_REDUCE_BITS); 1121 gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) * 1122 (1 << WARP_PARAM_REDUCE_BITS); 1123 gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) * 1124 (1 << WARP_PARAM_REDUCE_BITS); 1125} 1126 1127static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm, 1128 struct v4l2_av1_global_motion *ctrl_gm) 1129{ 1130 u32 i, j; 1131 1132 for (i = 0; i < 
V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) { 1133 gm[i].wmtype = ctrl_gm->type[i]; 1134 for (j = 0; j < 6; j++) 1135 gm[i].wmmat[j] = ctrl_gm->params[i][j]; 1136 1137 gm[i].invalid = !!(ctrl_gm->invalid & BIT(i)); 1138 gm[i].alpha = 0; 1139 gm[i].beta = 0; 1140 gm[i].gamma = 0; 1141 gm[i].delta = 0; 1142 if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE) 1143 vdec_av1_slice_get_shear_params(&gm[i]); 1144 } 1145} 1146 1147static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg, 1148 struct v4l2_av1_segmentation *ctrl_seg) 1149{ 1150 u32 i, j; 1151 1152 seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED); 1153 seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP); 1154 seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE); 1155 seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA); 1156 seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP); 1157 seg->last_active_segid = ctrl_seg->last_active_seg_id; 1158 1159 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { 1160 seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i]; 1161 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) 1162 seg->feature_data[i][j] = ctrl_seg->feature_data[i][j]; 1163 } 1164} 1165 1166static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant, 1167 struct v4l2_av1_quantization *ctrl_quant) 1168{ 1169 quant->base_q_idx = ctrl_quant->base_q_idx; 1170 quant->delta_qydc = ctrl_quant->delta_q_y_dc; 1171 quant->delta_qudc = ctrl_quant->delta_q_u_dc; 1172 quant->delta_quac = ctrl_quant->delta_q_u_ac; 1173 quant->delta_qvdc = ctrl_quant->delta_q_v_dc; 1174 quant->delta_qvac = ctrl_quant->delta_q_v_ac; 1175 quant->qm_y = ctrl_quant->qm_y; 1176 quant->qm_u = ctrl_quant->qm_u; 1177 quant->qm_v = ctrl_quant->qm_v; 1178 quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX); 1179} 1180 1181static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh, 1182 int segmentation_id) 
1183{ 1184 struct vdec_av1_slice_seg *seg = &uh->seg; 1185 struct vdec_av1_slice_quantization *quant = &uh->quant; 1186 int data = 0, qindex = 0; 1187 1188 if (seg->segmentation_enabled && 1189 (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) { 1190 data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q]; 1191 qindex = quant->base_q_idx + data; 1192 return clamp_val(qindex, 0, MAXQ); 1193 } 1194 1195 return quant->base_q_idx; 1196} 1197 1198static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr, 1199 struct v4l2_av1_loop_restoration *ctrl_lr) 1200{ 1201 int i; 1202 1203 lr->use_lr = 0; 1204 lr->use_chroma_lr = 0; 1205 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) { 1206 lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i]; 1207 lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i]; 1208 if (lr->frame_restoration_type[i]) { 1209 lr->use_lr = 1; 1210 if (i > 0) 1211 lr->use_chroma_lr = 1; 1212 } 1213 } 1214} 1215 1216static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf, 1217 struct v4l2_av1_loop_filter *ctrl_lf) 1218{ 1219 int i; 1220 1221 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++) 1222 lf->loop_filter_level[i] = ctrl_lf->level[i]; 1223 1224 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) 1225 lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i]; 1226 1227 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++) 1228 lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i]; 1229 1230 lf->loop_filter_sharpness = ctrl_lf->sharpness; 1231 lf->loop_filter_delta_enabled = 1232 BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED); 1233} 1234 1235static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef, 1236 struct v4l2_av1_cdef *ctrl_cdef) 1237{ 1238 int i; 1239 1240 cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3; 1241 cdef->cdef_bits = ctrl_cdef->bits; 1242 1243 for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) { 1244 if (ctrl_cdef->y_sec_strength[i] == 4) 
1245 ctrl_cdef->y_sec_strength[i] -= 1; 1246 1247 if (ctrl_cdef->uv_sec_strength[i] == 4) 1248 ctrl_cdef->uv_sec_strength[i] -= 1; 1249 1250 cdef->cdef_y_strength[i] = 1251 ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | 1252 ctrl_cdef->y_sec_strength[i]; 1253 cdef->cdef_uv_strength[i] = 1254 ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | 1255 ctrl_cdef->uv_sec_strength[i]; 1256 } 1257} 1258 1259static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq, 1260 struct v4l2_ctrl_av1_sequence *ctrl_seq) 1261{ 1262 seq->bitdepth = ctrl_seq->bit_depth; 1263 seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1; 1264 seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1; 1265 seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES); 1266 seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA); 1267 seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER); 1268 seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND); 1269 seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND); 1270 seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER); 1271 seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP); 1272 seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME); 1273 seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT); 1274 seq->order_hint_bits = ctrl_seq->order_hint_bits; 1275 seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK); 1276 seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X); 1277 seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y); 1278} 1279 1280static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame, 1281 struct v4l2_av1_tile_info *ctrl_tile) 1282{ 1283 struct vdec_av1_slice_seq_header *seq = &frame->seq; 1284 struct vdec_av1_slice_tile *tile = &frame->uh.tile; 1285 u32 mib_size_log2 = 
seq->use_128x128_superblock ? 5 : 4; 1286 int i; 1287 1288 tile->tile_cols = ctrl_tile->tile_cols; 1289 tile->tile_rows = ctrl_tile->tile_rows; 1290 tile->context_update_tile_id = ctrl_tile->context_update_tile_id; 1291 tile->uniform_tile_spacing_flag = 1292 BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); 1293 1294 for (i = 0; i < tile->tile_cols + 1; i++) 1295 tile->mi_col_starts[i] = 1296 ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2; 1297 1298 for (i = 0; i < tile->tile_rows + 1; i++) 1299 tile->mi_row_starts[i] = 1300 ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2; 1301} 1302 1303static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance, 1304 struct vdec_av1_slice_frame *frame, 1305 struct v4l2_ctrl_av1_frame *ctrl_fh) 1306{ 1307 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; 1308 int i; 1309 1310 uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS); 1311 uh->order_hint = ctrl_fh->order_hint; 1312 vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion); 1313 uh->upscaled_width = ctrl_fh->upscaled_width; 1314 uh->frame_width = ctrl_fh->frame_width_minus_1 + 1; 1315 uh->frame_height = ctrl_fh->frame_height_minus_1 + 1; 1316 uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1; 1317 uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1; 1318 uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET); 1319 uh->tx_mode = ctrl_fh->tx_mode; 1320 uh->uniform_tile_spacing_flag = 1321 BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); 1322 uh->interpolation_filter = ctrl_fh->interpolation_filter; 1323 uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION); 1324 uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE); 1325 uh->frame_type = ctrl_fh->frame_type; 1326 uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME || 1327 uh->frame_type == V4L2_AV1_KEY_FRAME); 1328 1329 if (!uh->frame_is_intra && 
FH_FLAG(ctrl_fh, REFERENCE_SELECT)) 1330 uh->reference_mode = AV1_REFERENCE_MODE_SELECT; 1331 else 1332 uh->reference_mode = AV1_SINGLE_REFERENCE; 1333 1334 uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV); 1335 uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC); 1336 uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV); 1337 uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS); 1338 uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE); 1339 uh->primary_ref_frame = ctrl_fh->primary_ref_frame; 1340 uh->disable_frame_end_update_cdf = 1341 FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF); 1342 uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE); 1343 uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT); 1344 uh->skip_mode.skip_mode_frame[0] = 1345 ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME; 1346 uh->skip_mode.skip_mode_frame[1] = 1347 ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME; 1348 uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 
1 : 0; 1349 1350 vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation); 1351 uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT); 1352 uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res; 1353 uh->delta_q_lf.delta_lf_present = 1354 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT); 1355 uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res; 1356 uh->delta_q_lf.delta_lf_multi = 1357 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI); 1358 vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization); 1359 1360 uh->coded_loss_less = 1; 1361 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { 1362 uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i); 1363 uh->loss_less_array[i] = 1364 (uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 && 1365 uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 && 1366 uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0); 1367 1368 if (!uh->loss_less_array[i]) 1369 uh->coded_loss_less = 0; 1370 } 1371 1372 vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration); 1373 uh->superres_denom = ctrl_fh->superres_denom; 1374 vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter); 1375 vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef); 1376 vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info); 1377} 1378 1379static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance, 1380 struct vdec_av1_slice_vsi *vsi) 1381{ 1382 struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge; 1383 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; 1384 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1385 struct vdec_av1_slice_tile *tile = &uh->tile; 1386 struct v4l2_ctrl *ctrl; 1387 u32 tge_size; 1388 int i; 1389 1390 ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY); 1391 if (!ctrl) 1392 return -EINVAL; 1393 1394 tge_size = ctrl->elems; 
1395 ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p; 1396 1397 tile_group->num_tiles = tile->tile_cols * tile->tile_rows; 1398 1399 if (tile_group->num_tiles != tge_size || 1400 tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) { 1401 mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n", 1402 tge_size, tile_group->num_tiles); 1403 return -EINVAL; 1404 } 1405 1406 for (i = 0; i < tge_size; i++) { 1407 if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols + 1408 ctrl_tge[i].tile_col) { 1409 mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n", 1410 i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col, 1411 vsi->frame.uh.tile.tile_rows); 1412 return -EINVAL; 1413 } 1414 tile_group->tile_size[i] = ctrl_tge[i].tile_size; 1415 tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset; 1416 } 1417 1418 return 0; 1419} 1420 1421static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi) 1422{ 1423 memset(&vsi->state, 0, sizeof(vsi->state)); 1424} 1425 1426static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref, 1427 struct vdec_av1_slice_frame_info *ref_frame_info, 1428 struct vdec_av1_slice_uncompressed_header *uh) 1429{ 1430 struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors; 1431 u32 ref_upscaled_width = ref_frame_info->upscaled_width; 1432 u32 ref_frame_height = ref_frame_info->frame_height; 1433 u32 frame_width = uh->frame_width; 1434 u32 frame_height = uh->frame_height; 1435 1436 if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height, 1437 frame_width, frame_height)) { 1438 scale_factors->x_scale = -1; 1439 scale_factors->y_scale = -1; 1440 scale_factors->is_scaled = 0; 1441 return; 1442 } 1443 1444 scale_factors->x_scale = 1445 ((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width; 1446 scale_factors->y_scale = 1447 ((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height; 1448 
scale_factors->is_scaled = 1449 (scale_factors->x_scale != AV1_REF_INVALID_SCALE) && 1450 (scale_factors->y_scale != AV1_REF_INVALID_SCALE) && 1451 (scale_factors->x_scale != AV1_REF_NO_SCALE || 1452 scale_factors->y_scale != AV1_REF_NO_SCALE); 1453 scale_factors->x_step = 1454 AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale, 1455 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); 1456 scale_factors->y_step = 1457 AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale, 1458 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); 1459} 1460 1461static unsigned char vdec_av1_slice_get_sign_bias(int a, 1462 int b, 1463 u8 enable_order_hint, 1464 u8 order_hint_bits) 1465{ 1466 int diff = 0; 1467 int m = 0; 1468 unsigned char result = 0; 1469 1470 if (!enable_order_hint) 1471 return 0; 1472 1473 diff = a - b; 1474 m = 1 << (order_hint_bits - 1); 1475 diff = (diff & (m - 1)) - (diff & m); 1476 1477 if (diff > 0) 1478 result = 1; 1479 1480 return result; 1481} 1482 1483static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc, 1484 struct v4l2_ctrl_av1_frame *ctrl_fh) 1485{ 1486 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1487 struct vdec_av1_slice_frame *frame = &vsi->frame; 1488 struct vdec_av1_slice_slot *slots = &vsi->slots; 1489 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; 1490 struct vdec_av1_slice_seq_header *seq = &frame->seq; 1491 struct vdec_av1_slice_frame_info *cur_frame_info = 1492 &slots->frame_info[vsi->slot_id]; 1493 struct vdec_av1_slice_frame_info *frame_info; 1494 int i, slot_id; 1495 1496 if (uh->frame_is_intra) 1497 return; 1498 1499 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { 1500 int ref_idx = ctrl_fh->ref_frame_idx[i]; 1501 1502 pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx]; 1503 slot_id = frame->ref_frame_map[ref_idx]; 1504 frame_info = &slots->frame_info[slot_id]; 1505 if (slot_id == AV1_INVALID_IDX) { 1506 pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i, 1507 ctrl_fh->reference_frame_ts[ref_idx]); 1508 
frame->order_hints[i] = 0; 1509 frame->ref_frame_valid[i] = 0; 1510 continue; 1511 } 1512 1513 frame->frame_refs[i].ref_fb_idx = slot_id; 1514 vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i], 1515 frame_info, uh); 1516 if (!seq->enable_order_hint) 1517 frame->ref_frame_sign_bias[i + 1] = 0; 1518 else 1519 frame->ref_frame_sign_bias[i + 1] = 1520 vdec_av1_slice_get_sign_bias(frame_info->order_hint, 1521 uh->order_hint, 1522 seq->enable_order_hint, 1523 seq->order_hint_bits); 1524 1525 frame->order_hints[i] = ctrl_fh->order_hints[i + 1]; 1526 cur_frame_info->order_hints[i] = frame->order_hints[i]; 1527 frame->ref_frame_valid[i] = 1; 1528 } 1529} 1530 1531static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi) 1532{ 1533 struct vdec_av1_slice_frame *frame = &vsi->frame; 1534 1535 if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE) 1536 frame->prev_fb_idx = AV1_INVALID_IDX; 1537 else 1538 frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx; 1539} 1540 1541static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance, 1542 struct vdec_av1_slice_frame *frame) 1543{ 1544 frame->large_scale_tile = 0; 1545} 1546 1547static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance, 1548 struct vdec_av1_slice_pfc *pfc) 1549{ 1550 struct v4l2_ctrl_av1_frame *ctrl_fh; 1551 struct v4l2_ctrl_av1_sequence *ctrl_seq; 1552 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1553 int ret = 0; 1554 1555 /* frame header */ 1556 ctrl_fh = (struct v4l2_ctrl_av1_frame *) 1557 vdec_av1_get_ctrl_ptr(instance->ctx, 1558 V4L2_CID_STATELESS_AV1_FRAME); 1559 if (IS_ERR(ctrl_fh)) 1560 return PTR_ERR(ctrl_fh); 1561 1562 ctrl_seq = (struct v4l2_ctrl_av1_sequence *) 1563 vdec_av1_get_ctrl_ptr(instance->ctx, 1564 V4L2_CID_STATELESS_AV1_SEQUENCE); 1565 if (IS_ERR(ctrl_seq)) 1566 return PTR_ERR(ctrl_seq); 1567 1568 /* setup vsi information */ 1569 vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq); 1570 
vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh); 1571 vdec_av1_slice_setup_operating_mode(instance, &vsi->frame); 1572 1573 vdec_av1_slice_setup_state(vsi); 1574 vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh); 1575 vdec_av1_slice_setup_ref(pfc, ctrl_fh); 1576 vdec_av1_slice_get_previous(vsi); 1577 1578 pfc->seq = instance->seq; 1579 instance->seq++; 1580 1581 return ret; 1582} 1583 1584static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance, 1585 struct vdec_av1_slice_vsi *vsi, 1586 struct mtk_vcodec_mem *bs, 1587 struct vdec_lat_buf *lat_buf) 1588{ 1589 struct vdec_av1_slice_work_buffer *work_buffer; 1590 int i; 1591 1592 vsi->bs.dma_addr = bs->dma_addr; 1593 vsi->bs.size = bs->size; 1594 1595 vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; 1596 vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; 1597 vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; 1598 /* used to store trans end */ 1599 vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; 1600 vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; 1601 vsi->err_map.size = lat_buf->wdma_err_addr.size; 1602 vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr; 1603 vsi->rd_mv.size = lat_buf->rd_mv_addr.size; 1604 1605 vsi->row_info.buf = 0; 1606 vsi->row_info.size = 0; 1607 1608 work_buffer = vsi->work_buffer; 1609 1610 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 1611 work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr; 1612 work_buffer[i].mv_addr.size = instance->mv[i].size; 1613 work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr; 1614 work_buffer[i].segid_addr.size = instance->seg[i].size; 1615 work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr; 1616 work_buffer[i].cdf_addr.size = instance->cdf[i].size; 1617 } 1618 1619 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; 1620 vsi->cdf_tmp.size = instance->cdf_temp.size; 1621 1622 vsi->tile.buf = instance->tile.dma_addr; 1623 vsi->tile.size = instance->tile.size; 
1624 memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles); 1625 1626 vsi->cdf_table.buf = instance->cdf_table.dma_addr; 1627 vsi->cdf_table.size = instance->cdf_table.size; 1628 vsi->iq_table.buf = instance->iq_table.dma_addr; 1629 vsi->iq_table.size = instance->iq_table.size; 1630} 1631 1632static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance, 1633 struct vdec_av1_slice_vsi *vsi) 1634{ 1635 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1636 struct mtk_vcodec_mem *buf; 1637 1638 /* reset segment buffer */ 1639 if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) { 1640 mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id); 1641 if (vsi->slot_id != AV1_INVALID_IDX) { 1642 buf = &instance->seg[vsi->slot_id]; 1643 memset(buf->va, 0, buf->size); 1644 } 1645 } 1646} 1647 1648static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance, 1649 struct vdec_av1_slice_vsi *vsi, 1650 struct mtk_vcodec_mem *bs) 1651{ 1652 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; 1653 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1654 struct vdec_av1_slice_tile *tile = &uh->tile; 1655 u32 tile_num, tile_row, tile_col; 1656 u32 allow_update_cdf = 0; 1657 u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0; 1658 int tile_info_base; 1659 u64 tile_buf_pa; 1660 u32 *tile_info_buf = instance->tile.va; 1661 u64 pa = (u64)bs->dma_addr; 1662 1663 if (uh->disable_cdf_update == 0) 1664 allow_update_cdf = 1; 1665 1666 for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) { 1667 /* each uint32 takes place of 4 bytes */ 1668 tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2; 1669 tile_row = tile_num / tile->tile_cols; 1670 tile_col = tile_num % tile->tile_cols; 1671 tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3); 1672 tile_buf_pa = pa + tile_group->tile_start_offset[tile_num]; 1673 
1674 /* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position 1675 * and clear original for hw requirement. 1676 */ 1677 tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) | 1678 ((tile_buf_pa & 0xF00000000ull) >> 32); 1679 tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3; 1680 1681 sb_boundary_x_m1 = 1682 (tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) & 1683 0x3f; 1684 sb_boundary_y_m1 = 1685 (tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) & 1686 0x1ff; 1687 1688 tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1; 1689 tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16)); 1690 1691 if (tile_num == tile->context_update_tile_id && 1692 uh->disable_frame_end_update_cdf == 0) 1693 tile_info_buf[tile_info_base + 4] |= (1 << 17); 1694 1695 mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n", 1696 tile_num, tile_row, tile_col, tile_info_base); 1697 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", 1698 tile_info_buf[tile_info_base + 0], 1699 tile_info_buf[tile_info_base + 1], 1700 tile_info_buf[tile_info_base + 2], 1701 tile_info_buf[tile_info_base + 3]); 1702 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", 1703 tile_info_buf[tile_info_base + 4], 1704 tile_info_buf[tile_info_base + 5], 1705 tile_info_buf[tile_info_base + 6], 1706 tile_info_buf[tile_info_base + 7]); 1707 } 1708} 1709 1710static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance, 1711 struct mtk_vcodec_mem *bs, 1712 struct vdec_lat_buf *lat_buf, 1713 struct vdec_av1_slice_pfc *pfc) 1714{ 1715 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1716 int ret; 1717 1718 ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf); 1719 if (ret) 1720 return ret; 1721 1722 ret = vdec_av1_slice_setup_pfc(instance, pfc); 1723 if (ret) 1724 return ret; 1725 1726 ret = vdec_av1_slice_setup_tile_group(instance, vsi); 
1727 if (ret) 1728 return ret; 1729 1730 ret = vdec_av1_slice_alloc_working_buffer(instance, vsi); 1731 if (ret) 1732 return ret; 1733 1734 vdec_av1_slice_setup_seg_buffer(instance, vsi); 1735 vdec_av1_slice_setup_tile_buffer(instance, vsi, bs); 1736 vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); 1737 1738 return 0; 1739} 1740 1741static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance, 1742 struct vdec_lat_buf *lat_buf, 1743 struct vdec_av1_slice_pfc *pfc) 1744{ 1745 struct vdec_av1_slice_vsi *vsi; 1746 1747 vsi = &pfc->vsi; 1748 mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n", 1749 pfc->seq, vsi->state.crc[0], vsi->state.out_size); 1750 1751 /* buffer full, need to re-decode */ 1752 if (vsi->state.full) { 1753 /* buffer not enough */ 1754 if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size) 1755 return -ENOMEM; 1756 return -EAGAIN; 1757 } 1758 1759 instance->width = vsi->frame.uh.upscaled_width; 1760 instance->height = vsi->frame.uh.frame_height; 1761 instance->frame_type = vsi->frame.uh.frame_type; 1762 1763 return 0; 1764} 1765 1766static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance, 1767 struct vdec_lat_buf *lat_buf) 1768{ 1769 struct vb2_v4l2_buffer *dst; 1770 1771 dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); 1772 if (!dst) 1773 return -EINVAL; 1774 1775 v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); 1776 1777 return 0; 1778} 1779 1780static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance, 1781 struct vdec_av1_slice_pfc *pfc, 1782 struct vdec_av1_slice_vsi *vsi, 1783 struct vdec_fb *fb, 1784 struct vdec_lat_buf *lat_buf) 1785{ 1786 struct vb2_buffer *vb; 1787 struct vb2_queue *vq; 1788 int w, h, plane, size; 1789 int i; 1790 1791 plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; 1792 w = vsi->frame.uh.upscaled_width; 1793 h = vsi->frame.uh.frame_height; 1794 size = ALIGN(w, 
VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64); 1795 1796 /* frame buffer */ 1797 vsi->fb.y.dma_addr = fb->base_y.dma_addr; 1798 if (plane == 1) 1799 vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; 1800 else 1801 vsi->fb.c.dma_addr = fb->base_c.dma_addr; 1802 1803 /* reference buffers */ 1804 vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); 1805 if (!vq) 1806 return -EINVAL; 1807 1808 /* get current output buffer */ 1809 vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; 1810 if (!vb) 1811 return -EINVAL; 1812 1813 /* get buffer address from vb2buf */ 1814 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { 1815 struct vdec_av1_slice_fb *vref = &vsi->ref[i]; 1816 1817 vb = vb2_find_buffer(vq, pfc->ref_idx[i]); 1818 if (!vb) { 1819 memset(vref, 0, sizeof(*vref)); 1820 continue; 1821 } 1822 1823 vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0); 1824 if (plane == 1) 1825 vref->c.dma_addr = vref->y.dma_addr + size; 1826 else 1827 vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1); 1828 } 1829 vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr; 1830 vsi->tile.size = lat_buf->tile_addr.size; 1831 1832 return 0; 1833} 1834 1835static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance, 1836 struct vdec_fb *fb, 1837 struct vdec_lat_buf *lat_buf, 1838 struct vdec_av1_slice_pfc *pfc) 1839{ 1840 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1841 int ret; 1842 1843 ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf); 1844 if (ret) 1845 return ret; 1846 1847 ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); 1848 if (ret) 1849 return ret; 1850 1851 return 0; 1852} 1853 1854static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance, 1855 struct vdec_lat_buf *lat_buf, 1856 struct vdec_av1_slice_pfc *pfc) 1857{ 1858 struct vdec_av1_slice_vsi *vsi = instance->core_vsi; 1859 1860 mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n", 1861 
pfc->seq, vsi->state.crc[0], vsi->state.crc[1], 1862 vsi->state.crc[2], vsi->state.crc[3]); 1863 mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n", 1864 pfc->seq, vsi->state.crc[8], vsi->state.crc[9], 1865 vsi->state.crc[10], vsi->state.crc[11]); 1866 1867 return 0; 1868} 1869 1870static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx) 1871{ 1872 struct vdec_av1_slice_instance *instance; 1873 struct vdec_av1_slice_init_vsi *vsi; 1874 int ret; 1875 1876 instance = kzalloc(sizeof(*instance), GFP_KERNEL); 1877 if (!instance) 1878 return -ENOMEM; 1879 1880 instance->ctx = ctx; 1881 instance->vpu.id = SCP_IPI_VDEC_LAT; 1882 instance->vpu.core_id = SCP_IPI_VDEC_CORE; 1883 instance->vpu.ctx = ctx; 1884 instance->vpu.codec_type = ctx->current_codec; 1885 1886 ret = vpu_dec_init(&instance->vpu); 1887 if (ret) { 1888 mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret); 1889 goto error_vpu_init; 1890 } 1891 1892 /* init vsi and global flags */ 1893 vsi = instance->vpu.vsi; 1894 if (!vsi) { 1895 mtk_vdec_err(ctx, "failed to get AV1 vsi\n"); 1896 ret = -EINVAL; 1897 goto error_vsi; 1898 } 1899 instance->init_vsi = vsi; 1900 instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi); 1901 1902 if (!instance->core_vsi) { 1903 mtk_vdec_err(ctx, "failed to get AV1 core vsi\n"); 1904 ret = -EINVAL; 1905 goto error_vsi; 1906 } 1907 1908 if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi)) 1909 mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! 
expected: 0x%zx\n", 1910 vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi)); 1911 1912 instance->irq_enabled = 1; 1913 instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability); 1914 1915 mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n", 1916 vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode); 1917 1918 ret = vdec_av1_slice_init_cdf_table(instance); 1919 if (ret) 1920 goto error_vsi; 1921 1922 ret = vdec_av1_slice_init_iq_table(instance); 1923 if (ret) 1924 goto error_vsi; 1925 1926 ctx->drv_handle = instance; 1927 1928 return 0; 1929error_vsi: 1930 vpu_dec_deinit(&instance->vpu); 1931error_vpu_init: 1932 kfree(instance); 1933 1934 return ret; 1935} 1936 1937static void vdec_av1_slice_deinit(void *h_vdec) 1938{ 1939 struct vdec_av1_slice_instance *instance = h_vdec; 1940 1941 if (!instance) 1942 return; 1943 mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec); 1944 vpu_dec_deinit(&instance->vpu); 1945 vdec_av1_slice_free_working_buffer(instance); 1946 vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); 1947 kfree(instance); 1948} 1949 1950static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, 1951 struct vdec_fb *fb, bool *res_chg) 1952{ 1953 struct vdec_av1_slice_instance *instance = h_vdec; 1954 int i; 1955 1956 mtk_vdec_debug(instance->ctx, "flush ...\n"); 1957 1958 vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); 1959 1960 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) 1961 vdec_av1_slice_clear_fb(&instance->slots.frame_info[i]); 1962 1963 return vpu_dec_reset(&instance->vpu); 1964} 1965 1966static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance) 1967{ 1968 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1969 u32 data[3]; 1970 1971 mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h); 1972 1973 data[0] = ctx->picinfo.pic_w; 1974 data[1] = ctx->picinfo.pic_h; 1975 data[2] = ctx->capture_fourcc; 1976 
vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); 1977 1978 ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); 1979 ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); 1980 ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; 1981 ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; 1982} 1983 1984static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance, 1985 u32 *dpb_sz) 1986{ 1987 /* refer av1 specification */ 1988 *dpb_sz = V4L2_AV1_TOTAL_REFS_PER_FRAME + 1; 1989} 1990 1991static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance, 1992 struct v4l2_rect *cr) 1993{ 1994 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1995 1996 cr->left = 0; 1997 cr->top = 0; 1998 cr->width = ctx->picinfo.pic_w; 1999 cr->height = ctx->picinfo.pic_h; 2000 2001 mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n", 2002 cr->left, cr->top, cr->width, cr->height); 2003} 2004 2005static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) 2006{ 2007 struct vdec_av1_slice_instance *instance = h_vdec; 2008 2009 switch (type) { 2010 case GET_PARAM_PIC_INFO: 2011 vdec_av1_slice_get_pic_info(instance); 2012 break; 2013 case GET_PARAM_DPB_SIZE: 2014 vdec_av1_slice_get_dpb_size(instance, out); 2015 break; 2016 case GET_PARAM_CROP_INFO: 2017 vdec_av1_slice_get_crop_info(instance, out); 2018 break; 2019 default: 2020 mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type); 2021 return -EINVAL; 2022 } 2023 2024 return 0; 2025} 2026 2027static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 2028 struct vdec_fb *fb, bool *res_chg) 2029{ 2030 struct vdec_av1_slice_instance *instance = h_vdec; 2031 struct vdec_lat_buf *lat_buf; 2032 struct vdec_av1_slice_pfc *pfc; 2033 struct vdec_av1_slice_vsi *vsi; 2034 struct mtk_vcodec_dec_ctx *ctx; 2035 int ret; 2036 2037 if (!instance || !instance->ctx) 2038 return -EINVAL; 2039 2040 ctx = instance->ctx; 
2041 /* init msgQ for the first time */ 2042 if (vdec_msg_queue_init(&ctx->msg_queue, ctx, 2043 vdec_av1_slice_core_decode, sizeof(*pfc))) { 2044 mtk_vdec_err(ctx, "failed to init AV1 msg queue\n"); 2045 return -ENOMEM; 2046 } 2047 2048 /* bs NULL means flush decoder */ 2049 if (!bs) 2050 return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg); 2051 2052 lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx); 2053 if (!lat_buf) { 2054 mtk_vdec_err(ctx, "failed to get AV1 lat buf\n"); 2055 return -EAGAIN; 2056 } 2057 pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data; 2058 if (!pfc) { 2059 ret = -EINVAL; 2060 goto err_free_fb_out; 2061 } 2062 vsi = &pfc->vsi; 2063 2064 ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc); 2065 if (ret) { 2066 mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret); 2067 goto err_free_fb_out; 2068 } 2069 2070 vdec_av1_slice_vsi_to_remote(vsi, instance->vsi); 2071 ret = vpu_dec_start(&instance->vpu, NULL, 0); 2072 if (ret) { 2073 mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret); 2074 goto err_free_fb_out; 2075 } 2076 if (instance->inneracing_mode) 2077 vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); 2078 2079 if (instance->irq_enabled) { 2080 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2081 WAIT_INTR_TIMEOUT_MS, 2082 MTK_VDEC_LAT0); 2083 /* update remote vsi if decode timeout */ 2084 if (ret) { 2085 mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret); 2086 WRITE_ONCE(instance->vsi->state.timeout, 1); 2087 } 2088 vpu_dec_end(&instance->vpu); 2089 } 2090 2091 vdec_av1_slice_vsi_from_remote(vsi, instance->vsi); 2092 ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc); 2093 2094 /* LAT trans full, re-decode */ 2095 if (ret == -EAGAIN) { 2096 mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq); 2097 if (!instance->inneracing_mode) 2098 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2099 return 0; 2100 } 2101 2102 /* LAT trans full, no more UBE or decode timeout 
*/ 2103 if (ret == -ENOMEM || vsi->state.timeout) { 2104 mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq); 2105 if (!instance->inneracing_mode) 2106 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2107 return -EBUSY; 2108 } 2109 vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; 2110 mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n", 2111 &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end); 2112 2113 vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end); 2114 2115 if (!instance->inneracing_mode) 2116 vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); 2117 memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots)); 2118 2119 return 0; 2120 2121err_free_fb_out: 2122 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2123 2124 if (pfc) 2125 mtk_vdec_err(ctx, "slice dec number: %d err: %d", pfc->seq, ret); 2126 2127 return ret; 2128} 2129 2130static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf) 2131{ 2132 struct vdec_av1_slice_instance *instance; 2133 struct vdec_av1_slice_pfc *pfc; 2134 struct mtk_vcodec_dec_ctx *ctx = NULL; 2135 struct vdec_fb *fb = NULL; 2136 int ret = -EINVAL; 2137 2138 if (!lat_buf) 2139 return -EINVAL; 2140 2141 pfc = lat_buf->private_data; 2142 ctx = lat_buf->ctx; 2143 if (!pfc || !ctx) 2144 return -EINVAL; 2145 2146 instance = ctx->drv_handle; 2147 if (!instance) 2148 goto err; 2149 2150 fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); 2151 if (!fb) { 2152 ret = -EBUSY; 2153 goto err; 2154 } 2155 2156 ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc); 2157 if (ret) { 2158 mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n"); 2159 goto err; 2160 } 2161 vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); 2162 ret = vpu_dec_core(&instance->vpu); 2163 if (ret) { 2164 mtk_vdec_err(ctx, "vpu_dec_core\n"); 2165 goto err; 2166 } 2167 2168 if (instance->irq_enabled) { 2169 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2170 
WAIT_INTR_TIMEOUT_MS, 2171 MTK_VDEC_CORE); 2172 /* update remote vsi if decode timeout */ 2173 if (ret) { 2174 mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq); 2175 WRITE_ONCE(instance->vsi->state.timeout, 1); 2176 } 2177 vpu_dec_core_end(&instance->vpu); 2178 } 2179 2180 ret = vdec_av1_slice_update_core(instance, lat_buf, pfc); 2181 if (ret) { 2182 mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n"); 2183 goto err; 2184 } 2185 2186 mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n", 2187 &instance->core_vsi->trans.dma_addr_end); 2188 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end); 2189 2190 ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); 2191 2192 return 0; 2193 2194err: 2195 /* always update read pointer */ 2196 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2197 2198 if (fb) 2199 ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); 2200 2201 return ret; 2202} 2203 2204const struct vdec_common_if vdec_av1_slice_lat_if = { 2205 .init = vdec_av1_slice_init, 2206 .decode = vdec_av1_slice_lat_decode, 2207 .get_param = vdec_av1_slice_get_param, 2208 .deinit = vdec_av1_slice_deinit, 2209}; 2210