1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (c) 2021 MediaTek Inc. 4 * Author: George Sun <george.sun@mediatek.com> 5 */ 6 7#include <linux/module.h> 8#include <linux/slab.h> 9#include <media/videobuf2-dma-contig.h> 10#include <media/v4l2-vp9.h> 11 12#include "../mtk_vcodec_dec.h" 13#include "../../common/mtk_vcodec_intr.h" 14#include "../vdec_drv_base.h" 15#include "../vdec_drv_if.h" 16#include "../vdec_vpu_if.h" 17 18/* reset_frame_context defined in VP9 spec */ 19#define VP9_RESET_FRAME_CONTEXT_NONE0 0 20#define VP9_RESET_FRAME_CONTEXT_NONE1 1 21#define VP9_RESET_FRAME_CONTEXT_SPEC 2 22#define VP9_RESET_FRAME_CONTEXT_ALL 3 23 24#define VP9_TILE_BUF_SIZE 4096 25#define VP9_PROB_BUF_SIZE 2560 26#define VP9_COUNTS_BUF_SIZE 16384 27 28#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x)) 29#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x)) 30#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x)) 31#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6) 32 33/* 34 * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint 35 */ 36struct vdec_vp9_slice_frame_ctx { 37 struct { 38 u8 probs[6][3]; 39 u8 padding[2]; 40 } coef_probs[4][2][2][6]; 41 42 u8 y_mode_prob[4][16]; 43 u8 switch_interp_prob[4][16]; 44 u8 seg[32]; /* ignore */ 45 u8 comp_inter_prob[16]; 46 u8 comp_ref_prob[16]; 47 u8 single_ref_prob[5][2]; 48 u8 single_ref_prob_padding[6]; 49 50 u8 joint[3]; 51 u8 joint_padding[13]; 52 struct { 53 u8 sign; 54 u8 classes[10]; 55 u8 padding[5]; 56 } sign_classes[2]; 57 struct { 58 u8 class0[1]; 59 u8 bits[10]; 60 u8 padding[5]; 61 } class0_bits[2]; 62 struct { 63 u8 class0_fp[2][3]; 64 u8 fp[3]; 65 u8 class0_hp; 66 u8 hp; 67 u8 padding[5]; 68 } class0_fp_hp[2]; 69 70 u8 uv_mode_prob[10][16]; 71 u8 uv_mode_prob_padding[2][16]; 72 73 u8 partition_prob[16][4]; 74 75 u8 inter_mode_probs[7][4]; 76 u8 skip_probs[4]; 77 78 u8 tx_p8x8[2][4]; 79 u8 tx_p16x16[2][4]; 80 u8 tx_p32x32[2][4]; 81 u8 intra_inter_prob[8]; 82}; 83 84/* 85 * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint 86 */ 87struct vdec_vp9_slice_frame_counts { 88 union { 89 struct { 90 u32 band_0[3]; 91 u32 padding0[1]; 92 u32 band_1_5[5][6]; 93 u32 padding1[2]; 94 } eob_branch[4][2][2]; 95 u32 eob_branch_space[256 * 4]; 96 }; 97 98 struct { 99 u32 band_0[3][4]; 100 u32 band_1_5[5][6][4]; 101 } coef_probs[4][2][2]; 102 103 u32 intra_inter[4][2]; 104 u32 comp_inter[5][2]; 105 u32 comp_inter_padding[2]; 106 u32 comp_ref[5][2]; 107 u32 comp_ref_padding[2]; 108 u32 single_ref[5][2][2]; 109 u32 inter_mode[7][4]; 110 u32 y_mode[4][12]; 111 u32 uv_mode[10][10]; 112 u32 partition[16][4]; 113 u32 switchable_interp[4][4]; 114 115 u32 tx_p8x8[2][2]; 116 u32 tx_p16x16[2][4]; 117 u32 tx_p32x32[2][4]; 118 119 u32 skip[3][4]; 120 121 u32 joint[4]; 122 123 struct { 124 u32 sign[2]; 125 u32 class0[2]; 126 u32 classes[12]; 127 u32 bits[10][2]; 128 u32 padding[4]; 129 u32 class0_fp[2][4]; 130 u32 fp[4]; 131 u32 class0_hp[2]; 132 u32 hp[2]; 133 } mvcomp[2]; 134 135 u32 reserved[126][4]; 136}; 137 138/** 139 * struct vdec_vp9_slice_counts_map - vp9 counts tables to map 140 * v4l2_vp9_frame_symbol_counts 141 * @skip: skip counts. 142 * @y_mode: Y prediction mode counts. 143 * @filter: interpolation filter counts. 144 * @sign: motion vector sign counts. 145 * @classes: motion vector class counts. 146 * @class0: motion vector class0 bit counts. 147 * @bits: motion vector bits counts. 148 * @class0_fp: motion vector class0 fractional bit counts. 149 * @fp: motion vector fractional bit counts. 150 * @class0_hp: motion vector class0 high precision fractional bit counts. 151 * @hp: motion vector high precision fractional bit counts. 152 */ 153struct vdec_vp9_slice_counts_map { 154 u32 skip[3][2]; 155 u32 y_mode[4][10]; 156 u32 filter[4][3]; 157 u32 sign[2][2]; 158 u32 classes[2][11]; 159 u32 class0[2][2]; 160 u32 bits[2][10][2]; 161 u32 class0_fp[2][2][4]; 162 u32 fp[2][4]; 163 u32 class0_hp[2][2]; 164 u32 hp[2][2]; 165}; 166 167/* 168 * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax 169 * used for decoding 170 */ 171struct vdec_vp9_slice_uncompressed_header { 172 u8 profile; 173 u8 last_frame_type; 174 u8 frame_type; 175 176 u8 last_show_frame; 177 u8 show_frame; 178 u8 error_resilient_mode; 179 180 u8 bit_depth; 181 u8 padding0[1]; 182 u16 last_frame_width; 183 u16 last_frame_height; 184 u16 frame_width; 185 u16 frame_height; 186 187 u8 intra_only; 188 u8 reset_frame_context; 189 u8 ref_frame_sign_bias[4]; 190 u8 allow_high_precision_mv; 191 u8 interpolation_filter; 192 193 u8 refresh_frame_context; 194 u8 frame_parallel_decoding_mode; 195 u8 frame_context_idx; 196 197 /* loop_filter_params */ 198 u8 loop_filter_level; 199 u8 loop_filter_sharpness; 200 u8 loop_filter_delta_enabled; 201 s8 loop_filter_ref_deltas[4]; 202 s8 loop_filter_mode_deltas[2]; 203 204 /* quantization_params */ 205 u8 base_q_idx; 206 s8 delta_q_y_dc; 207 s8 delta_q_uv_dc; 208 s8 delta_q_uv_ac; 209 210 /* segmentation_params */ 211 u8 segmentation_enabled; 212 u8 segmentation_update_map; 213 u8 segmentation_tree_probs[7]; 214 u8 padding1[1]; 215 u8 segmentation_temporal_udpate; 216 u8 segmentation_pred_prob[3]; 217 u8 segmentation_update_data; 218 u8 segmentation_abs_or_delta_update; 219 u8 feature_enabled[8]; 220 s16 feature_value[8][4]; 221 222 /* tile_info */ 223 u8 tile_cols_log2; 224 u8 tile_rows_log2; 225 u8 padding2[2]; 226 227 u16 uncompressed_header_size; 228 u16 header_size_in_bytes; 229 230 /* LAT OUT, CORE IN */ 231 u32 dequant[8][4]; 232}; 233 234/* 235 * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax 236 * used for decoding. 237 */ 238struct vdec_vp9_slice_compressed_header { 239 u8 tx_mode; 240 u8 ref_mode; 241 u8 comp_fixed_ref; 242 u8 comp_var_ref[2]; 243 u8 padding[3]; 244}; 245 246/* 247 * struct vdec_vp9_slice_tiles - vp9 tile syntax 248 */ 249struct vdec_vp9_slice_tiles { 250 u32 size[4][64]; 251 u32 mi_rows[4]; 252 u32 mi_cols[64]; 253 u8 actual_rows; 254 u8 padding[7]; 255}; 256 257/* 258 * struct vdec_vp9_slice_reference - vp9 reference frame information 259 */ 260struct vdec_vp9_slice_reference { 261 u16 frame_width; 262 u16 frame_height; 263 u8 bit_depth; 264 u8 subsampling_x; 265 u8 subsampling_y; 266 u8 padding; 267}; 268 269/* 270 * struct vdec_vp9_slice_frame - vp9 syntax used for decoding 271 */ 272struct vdec_vp9_slice_frame { 273 struct vdec_vp9_slice_uncompressed_header uh; 274 struct vdec_vp9_slice_compressed_header ch; 275 struct vdec_vp9_slice_tiles tiles; 276 struct vdec_vp9_slice_reference ref[3]; 277}; 278 279/* 280 * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance 281 */ 282struct vdec_vp9_slice_init_vsi { 283 unsigned int architecture; 284 unsigned int reserved; 285 u64 core_vsi; 286 /* default frame context's position in MicroP */ 287 u64 default_frame_ctx; 288}; 289 290/* 291 * struct vdec_vp9_slice_mem - memory address and size 292 */ 293struct vdec_vp9_slice_mem { 294 union { 295 u64 buf; 296 dma_addr_t dma_addr; 297 }; 298 union { 299 size_t size; 300 dma_addr_t dma_addr_end; 301 u64 padding; 302 }; 303}; 304 305/* 306 * struct vdec_vp9_slice_bs - input buffer for decoding 307 */ 308struct vdec_vp9_slice_bs { 309 struct vdec_vp9_slice_mem buf; 310 struct vdec_vp9_slice_mem frame; 311}; 312 313/* 314 * struct vdec_vp9_slice_fb - frame buffer for decoding 315 */ 316struct vdec_vp9_slice_fb { 317 struct vdec_vp9_slice_mem y; 318 struct vdec_vp9_slice_mem c; 319}; 320 321/* 322 * struct vdec_vp9_slice_state - decoding state 323 */ 324struct vdec_vp9_slice_state { 325 int err; 326 unsigned int full; 327 unsigned int timeout; 328 unsigned int perf; 329 330 unsigned int crc[12]; 331}; 332 333/** 334 * struct vdec_vp9_slice_vsi - exchange decoding information 335 * between Main CPU and MicroP 336 * 337 * @bs: input buffer 338 * @fb: output buffer 339 * @ref: 3 reference buffers 340 * @mv: mv working buffer 341 * @seg: segmentation working buffer 342 * @tile: tile buffer 343 * @prob: prob table buffer, used to set/update prob table 344 * @counts: counts table buffer, used to update prob table 345 * @ube: general buffer 346 * @trans: trans buffer position in general buffer 347 * @err_map: error buffer 348 * @row_info: row info buffer 349 * @frame: decoding syntax 350 * @state: decoding state 351 */ 352struct vdec_vp9_slice_vsi { 353 /* used in LAT stage */ 354 struct vdec_vp9_slice_bs bs; 355 /* used in Core stage */ 356 struct vdec_vp9_slice_fb fb; 357 struct vdec_vp9_slice_fb ref[3]; 358 359 struct vdec_vp9_slice_mem mv[2]; 360 struct vdec_vp9_slice_mem seg[2]; 361 struct vdec_vp9_slice_mem tile; 362 struct vdec_vp9_slice_mem prob; 363 struct vdec_vp9_slice_mem counts; 364 365 /* LAT stage's output, Core stage's input */ 366 struct vdec_vp9_slice_mem ube; 367 struct vdec_vp9_slice_mem trans; 368 struct vdec_vp9_slice_mem err_map; 369 struct vdec_vp9_slice_mem row_info; 370 371 /* decoding parameters */ 372 struct vdec_vp9_slice_frame frame; 373 374 struct vdec_vp9_slice_state state; 375}; 376 377/** 378 * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi. 379 * pass it from lat to core 380 * 381 * @vsi: local vsi. copy to/from remote vsi before/after decoding 382 * @ref_idx: reference buffer index 383 * @seq: picture sequence 384 * @state: decoding state 385 */ 386struct vdec_vp9_slice_pfc { 387 struct vdec_vp9_slice_vsi vsi; 388 389 u64 ref_idx[3]; 390 391 int seq; 392 393 /* LAT/Core CRC */ 394 struct vdec_vp9_slice_state state[2]; 395}; 396 397/* 398 * enum vdec_vp9_slice_resolution_level 399 */ 400enum vdec_vp9_slice_resolution_level { 401 VP9_RES_NONE, 402 VP9_RES_FHD, 403 VP9_RES_4K, 404 VP9_RES_8K, 405}; 406 407/* 408 * struct vdec_vp9_slice_ref - picture's width & height should kept 409 * for later decoding as reference picture 410 */ 411struct vdec_vp9_slice_ref { 412 unsigned int width; 413 unsigned int height; 414}; 415 416/** 417 * struct vdec_vp9_slice_instance - represent one vp9 instance 418 * 419 * @ctx: pointer to codec's context 420 * @vpu: VPU instance 421 * @seq: global picture sequence 422 * @level: level of current resolution 423 * @width: width of last picture 424 * @height: height of last picture 425 * @frame_type: frame_type of last picture 426 * @irq: irq to Main CPU or MicroP 427 * @show_frame: show_frame of last picture 428 * @dpb: picture information (width/height) for reference 429 * @mv: mv working buffer 430 * @seg: segmentation working buffer 431 * @tile: tile buffer 432 * @prob: prob table buffer, used to set/update prob table 433 * @counts: counts table buffer, used to update prob table 434 * @frame_ctx: 4 frame context according to VP9 Spec 435 * @frame_ctx_helper: 4 frame context according to newest kernel spec 436 * @dirty: state of each frame context 437 * @init_vsi: vsi used for initialized VP9 instance 438 * @vsi: vsi used for decoding/flush ... 439 * @core_vsi: vsi used for Core stage 440 * 441 * @sc_pfc: per frame context single core 442 * @counts_map: used map to counts_helper 443 * @counts_helper: counts table according to newest kernel spec 444 */ 445struct vdec_vp9_slice_instance { 446 struct mtk_vcodec_dec_ctx *ctx; 447 struct vdec_vpu_inst vpu; 448 449 int seq; 450 451 enum vdec_vp9_slice_resolution_level level; 452 453 /* for resolution change and get_pic_info */ 454 unsigned int width; 455 unsigned int height; 456 457 /* for last_frame_type */ 458 unsigned int frame_type; 459 unsigned int irq; 460 461 unsigned int show_frame; 462 463 /* maintain vp9 reference frame state */ 464 struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME]; 465 466 /* 467 * normal working buffers 468 * mv[0]/seg[0]/tile/prob/counts is used for LAT 469 * mv[1]/seg[1] is used for CORE 470 */ 471 struct mtk_vcodec_mem mv[2]; 472 struct mtk_vcodec_mem seg[2]; 473 struct mtk_vcodec_mem tile; 474 struct mtk_vcodec_mem prob; 475 struct mtk_vcodec_mem counts; 476 477 /* 4 prob tables */ 478 struct vdec_vp9_slice_frame_ctx frame_ctx[4]; 479 /*4 helper tables */ 480 struct v4l2_vp9_frame_context frame_ctx_helper; 481 unsigned char dirty[4]; 482 483 /* MicroP vsi */ 484 union { 485 struct vdec_vp9_slice_init_vsi *init_vsi; 486 struct vdec_vp9_slice_vsi *vsi; 487 }; 488 struct vdec_vp9_slice_vsi *core_vsi; 489 490 struct vdec_vp9_slice_pfc sc_pfc; 491 struct vdec_vp9_slice_counts_map counts_map; 492 struct v4l2_vp9_frame_symbol_counts counts_helper; 493}; 494 495/* 496 * all VP9 instances could share this default frame context. 497 */ 498static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx; 499static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock); 500 501static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf); 502 503static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance) 504{ 505 struct vdec_vp9_slice_frame_ctx *remote_frame_ctx; 506 struct vdec_vp9_slice_frame_ctx *frame_ctx; 507 struct mtk_vcodec_dec_ctx *ctx; 508 struct vdec_vp9_slice_init_vsi *vsi; 509 int ret = 0; 510 511 ctx = instance->ctx; 512 vsi = instance->vpu.vsi; 513 if (!ctx || !vsi) 514 return -EINVAL; 515 516 remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 517 (u32)vsi->default_frame_ctx); 518 if (!remote_frame_ctx) { 519 mtk_vdec_err(ctx, "failed to map default frame ctx\n"); 520 return -EINVAL; 521 } 522 523 mutex_lock(&vdec_vp9_slice_frame_ctx_lock); 524 if (vdec_vp9_slice_default_frame_ctx) 525 goto out; 526 527 frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL); 528 if (!frame_ctx) { 529 ret = -ENOMEM; 530 goto out; 531 } 532 533 vdec_vp9_slice_default_frame_ctx = frame_ctx; 534 535out: 536 mutex_unlock(&vdec_vp9_slice_frame_ctx_lock); 537 538 return ret; 539} 540 541static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance, 542 struct vdec_vp9_slice_vsi *vsi) 543{ 544 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 545 enum vdec_vp9_slice_resolution_level level; 546 /* super blocks */ 547 unsigned int max_sb_w; 548 unsigned int max_sb_h; 549 unsigned int max_w; 550 unsigned int max_h; 551 unsigned int w; 552 unsigned int h; 553 size_t size; 554 int ret; 555 int i; 556 557 w = vsi->frame.uh.frame_width; 558 h = vsi->frame.uh.frame_height; 559 560 if (w > VCODEC_DEC_4K_CODED_WIDTH || 561 h > VCODEC_DEC_4K_CODED_HEIGHT) { 562 return -EINVAL; 563 } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { 564 /* 4K */ 565 level = VP9_RES_4K; 566 max_w = VCODEC_DEC_4K_CODED_WIDTH; 567 max_h = VCODEC_DEC_4K_CODED_HEIGHT; 568 } else { 569 /* FHD */ 570 level = VP9_RES_FHD; 571 max_w = MTK_VDEC_MAX_W; 572 max_h = MTK_VDEC_MAX_H; 573 } 574 575 if (level == instance->level) 576 return 0; 577 578 mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u", 579 instance->level, level, w, h); 580 581 max_sb_w = DIV_ROUND_UP(max_w, 64); 582 max_sb_h = DIV_ROUND_UP(max_h, 64); 583 ret = -ENOMEM; 584 585 /* 586 * Lat-flush must wait core idle, otherwise core will 587 * use released buffers 588 */ 589 590 size = (max_sb_w * max_sb_h + 2) * 576; 591 for (i = 0; i < 2; i++) { 592 if (instance->mv[i].va) 593 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 594 instance->mv[i].size = size; 595 if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i])) 596 goto err; 597 } 598 599 size = (max_sb_w * max_sb_h * 32) + 256; 600 for (i = 0; i < 2; i++) { 601 if (instance->seg[i].va) 602 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 603 instance->seg[i].size = size; 604 if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i])) 605 goto err; 606 } 607 608 if (!instance->tile.va) { 609 instance->tile.size = VP9_TILE_BUF_SIZE; 610 if (mtk_vcodec_mem_alloc(ctx, &instance->tile)) 611 goto err; 612 } 613 614 if (!instance->prob.va) { 615 instance->prob.size = VP9_PROB_BUF_SIZE; 616 if (mtk_vcodec_mem_alloc(ctx, &instance->prob)) 617 goto err; 618 } 619 620 if (!instance->counts.va) { 621 instance->counts.size = VP9_COUNTS_BUF_SIZE; 622 if (mtk_vcodec_mem_alloc(ctx, &instance->counts)) 623 goto err; 624 } 625 626 instance->level = level; 627 return 0; 628 629err: 630 instance->level = VP9_RES_NONE; 631 return ret; 632} 633 634static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance) 635{ 636 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 637 int i; 638 639 for (i = 0; i < ARRAY_SIZE(instance->mv); i++) { 640 if (instance->mv[i].va) 641 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 642 } 643 for (i = 0; i < ARRAY_SIZE(instance->seg); i++) { 644 if (instance->seg[i].va) 645 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 646 } 647 if (instance->tile.va) 648 mtk_vcodec_mem_free(ctx, &instance->tile); 649 if (instance->prob.va) 650 mtk_vcodec_mem_free(ctx, &instance->prob); 651 if (instance->counts.va) 652 mtk_vcodec_mem_free(ctx, &instance->counts); 653 654 instance->level = VP9_RES_NONE; 655} 656 657static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi, 658 struct vdec_vp9_slice_vsi *remote_vsi, 659 int skip) 660{ 661 struct vdec_vp9_slice_frame *rf; 662 struct vdec_vp9_slice_frame *f; 663 664 /* 665 * compressed header 666 * dequant 667 * buffer position 668 * decode state 669 */ 670 if (!skip) { 671 rf = &remote_vsi->frame; 672 f = &vsi->frame; 673 memcpy(&f->ch, &rf->ch, sizeof(f->ch)); 674 memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant)); 675 memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); 676 } 677 678 memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); 679} 680 681static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi, 682 struct vdec_vp9_slice_vsi *remote_vsi) 683{ 684 memcpy(remote_vsi, vsi, sizeof(*vsi)); 685} 686 687static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2) 688{ 689 int sbs = (mi_num + 7) >> 3; 690 int offset = ((idx * sbs) >> tile_log2) << 3; 691 692 return min(offset, mi_num); 693} 694 695static 696int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance) 697{ 698 struct vb2_v4l2_buffer *src; 699 struct vb2_v4l2_buffer *dst; 700 701 src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); 702 if (!src) 703 return -EINVAL; 704 705 dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); 706 if (!dst) 707 return -EINVAL; 708 709 v4l2_m2m_buf_copy_metadata(src, dst, true); 710 711 return 0; 712} 713 714static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance, 715 struct vdec_lat_buf *lat_buf) 716{ 717 struct vb2_v4l2_buffer *src; 718 struct vb2_v4l2_buffer *dst; 719 720 src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); 721 if (!src) 722 return -EINVAL; 723 724 lat_buf->src_buf_req = src->vb2_buf.req_obj.req; 725 726 dst = &lat_buf->ts_info; 727 v4l2_m2m_buf_copy_metadata(src, dst, true); 728 return 0; 729} 730 731static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance, 732 struct vdec_vp9_slice_uncompressed_header *uh, 733 struct v4l2_ctrl_vp9_frame *hdr) 734{ 735 int i; 736 737 uh->profile = hdr->profile; 738 uh->last_frame_type = instance->frame_type; 739 uh->frame_type = !HDR_FLAG(KEY_FRAME); 740 uh->last_show_frame = instance->show_frame; 741 uh->show_frame = HDR_FLAG(SHOW_FRAME); 742 uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); 743 uh->bit_depth = hdr->bit_depth; 744 uh->last_frame_width = instance->width; 745 uh->last_frame_height = instance->height; 746 uh->frame_width = hdr->frame_width_minus_1 + 1; 747 uh->frame_height = hdr->frame_height_minus_1 + 1; 748 uh->intra_only = HDR_FLAG(INTRA_ONLY); 749 /* map v4l2 enum to values defined in VP9 spec for firmware */ 750 switch (hdr->reset_frame_context) { 751 case V4L2_VP9_RESET_FRAME_CTX_NONE: 752 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; 753 break; 754 case V4L2_VP9_RESET_FRAME_CTX_SPEC: 755 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC; 756 break; 757 case V4L2_VP9_RESET_FRAME_CTX_ALL: 758 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL; 759 break; 760 default: 761 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; 762 break; 763 } 764 /* 765 * ref_frame_sign_bias specifies the intended direction 766 * of the motion vector in time for each reference frame. 767 * - INTRA_FRAME = 0, 768 * - LAST_FRAME = 1, 769 * - GOLDEN_FRAME = 2, 770 * - ALTREF_FRAME = 3, 771 * ref_frame_sign_bias[INTRA_FRAME] is always 0 772 * and VDA only passes another 3 directions 773 */ 774 uh->ref_frame_sign_bias[0] = 0; 775 for (i = 0; i < 3; i++) 776 uh->ref_frame_sign_bias[i + 1] = 777 !!(hdr->ref_frame_sign_bias & (1 << i)); 778 uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV); 779 uh->interpolation_filter = hdr->interpolation_filter; 780 uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX); 781 uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE); 782 uh->frame_context_idx = hdr->frame_context_idx; 783 784 /* tile info */ 785 uh->tile_cols_log2 = hdr->tile_cols_log2; 786 uh->tile_rows_log2 = hdr->tile_rows_log2; 787 788 uh->uncompressed_header_size = hdr->uncompressed_header_size; 789 uh->header_size_in_bytes = hdr->compressed_header_size; 790} 791 792static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance, 793 struct vdec_vp9_slice_uncompressed_header *uh, 794 struct v4l2_ctrl_vp9_frame *hdr) 795{ 796 int error_resilient_mode; 797 int reset_frame_context; 798 int key_frame; 799 int intra_only; 800 int i; 801 802 key_frame = HDR_FLAG(KEY_FRAME); 803 intra_only = HDR_FLAG(INTRA_ONLY); 804 error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); 805 reset_frame_context = uh->reset_frame_context; 806 807 /* 808 * according to "6.2 Uncompressed header syntax" in 809 * "VP9 Bitstream & Decoding Process Specification", 810 * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode) 811 */ 812 if (key_frame || intra_only || error_resilient_mode) { 813 /* 814 * @reset_frame_context specifies 815 * whether the frame context should be 816 * reset to default values: 817 * 0 or 1 means do not reset any frame context 818 * 2 resets just the context specified in the frame header 819 * 3 resets all contexts 820 */ 821 if (key_frame || error_resilient_mode || 822 reset_frame_context == 3) { 823 /* use default table */ 824 for (i = 0; i < 4; i++) 825 instance->dirty[i] = 0; 826 } else if (reset_frame_context == 2) { 827 instance->dirty[uh->frame_context_idx] = 0; 828 } 829 uh->frame_context_idx = 0; 830 } 831} 832 833static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh, 834 struct v4l2_vp9_loop_filter *lf) 835{ 836 int i; 837 838 uh->loop_filter_level = lf->level; 839 uh->loop_filter_sharpness = lf->sharpness; 840 uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED); 841 for (i = 0; i < 4; i++) 842 uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i]; 843 for (i = 0; i < 2; i++) 844 uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i]; 845} 846 847static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh, 848 struct v4l2_vp9_quantization *quant) 849{ 850 uh->base_q_idx = quant->base_q_idx; 851 uh->delta_q_y_dc = quant->delta_q_y_dc; 852 uh->delta_q_uv_dc = quant->delta_q_uv_dc; 853 uh->delta_q_uv_ac = quant->delta_q_uv_ac; 854} 855 856static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh, 857 struct v4l2_vp9_segmentation *seg) 858{ 859 int i; 860 int j; 861 862 uh->segmentation_enabled = SEG_FLAG(ENABLED); 863 uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP); 864 for (i = 0; i < 7; i++) 865 uh->segmentation_tree_probs[i] = seg->tree_probs[i]; 866 uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE); 867 for (i = 0; i < 3; i++) 868 uh->segmentation_pred_prob[i] = seg->pred_probs[i]; 869 uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA); 870 uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE); 871 for (i = 0; i < 8; i++) { 872 uh->feature_enabled[i] = seg->feature_enabled[i]; 873 for (j = 0; j < 4; j++) 874 uh->feature_value[i][j] = seg->feature_data[i][j]; 875 } 876} 877 878static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi, 879 struct v4l2_ctrl_vp9_frame *hdr) 880{ 881 unsigned int rows_log2; 882 unsigned int cols_log2; 883 unsigned int rows; 884 unsigned int cols; 885 unsigned int mi_rows; 886 unsigned int mi_cols; 887 struct vdec_vp9_slice_tiles *tiles; 888 int offset; 889 int start; 890 int end; 891 int i; 892 893 rows_log2 = hdr->tile_rows_log2; 894 cols_log2 = hdr->tile_cols_log2; 895 rows = 1 << rows_log2; 896 cols = 1 << cols_log2; 897 tiles = &vsi->frame.tiles; 898 tiles->actual_rows = 0; 899 900 if (rows > 4 || cols > 64) 901 return -EINVAL; 902 903 /* setup mi rows/cols information */ 904 mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3; 905 mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3; 906 907 for (i = 0; i < rows; i++) { 908 start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2); 909 end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2); 910 offset = end - start; 911 tiles->mi_rows[i] = (offset + 7) >> 3; 912 if (tiles->mi_rows[i]) 913 tiles->actual_rows++; 914 } 915 916 for (i = 0; i < cols; i++) { 917 start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2); 918 end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2); 919 offset = end - start; 920 tiles->mi_cols[i] = (offset + 7) >> 3; 921 } 922 923 return 0; 924} 925 926static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi) 927{ 928 memset(&vsi->state, 0, sizeof(vsi->state)); 929} 930 931static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc, 932 struct v4l2_ctrl_vp9_frame *hdr) 933{ 934 pfc->ref_idx[0] = hdr->last_frame_ts; 935 pfc->ref_idx[1] = hdr->golden_frame_ts; 936 pfc->ref_idx[2] = hdr->alt_frame_ts; 937} 938 939static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance, 940 struct vdec_vp9_slice_pfc *pfc) 941{ 942 struct v4l2_ctrl_vp9_frame *hdr; 943 struct vdec_vp9_slice_uncompressed_header *uh; 944 struct v4l2_ctrl *hdr_ctrl; 945 struct vdec_vp9_slice_vsi *vsi; 946 int ret; 947 948 /* frame header */ 949 hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME); 950 if (!hdr_ctrl || !hdr_ctrl->p_cur.p) 951 return -EINVAL; 952 953 hdr = hdr_ctrl->p_cur.p; 954 vsi = &pfc->vsi; 955 uh = &vsi->frame.uh; 956 957 /* setup vsi information */ 958 vdec_vp9_slice_setup_hdr(instance, uh, hdr); 959 vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr); 960 vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf); 961 vdec_vp9_slice_setup_quantization(uh, &hdr->quant); 962 vdec_vp9_slice_setup_segmentation(uh, &hdr->seg); 963 ret = vdec_vp9_slice_setup_tile(vsi, hdr); 964 if (ret) 965 return ret; 966 vdec_vp9_slice_setup_state(vsi); 967 968 /* core stage needs buffer index to get ref y/c ... */ 969 vdec_vp9_slice_setup_ref_idx(pfc, hdr); 970 971 pfc->seq = instance->seq; 972 instance->seq++; 973 974 return 0; 975} 976 977static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance, 978 struct vdec_vp9_slice_vsi *vsi, 979 struct mtk_vcodec_mem *bs, 980 struct vdec_lat_buf *lat_buf) 981{ 982 int i; 983 984 vsi->bs.buf.dma_addr = bs->dma_addr; 985 vsi->bs.buf.size = bs->size; 986 vsi->bs.frame.dma_addr = bs->dma_addr; 987 vsi->bs.frame.size = bs->size; 988 989 for (i = 0; i < 2; i++) { 990 vsi->mv[i].dma_addr = instance->mv[i].dma_addr; 991 vsi->mv[i].size = instance->mv[i].size; 992 } 993 for (i = 0; i < 2; i++) { 994 vsi->seg[i].dma_addr = instance->seg[i].dma_addr; 995 vsi->seg[i].size = instance->seg[i].size; 996 } 997 vsi->tile.dma_addr = instance->tile.dma_addr; 998 vsi->tile.size = instance->tile.size; 999 vsi->prob.dma_addr = instance->prob.dma_addr; 1000 vsi->prob.size = instance->prob.size; 1001 vsi->counts.dma_addr = instance->counts.dma_addr; 1002 vsi->counts.size = instance->counts.size; 1003 1004 vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; 1005 vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; 1006 vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; 1007 /* used to store trans end */ 1008 vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; 1009 vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; 1010 vsi->err_map.size = lat_buf->wdma_err_addr.size; 1011 1012 vsi->row_info.buf = 0; 1013 vsi->row_info.size = 0; 1014 1015 return 0; 1016} 1017 1018static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance, 1019 struct vdec_vp9_slice_vsi *vsi) 1020{ 1021 struct vdec_vp9_slice_frame_ctx *frame_ctx; 1022 struct vdec_vp9_slice_uncompressed_header *uh; 1023 1024 uh = &vsi->frame.uh; 1025 1026 mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n", 1027 instance->dirty[uh->frame_context_idx], 1028 uh->frame_context_idx); 1029 1030 if (instance->dirty[uh->frame_context_idx]) 1031 frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; 1032 else 1033 frame_ctx = vdec_vp9_slice_default_frame_ctx; 1034 memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx)); 1035 1036 return 0; 1037} 1038 1039static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance, 1040 struct vdec_vp9_slice_vsi *vsi, 1041 struct mtk_vcodec_mem *buf) 1042{ 1043 struct vdec_vp9_slice_uncompressed_header *uh; 1044 1045 /* reset segment buffer */ 1046 uh = &vsi->frame.uh; 1047 if (uh->frame_type == 0 || 1048 uh->intra_only || 1049 uh->error_resilient_mode || 1050 uh->frame_width != instance->width || 1051 uh->frame_height != instance->height) { 1052 mtk_vdec_debug(instance->ctx, "reset seg\n"); 1053 memset(buf->va, 0, buf->size); 1054 } 1055} 1056 1057/* 1058 * parse tiles according to `6.4 Decode tiles syntax` 1059 * in "vp9-bitstream-specification" 1060 * 1061 * frame contains uncompress header, compressed header and several tiles. 1062 * this function parses tiles' position and size, stores them to tile buffer 1063 * for decoding. 1064 */ 1065static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance, 1066 struct vdec_vp9_slice_vsi *vsi, 1067 struct mtk_vcodec_mem *bs) 1068{ 1069 struct vdec_vp9_slice_uncompressed_header *uh; 1070 unsigned int rows_log2; 1071 unsigned int cols_log2; 1072 unsigned int rows; 1073 unsigned int cols; 1074 unsigned int mi_row; 1075 unsigned int mi_col; 1076 unsigned int offset; 1077 dma_addr_t pa; 1078 unsigned int size; 1079 struct vdec_vp9_slice_tiles *tiles; 1080 unsigned char *pos; 1081 unsigned char *end; 1082 unsigned char *va; 1083 unsigned int *tb; 1084 int i; 1085 int j; 1086 1087 uh = &vsi->frame.uh; 1088 rows_log2 = uh->tile_rows_log2; 1089 cols_log2 = uh->tile_cols_log2; 1090 rows = 1 << rows_log2; 1091 cols = 1 << cols_log2; 1092 1093 if (rows > 4 || cols > 64) { 1094 mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n", rows, cols); 1095 return -EINVAL; 1096 } 1097 1098 offset = uh->uncompressed_header_size + 1099 uh->header_size_in_bytes; 1100 if (bs->size <= offset) { 1101 mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n", bs->size, offset); 1102 return -EINVAL; 1103 } 1104 1105 tiles = &vsi->frame.tiles; 1106 /* setup tile buffer */ 1107 1108 va = (unsigned char *)bs->va; 1109 pos = va + offset; 1110 end = va + bs->size; 1111 /* truncated */ 1112 pa = bs->dma_addr + offset; 1113 tb = instance->tile.va; 1114 for (i = 0; i < rows; i++) { 1115 for (j = 0; j < cols; j++) { 1116 if (i == rows - 1 && 1117 j == cols - 1) { 1118 size = (unsigned int)(end - pos); 1119 } else { 1120 if (end - pos < 4) 1121 return -EINVAL; 1122 1123 size = (pos[0] << 24) | (pos[1] << 16) | 1124 (pos[2] << 8) | pos[3]; 1125 pos += 4; 1126 pa += 4; 1127 offset += 4; 1128 if (end - pos < size) 1129 return -EINVAL; 1130 } 1131 tiles->size[i][j] = size; 1132 if (tiles->mi_rows[i]) { 1133 *tb++ = (size << 3) + ((offset << 3) & 0x7f); 1134 *tb++ = pa & ~0xf; 1135 *tb++ = (pa << 3) & 0x7f; 1136 mi_row = (tiles->mi_rows[i] - 1) & 0x1ff; 1137 mi_col = (tiles->mi_cols[j] - 1) & 0x3f; 1138 *tb++ = (mi_row << 6) + mi_col; 1139 } 1140 pos += size; 1141 pa += size; 1142 offset += size; 1143 } 1144 } 1145 1146 return 0; 1147} 1148 1149static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance, 1150 struct mtk_vcodec_mem *bs, 1151 struct vdec_lat_buf *lat_buf, 1152 struct vdec_vp9_slice_pfc *pfc) 1153{ 1154 struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; 1155 int ret; 1156 1157 ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf); 1158 if (ret) 1159 goto err; 1160 1161 ret = vdec_vp9_slice_setup_pfc(instance, pfc); 1162 if (ret) 1163 goto err; 1164 1165 ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); 1166 if (ret) 1167 goto err; 1168 1169 ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); 1170 if (ret) 1171 goto err; 1172 1173 vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); 1174 1175 /* setup prob/tile buffers for LAT */ 1176 1177 ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); 1178 if (ret) 1179 goto err; 1180 1181 ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); 1182 if (ret) 1183 goto err; 1184 1185 return 0; 1186 1187err: 1188 return ret; 1189} 1190 1191static 1192void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k, 1193 struct vdec_vp9_slice_frame_counts *counts, 1194 struct v4l2_vp9_frame_symbol_counts *counts_helper) 1195{ 1196 u32 l = 0, m; 1197 1198 /* 1199 * helper eo -> mtk eo 1200 * helpre e1 -> mtk c3 1201 * helper c0 -> c0 1202 * helper c1 -> c1 1203 * helper c2 -> c2 1204 */ 1205 for (m = 0; m < 3; m++) { 1206 counts_helper->coeff[i][j][k][l][m] = 1207 (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m]; 1208 counts_helper->eob[i][j][k][l][m][0] = 1209 &counts->eob_branch[i][j][k].band_0[m]; 1210 counts_helper->eob[i][j][k][l][m][1] = 1211 &counts->coef_probs[i][j][k].band_0[m][3]; 1212 } 1213 1214 for (l = 1; l < 6; l++) { 1215 for (m = 0; m < 6; m++) { 1216 counts_helper->coeff[i][j][k][l][m] = 1217 (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m]; 1218 counts_helper->eob[i][j][k][l][m][0] = 1219 &counts->eob_branch[i][j][k].band_1_5[l - 1][m]; 1220 counts_helper->eob[i][j][k][l][m][1] = 1221 &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3]; 1222 } 1223 } 1224} 1225 1226static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map, 1227 struct vdec_vp9_slice_frame_counts *counts, 1228 struct v4l2_vp9_frame_symbol_counts *counts_helper) 1229{ 1230 int i, j, k; 1231 1232 counts_helper->partition = &counts->partition; 1233 counts_helper->intra_inter = &counts->intra_inter; 1234 counts_helper->tx32p = &counts->tx_p32x32; 1235 counts_helper->tx16p = &counts->tx_p16x16; 1236 counts_helper->tx8p = &counts->tx_p8x8; 1237 counts_helper->uv_mode = &counts->uv_mode; 1238 1239 counts_helper->comp = &counts->comp_inter; 1240 counts_helper->comp_ref = &counts->comp_ref; 1241 counts_helper->single_ref = &counts->single_ref; 1242 counts_helper->mv_mode = &counts->inter_mode; 1243 counts_helper->mv_joint = &counts->joint; 1244 1245 for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++) 1246 memcpy(counts_map->skip[i], counts->skip[i], 1247 sizeof(counts_map->skip[0])); 1248 counts_helper->skip = &counts_map->skip; 1249 1250 for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++) 1251 memcpy(counts_map->y_mode[i], counts->y_mode[i], 1252 sizeof(counts_map->y_mode[0])); 1253 counts_helper->y_mode = &counts_map->y_mode; 1254 1255 for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++) 1256 memcpy(counts_map->filter[i], counts->switchable_interp[i], 1257 sizeof(counts_map->filter[0])); 1258 counts_helper->filter = &counts_map->filter; 1259 1260 for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++) 1261 memcpy(counts_map->sign[i], counts->mvcomp[i].sign, 1262 sizeof(counts_map->sign[0])); 1263 counts_helper->sign = &counts_map->sign; 1264 1265 for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++) 1266 memcpy(counts_map->classes[i], counts->mvcomp[i].classes, 1267 sizeof(counts_map->classes[0])); 1268 counts_helper->classes = &counts_map->classes; 1269 1270 for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++) 1271 memcpy(counts_map->class0[i], counts->mvcomp[i].class0, 1272 sizeof(counts_map->class0[0])); 1273 counts_helper->class0 = &counts_map->class0; 1274 1275 for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++) 1276 for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++) 1277 memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j], 1278 sizeof(counts_map->bits[0][0])); 1279 counts_helper->bits = &counts_map->bits; 1280 1281 for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++) 1282 for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++) 1283 memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j], 1284 sizeof(counts_map->class0_fp[0][0])); 1285 counts_helper->class0_fp = &counts_map->class0_fp; 1286 1287 for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++) 1288 memcpy(counts_map->fp[i], counts->mvcomp[i].fp, 1289 sizeof(counts_map->fp[0])); 1290 counts_helper->fp = &counts_map->fp; 1291 1292 for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++) 1293 memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp, 1294 sizeof(counts_map->class0_hp[0])); 1295 counts_helper->class0_hp = &counts_map->class0_hp; 1296 1297 for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++) 1298 memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0])); 1299 1300 counts_helper->hp = &counts_map->hp; 1301 1302 for (i = 0; i < 4; i++) 1303 for (j = 0; j < 2; j++) 1304 for (k = 0; k < 2; k++) 1305 vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper); 1306} 1307 1308static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k, 1309 struct vdec_vp9_slice_frame_ctx *frame_ctx, 1310 struct v4l2_vp9_frame_context *frame_ctx_helper) 1311{ 1312 u32 l, m; 1313 1314 for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { 1315 for (m = 0; m < VP9_BAND_6(l); m++) { 1316 memcpy(frame_ctx_helper->coef[i][j][k][l][m], 1317 frame_ctx->coef_probs[i][j][k][l].probs[m], 1318 sizeof(frame_ctx_helper->coef[i][j][k][l][0])); 1319 } 1320 } 1321} 1322 1323static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k, 1324 struct vdec_vp9_slice_frame_ctx *frame_ctx, 1325 struct v4l2_vp9_frame_context *frame_ctx_helper) 1326{ 1327 u32 l, m; 1328 1329 for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { 1330 for (m = 0; m < VP9_BAND_6(l); m++) { 1331 memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m], 1332 frame_ctx_helper->coef[i][j][k][l][m], 1333 sizeof(frame_ctx_helper->coef[i][j][k][l][0])); 1334 } 1335 } 1336} 1337 1338static 1339void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra, 1340 struct vdec_vp9_slice_frame_ctx *pre_frame_ctx, 1341 struct vdec_vp9_slice_frame_ctx *frame_ctx, 1342 struct v4l2_vp9_frame_context *frame_ctx_helper) 1343{ 1344 struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; 1345 u32 i, j, k; 1346 1347 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) 1348 for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) 1349 for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) 1350 vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx, 1351 frame_ctx_helper); 1352 1353 /* 1354 * use previous prob when frame is not intra or 1355 * we should use the prob updated by the compressed header parse 1356 */ 1357 if (!frame_is_intra) 1358 frame_ctx = pre_frame_ctx; 1359 1360 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) 1361 memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i], 1362 sizeof(frame_ctx_helper->tx8[0])); 1363 1364 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) 1365 memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i], 1366 sizeof(frame_ctx_helper->tx16[0])); 1367 1368 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) 1369 memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i], 1370 sizeof(frame_ctx_helper->tx32[0])); 1371 1372 memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip)); 1373 1374 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) 1375 memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i], 1376 sizeof(frame_ctx_helper->inter_mode[0])); 1377 1378 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) 1379 memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i], 1380 sizeof(frame_ctx_helper->interp_filter[0])); 1381 1382 memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob, 1383 sizeof(frame_ctx_helper->is_inter)); 1384 1385 memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob, 1386 sizeof(frame_ctx_helper->comp_mode)); 1387 1388 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) 1389 memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i], 1390 sizeof(frame_ctx_helper->single_ref[0])); 1391 1392 memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob, 1393 sizeof(frame_ctx_helper->comp_ref)); 1394 1395 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) 1396 memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i], 1397 sizeof(frame_ctx_helper->y_mode[0])); 1398 1399 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) 1400 memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i], 1401 sizeof(frame_ctx_helper->uv_mode[0])); 1402 1403 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) 1404 memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i], 1405 sizeof(frame_ctx_helper->partition[0])); 1406 1407 memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint)); 1408 1409 for (i = 0; i < ARRAY_SIZE(mv->sign); i++) 1410 mv->sign[i] = frame_ctx->sign_classes[i].sign; 1411 1412 for (i = 0; i < ARRAY_SIZE(mv->classes); i++) 1413 memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes, 1414 sizeof(mv->classes[i])); 1415 1416 for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) 1417 mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0]; 1418 1419 for (i = 0; i < ARRAY_SIZE(mv->bits); i++) 1420 memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0])); 1421 1422 for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) 1423 for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) 1424 memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j], 1425 sizeof(mv->class0_fr[0][0])); 1426 1427 for (i = 0; i < ARRAY_SIZE(mv->fr); i++) 1428 memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0])); 1429 1430 for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) 1431 mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp; 1432 1433 for (i = 0; i < ARRAY_SIZE(mv->hp); i++) 1434 mv->hp[i] = frame_ctx->class0_fp_hp[i].hp; 1435} 1436 1437static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper, 1438 struct vdec_vp9_slice_frame_ctx *frame_ctx) 1439{ 1440 struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; 1441 u32 i, j, k; 1442 1443 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) 1444 memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i], 1445 sizeof(frame_ctx_helper->tx8[0])); 1446 1447 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) 1448 memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i], 1449 sizeof(frame_ctx_helper->tx16[0])); 1450 1451 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) 1452 memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i], 1453 sizeof(frame_ctx_helper->tx32[0])); 1454 1455 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) 1456 for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) 1457 for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) 1458 vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx, 1459 frame_ctx_helper); 1460 1461 memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip)); 1462 1463 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) 1464 memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i], 1465 sizeof(frame_ctx_helper->inter_mode[0])); 1466 1467 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) 1468 memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i], 1469 sizeof(frame_ctx_helper->interp_filter[0])); 1470 1471 memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter, 1472 sizeof(frame_ctx_helper->is_inter)); 1473 1474 memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode, 1475 sizeof(frame_ctx_helper->comp_mode)); 1476 1477 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) 1478 memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i], 1479 sizeof(frame_ctx_helper->single_ref[0])); 1480 1481 memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref, 1482 sizeof(frame_ctx_helper->comp_ref)); 1483 1484 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) 1485 memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i], 1486 sizeof(frame_ctx_helper->y_mode[0])); 1487 1488 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) 1489 memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i], 1490 sizeof(frame_ctx_helper->uv_mode[0])); 1491 1492 for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) 1493 memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i], 1494 sizeof(frame_ctx_helper->partition[0])); 1495 1496 memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint)); 1497 1498 for (i = 0; i < ARRAY_SIZE(mv->sign); i++) 1499 frame_ctx->sign_classes[i].sign = mv->sign[i]; 1500 1501 for (i = 0; i < ARRAY_SIZE(mv->classes); i++) 1502 memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i], 1503 sizeof(mv->classes[i])); 1504 1505 for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) 1506 frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i]; 1507 1508 for (i = 0; i < ARRAY_SIZE(mv->bits); i++) 1509 memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0])); 1510 1511 for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) 1512 for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) 1513 memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j], 1514 sizeof(mv->class0_fr[0][0])); 1515 1516 for (i = 0; i < ARRAY_SIZE(mv->fr); i++) 1517 memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0])); 1518 1519 for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) 1520 frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i]; 1521 1522 for (i = 0; i < ARRAY_SIZE(mv->hp); i++) 1523 frame_ctx->class0_fp_hp[i].hp = mv->hp[i]; 1524} 1525 1526static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance, 1527 struct vdec_vp9_slice_vsi *vsi) 1528{ 1529 struct vdec_vp9_slice_frame_ctx *pre_frame_ctx; 1530 struct v4l2_vp9_frame_context *pre_frame_ctx_helper; 1531 struct vdec_vp9_slice_frame_ctx *frame_ctx; 1532 struct vdec_vp9_slice_frame_counts *counts; 1533 struct v4l2_vp9_frame_symbol_counts *counts_helper; 1534 struct vdec_vp9_slice_uncompressed_header *uh; 1535 bool frame_is_intra; 1536 bool use_128; 1537 1538 uh = &vsi->frame.uh; 1539 pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; 1540 pre_frame_ctx_helper = &instance->frame_ctx_helper; 1541 frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va; 1542 counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va; 1543 counts_helper = &instance->counts_helper; 1544 1545 if (!uh->refresh_frame_context) 1546 return 0; 1547 1548 if (!uh->frame_parallel_decoding_mode) { 1549 vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper); 1550 1551 frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only; 1552 /* check default prob */ 1553 if (!instance->dirty[uh->frame_context_idx]) 1554 vdec_vp9_slice_framectx_map_helper(frame_is_intra, 1555 vdec_vp9_slice_default_frame_ctx, 1556 frame_ctx, 1557 pre_frame_ctx_helper); 1558 else 1559 vdec_vp9_slice_framectx_map_helper(frame_is_intra, 1560 pre_frame_ctx, 1561 frame_ctx, 1562 pre_frame_ctx_helper); 1563 1564 use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type; 1565 v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper, 1566 counts_helper, 1567 use_128, 1568 frame_is_intra); 1569 if (!frame_is_intra) 1570 v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper, 1571 counts_helper, 1572 V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE, 1573 vsi->frame.uh.interpolation_filter, 1574 vsi->frame.ch.tx_mode, 1575 vsi->frame.uh.allow_high_precision_mv ? 1576 V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0); 1577 vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx); 1578 } else { 1579 memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx)); 1580 } 1581 1582 instance->dirty[uh->frame_context_idx] = 1; 1583 1584 return 0; 1585} 1586 1587static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance, 1588 struct vdec_vp9_slice_pfc *pfc) 1589{ 1590 struct vdec_vp9_slice_vsi *vsi; 1591 1592 vsi = &pfc->vsi; 1593 memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); 1594 1595 mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n", 1596 pfc->seq, vsi->state.crc[0], vsi->state.crc[1], 1597 vsi->state.crc[2], vsi->state.crc[3]); 1598 mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n", 1599 pfc->seq, vsi->state.crc[4], vsi->state.crc[5], 1600 vsi->state.crc[6], vsi->state.crc[7]); 1601 1602 vdec_vp9_slice_update_prob(instance, vsi); 1603 1604 instance->width = vsi->frame.uh.frame_width; 1605 instance->height = vsi->frame.uh.frame_height; 1606 instance->frame_type = vsi->frame.uh.frame_type; 1607 instance->show_frame = vsi->frame.uh.show_frame; 1608 1609 return 0; 1610} 1611 1612static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance, 1613 struct vdec_lat_buf *lat_buf, 1614 struct vdec_vp9_slice_pfc *pfc) 1615{ 1616 struct vdec_vp9_slice_vsi *vsi; 1617 1618 vsi = &pfc->vsi; 1619 memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); 1620 1621 mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n", 1622 pfc->seq, vsi->state.crc[0], 1623 (unsigned long)vsi->trans.dma_addr, 1624 (unsigned long)vsi->trans.dma_addr_end); 1625 1626 /* buffer full, need to re-decode */ 1627 if (vsi->state.full) { 1628 /* buffer not enough */ 1629 if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == 1630 vsi->ube.size) 1631 return -ENOMEM; 1632 return -EAGAIN; 1633 } 1634 1635 vdec_vp9_slice_update_prob(instance, vsi); 1636 1637 instance->width = vsi->frame.uh.frame_width; 1638 instance->height = vsi->frame.uh.frame_height; 1639 instance->frame_type = vsi->frame.uh.frame_type; 1640 instance->show_frame = vsi->frame.uh.show_frame; 1641 1642 return 0; 1643} 1644 1645static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance, 1646 struct vdec_lat_buf *lat_buf) 1647{ 1648 struct vb2_v4l2_buffer *dst; 1649 1650 dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); 1651 if (!dst) 1652 return -EINVAL; 1653 1654 v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); 1655 return 0; 1656} 1657 1658static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance, 1659 struct vdec_vp9_slice_pfc *pfc, 1660 struct vdec_vp9_slice_vsi *vsi, 1661 struct vdec_fb *fb, 1662 struct vdec_lat_buf *lat_buf) 1663{ 1664 struct vb2_buffer *vb; 1665 struct vb2_queue *vq; 1666 struct vdec_vp9_slice_reference *ref; 1667 int plane; 1668 int size; 1669 int w; 1670 int h; 1671 int i; 1672 1673 plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; 1674 w = vsi->frame.uh.frame_width; 1675 h = vsi->frame.uh.frame_height; 1676 size = ALIGN(w, 64) * ALIGN(h, 64); 1677 1678 /* frame buffer */ 1679 vsi->fb.y.dma_addr = fb->base_y.dma_addr; 1680 if (plane == 1) 1681 vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; 1682 else 1683 vsi->fb.c.dma_addr = fb->base_c.dma_addr; 1684 1685 /* reference buffers */ 1686 vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, 1687 V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); 1688 if (!vq) 1689 return -EINVAL; 1690 1691 /* get current output buffer */ 1692 vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; 1693 if (!vb) 1694 return -EINVAL; 1695 1696 /* update internal buffer's width/height */ 1697 instance->dpb[vb->index].width = w; 1698 instance->dpb[vb->index].height = h; 1699 1700 /* 1701 * get buffer's width/height from instance 1702 * get buffer address from vb2buf 1703 */ 1704 for (i = 0; i < 3; i++) { 1705 ref = &vsi->frame.ref[i]; 1706 vb = vb2_find_buffer(vq, pfc->ref_idx[i]); 1707 if (!vb) { 1708 ref->frame_width = w; 1709 ref->frame_height = h; 1710 memset(&vsi->ref[i], 0, sizeof(vsi->ref[i])); 1711 } else { 1712 int idx = vb->index; 1713 1714 ref->frame_width = instance->dpb[idx].width; 1715 ref->frame_height = instance->dpb[idx].height; 1716 vsi->ref[i].y.dma_addr = 1717 vb2_dma_contig_plane_dma_addr(vb, 0); 1718 if (plane == 1) 1719 vsi->ref[i].c.dma_addr = 1720 vsi->ref[i].y.dma_addr + size; 1721 else 1722 vsi->ref[i].c.dma_addr = 1723 vb2_dma_contig_plane_dma_addr(vb, 1); 1724 } 1725 } 1726 1727 return 0; 1728} 1729 1730static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance, 1731 struct vdec_vp9_slice_pfc *pfc, 1732 struct vdec_vp9_slice_vsi *vsi, 1733 struct mtk_vcodec_mem *bs, 1734 struct vdec_fb *fb) 1735{ 1736 int i; 1737 1738 vsi->bs.buf.dma_addr = bs->dma_addr; 1739 vsi->bs.buf.size = bs->size; 1740 vsi->bs.frame.dma_addr = bs->dma_addr; 1741 vsi->bs.frame.size = bs->size; 1742 1743 for (i = 0; i < 2; i++) { 1744 vsi->mv[i].dma_addr = instance->mv[i].dma_addr; 1745 vsi->mv[i].size = instance->mv[i].size; 1746 } 1747 for (i = 0; i < 2; i++) { 1748 vsi->seg[i].dma_addr = instance->seg[i].dma_addr; 1749 vsi->seg[i].size = instance->seg[i].size; 1750 } 1751 vsi->tile.dma_addr = instance->tile.dma_addr; 1752 vsi->tile.size = instance->tile.size; 1753 vsi->prob.dma_addr = instance->prob.dma_addr; 1754 vsi->prob.size = instance->prob.size; 1755 vsi->counts.dma_addr = instance->counts.dma_addr; 1756 vsi->counts.size = instance->counts.size; 1757 1758 vsi->row_info.buf = 0; 1759 vsi->row_info.size = 0; 1760 1761 vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL); 1762} 1763 1764static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance, 1765 struct vdec_fb *fb, 1766 struct vdec_lat_buf *lat_buf, 1767 struct vdec_vp9_slice_pfc *pfc) 1768{ 1769 struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; 1770 int ret; 1771 1772 vdec_vp9_slice_setup_state(vsi); 1773 1774 ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf); 1775 if (ret) 1776 goto err; 1777 1778 ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); 1779 if (ret) 1780 goto err; 1781 1782 vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]); 1783 1784 return 0; 1785 1786err: 1787 return ret; 1788} 1789 1790static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance, 1791 struct mtk_vcodec_mem *bs, 1792 struct vdec_fb *fb, 1793 struct vdec_vp9_slice_pfc *pfc) 1794{ 1795 struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; 1796 int ret; 1797 1798 ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance); 1799 if (ret) 1800 goto err; 1801 1802 ret = vdec_vp9_slice_setup_pfc(instance, pfc); 1803 if (ret) 1804 goto err; 1805 1806 ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); 1807 if (ret) 1808 goto err; 1809 1810 vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb); 1811 vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); 1812 1813 ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); 1814 if (ret) 1815 goto err; 1816 1817 ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); 1818 if (ret) 1819 goto err; 1820 1821 return 0; 1822 1823err: 1824 return ret; 1825} 1826 1827static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance, 1828 struct vdec_lat_buf *lat_buf, 1829 struct vdec_vp9_slice_pfc *pfc) 1830{ 1831 struct vdec_vp9_slice_vsi *vsi; 1832 1833 vsi = &pfc->vsi; 1834 memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state)); 1835 1836 mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n", 1837 pfc->seq, vsi->state.crc[0], vsi->state.crc[1], 1838 vsi->state.crc[2], vsi->state.crc[3]); 1839 mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n", 1840 pfc->seq, vsi->state.crc[4], vsi->state.crc[5], 1841 vsi->state.crc[6], vsi->state.crc[7]); 1842 1843 return 0; 1844} 1845 1846static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx) 1847{ 1848 struct vdec_vp9_slice_instance *instance; 1849 struct vdec_vp9_slice_init_vsi *vsi; 1850 int ret; 1851 1852 instance = kzalloc(sizeof(*instance), GFP_KERNEL); 1853 if (!instance) 1854 return -ENOMEM; 1855 1856 instance->ctx = ctx; 1857 instance->vpu.id = SCP_IPI_VDEC_LAT; 1858 instance->vpu.core_id = SCP_IPI_VDEC_CORE; 1859 instance->vpu.ctx = ctx; 1860 instance->vpu.codec_type = ctx->current_codec; 1861 1862 ret = vpu_dec_init(&instance->vpu); 1863 if (ret) { 1864 mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret); 1865 goto error_vpu_init; 1866 } 1867 1868 /* init vsi and global flags */ 1869 1870 vsi = instance->vpu.vsi; 1871 if (!vsi) { 1872 mtk_vdec_err(ctx, "failed to get VP9 vsi\n"); 1873 ret = -EINVAL; 1874 goto error_vsi; 1875 } 1876 instance->init_vsi = vsi; 1877 instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 1878 (u32)vsi->core_vsi); 1879 if (!instance->core_vsi) { 1880 mtk_vdec_err(ctx, "failed to get VP9 core vsi\n"); 1881 ret = -EINVAL; 1882 goto error_vsi; 1883 } 1884 1885 instance->irq = 1; 1886 1887 ret = vdec_vp9_slice_init_default_frame_ctx(instance); 1888 if (ret) 1889 goto error_default_frame_ctx; 1890 1891 ctx->drv_handle = instance; 1892 1893 return 0; 1894 1895error_default_frame_ctx: 1896error_vsi: 1897 vpu_dec_deinit(&instance->vpu); 1898error_vpu_init: 1899 kfree(instance); 1900 return ret; 1901} 1902 1903static void vdec_vp9_slice_deinit(void *h_vdec) 1904{ 1905 struct vdec_vp9_slice_instance *instance = h_vdec; 1906 1907 if (!instance) 1908 return; 1909 1910 vpu_dec_deinit(&instance->vpu); 1911 vdec_vp9_slice_free_working_buffer(instance); 1912 vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); 1913 kfree(instance); 1914} 1915 1916static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, 1917 struct vdec_fb *fb, bool *res_chg) 1918{ 1919 struct vdec_vp9_slice_instance *instance = h_vdec; 1920 1921 mtk_vdec_debug(instance->ctx, "flush ...\n"); 1922 if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE) 1923 vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); 1924 return vpu_dec_reset(&instance->vpu); 1925} 1926 1927static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance) 1928{ 1929 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1930 unsigned int data[3]; 1931 1932 mtk_vdec_debug(instance->ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h); 1933 1934 data[0] = ctx->picinfo.pic_w; 1935 data[1] = ctx->picinfo.pic_h; 1936 data[2] = ctx->capture_fourcc; 1937 vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); 1938 1939 ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); 1940 ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); 1941 ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; 1942 ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; 1943} 1944 1945static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance, 1946 unsigned int *dpb_sz) 1947{ 1948 /* refer VP9 specification */ 1949 *dpb_sz = 9; 1950} 1951 1952static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) 1953{ 1954 struct vdec_vp9_slice_instance *instance = h_vdec; 1955 1956 switch (type) { 1957 case GET_PARAM_PIC_INFO: 1958 vdec_vp9_slice_get_pic_info(instance); 1959 break; 1960 case GET_PARAM_DPB_SIZE: 1961 vdec_vp9_slice_get_dpb_size(instance, out); 1962 break; 1963 case GET_PARAM_CROP_INFO: 1964 mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information."); 1965 break; 1966 default: 1967 mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type); 1968 return -EINVAL; 1969 } 1970 1971 return 0; 1972} 1973 1974static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 1975 struct vdec_fb *fb, bool *res_chg) 1976{ 1977 struct vdec_vp9_slice_instance *instance = h_vdec; 1978 struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc; 1979 struct vdec_vp9_slice_vsi *vsi; 1980 struct mtk_vcodec_dec_ctx *ctx; 1981 int ret; 1982 1983 if (!instance || !instance->ctx) 1984 return -EINVAL; 1985 ctx = instance->ctx; 1986 1987 /* bs NULL means flush decoder */ 1988 if (!bs) 1989 return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); 1990 1991 fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); 1992 if (!fb) 1993 return -EBUSY; 1994 1995 vsi = &pfc->vsi; 1996 1997 ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc); 1998 if (ret) { 1999 mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n", ret); 2000 return ret; 2001 } 2002 vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); 2003 2004 ret = vpu_dec_start(&instance->vpu, NULL, 0); 2005 if (ret) { 2006 mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret); 2007 return ret; 2008 } 2009 2010 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2011 WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); 2012 /* update remote vsi if decode timeout */ 2013 if (ret) { 2014 mtk_vdec_err(ctx, "VP9 decode timeout %d\n", ret); 2015 WRITE_ONCE(instance->vsi->state.timeout, 1); 2016 } 2017 2018 vpu_dec_end(&instance->vpu); 2019 2020 vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); 2021 ret = vdec_vp9_slice_update_single(instance, pfc); 2022 if (ret) { 2023 mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret); 2024 return ret; 2025 } 2026 2027 instance->ctx->decoded_frame_cnt++; 2028 return 0; 2029} 2030 2031static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 2032 struct vdec_fb *fb, bool *res_chg) 2033{ 2034 struct vdec_vp9_slice_instance *instance = h_vdec; 2035 struct vdec_lat_buf *lat_buf; 2036 struct vdec_vp9_slice_pfc *pfc; 2037 struct vdec_vp9_slice_vsi *vsi; 2038 struct mtk_vcodec_dec_ctx *ctx; 2039 int ret; 2040 2041 if (!instance || !instance->ctx) 2042 return -EINVAL; 2043 ctx = instance->ctx; 2044 2045 /* init msgQ for the first time */ 2046 if (vdec_msg_queue_init(&ctx->msg_queue, ctx, 2047 vdec_vp9_slice_core_decode, 2048 sizeof(*pfc))) 2049 return -ENOMEM; 2050 2051 /* bs NULL means flush decoder */ 2052 if (!bs) 2053 return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); 2054 2055 lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx); 2056 if (!lat_buf) { 2057 mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n"); 2058 return -EAGAIN; 2059 } 2060 pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data; 2061 if (!pfc) { 2062 ret = -EINVAL; 2063 goto err_free_fb_out; 2064 } 2065 vsi = &pfc->vsi; 2066 2067 ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc); 2068 if (ret) { 2069 mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n", ret); 2070 goto err_free_fb_out; 2071 } 2072 vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); 2073 2074 ret = vpu_dec_start(&instance->vpu, NULL, 0); 2075 if (ret) { 2076 mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret); 2077 goto err_free_fb_out; 2078 } 2079 2080 if (instance->irq) { 2081 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2082 WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); 2083 /* update remote vsi if decode timeout */ 2084 if (ret) { 2085 mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n", ret, pfc->seq); 2086 WRITE_ONCE(instance->vsi->state.timeout, 1); 2087 } 2088 vpu_dec_end(&instance->vpu); 2089 } 2090 2091 vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); 2092 ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); 2093 2094 /* LAT trans full, no more UBE or decode timeout */ 2095 if (ret) { 2096 mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret); 2097 goto err_free_fb_out; 2098 } 2099 2100 mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n", 2101 (unsigned long)pfc->vsi.trans.dma_addr, 2102 (unsigned long)pfc->vsi.trans.dma_addr_end); 2103 2104 vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, 2105 vsi->trans.dma_addr_end + 2106 ctx->msg_queue.wdma_addr.dma_addr); 2107 vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); 2108 2109 return 0; 2110err_free_fb_out: 2111 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2112 return ret; 2113} 2114 2115static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 2116 struct vdec_fb *fb, bool *res_chg) 2117{ 2118 struct vdec_vp9_slice_instance *instance = h_vdec; 2119 int ret; 2120 2121 if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) 2122 ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg); 2123 else 2124 ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg); 2125 2126 return ret; 2127} 2128 2129static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf) 2130{ 2131 struct vdec_vp9_slice_instance *instance; 2132 struct vdec_vp9_slice_pfc *pfc; 2133 struct mtk_vcodec_dec_ctx *ctx = NULL; 2134 struct vdec_fb *fb = NULL; 2135 int ret = -EINVAL; 2136 2137 if (!lat_buf) 2138 goto err; 2139 2140 pfc = lat_buf->private_data; 2141 ctx = lat_buf->ctx; 2142 if (!pfc || !ctx) 2143 goto err; 2144 2145 instance = ctx->drv_handle; 2146 if (!instance) 2147 goto err; 2148 2149 fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); 2150 if (!fb) { 2151 ret = -EBUSY; 2152 goto err; 2153 } 2154 2155 ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc); 2156 if (ret) { 2157 mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core\n"); 2158 goto err; 2159 } 2160 vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); 2161 2162 ret = vpu_dec_core(&instance->vpu); 2163 if (ret) { 2164 mtk_vdec_err(ctx, "vpu_dec_core\n"); 2165 goto err; 2166 } 2167 2168 if (instance->irq) { 2169 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2170 WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); 2171 /* update remote vsi if decode timeout */ 2172 if (ret) { 2173 mtk_vdec_err(ctx, "VP9 core timeout pic %d\n", pfc->seq); 2174 WRITE_ONCE(instance->core_vsi->state.timeout, 1); 2175 } 2176 vpu_dec_core_end(&instance->vpu); 2177 } 2178 2179 vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1); 2180 ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc); 2181 if (ret) { 2182 mtk_vdec_err(ctx, "vdec_vp9_slice_update_core\n"); 2183 goto err; 2184 } 2185 2186 pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; 2187 mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n", 2188 (unsigned long)pfc->vsi.trans.dma_addr_end); 2189 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2190 ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); 2191 2192 return 0; 2193 2194err: 2195 if (ctx && pfc) { 2196 /* always update read pointer */ 2197 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2198 2199 if (fb) 2200 ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); 2201 } 2202 return ret; 2203} 2204 2205const struct vdec_common_if vdec_vp9_slice_lat_if = { 2206 .init = vdec_vp9_slice_init, 2207 .decode = vdec_vp9_slice_decode, 2208 .get_param = vdec_vp9_slice_get_param, 2209 .deinit = vdec_vp9_slice_deinit, 2210}; 2211