1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com> 4 * Copyright (C) 2015 Amlogic, Inc. All rights reserved. 5 */ 6 7#include <media/v4l2-mem2mem.h> 8#include <media/videobuf2-dma-contig.h> 9 10#include "dos_regs.h" 11#include "hevc_regs.h" 12#include "codec_vp9.h" 13#include "vdec_helpers.h" 14#include "codec_hevc_common.h" 15 16/* HEVC reg mapping */ 17#define VP9_DEC_STATUS_REG HEVC_ASSIST_SCRATCH_0 18 #define VP9_10B_DECODE_SLICE 5 19 #define VP9_HEAD_PARSER_DONE 0xf0 20#define VP9_RPM_BUFFER HEVC_ASSIST_SCRATCH_1 21#define VP9_SHORT_TERM_RPS HEVC_ASSIST_SCRATCH_2 22#define VP9_ADAPT_PROB_REG HEVC_ASSIST_SCRATCH_3 23#define VP9_MMU_MAP_BUFFER HEVC_ASSIST_SCRATCH_4 24#define VP9_PPS_BUFFER HEVC_ASSIST_SCRATCH_5 25#define VP9_SAO_UP HEVC_ASSIST_SCRATCH_6 26#define VP9_STREAM_SWAP_BUFFER HEVC_ASSIST_SCRATCH_7 27#define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8 28#define VP9_PROB_SWAP_BUFFER HEVC_ASSIST_SCRATCH_9 29#define VP9_COUNT_SWAP_BUFFER HEVC_ASSIST_SCRATCH_A 30#define VP9_SEG_MAP_BUFFER HEVC_ASSIST_SCRATCH_B 31#define VP9_SCALELUT HEVC_ASSIST_SCRATCH_D 32#define VP9_WAIT_FLAG HEVC_ASSIST_SCRATCH_E 33#define LMEM_DUMP_ADR HEVC_ASSIST_SCRATCH_F 34#define NAL_SEARCH_CTL HEVC_ASSIST_SCRATCH_I 35#define VP9_DECODE_MODE HEVC_ASSIST_SCRATCH_J 36 #define DECODE_MODE_SINGLE 0 37#define DECODE_STOP_POS HEVC_ASSIST_SCRATCH_K 38#define HEVC_DECODE_COUNT HEVC_ASSIST_SCRATCH_M 39#define HEVC_DECODE_SIZE HEVC_ASSIST_SCRATCH_N 40 41/* VP9 Constants */ 42#define LCU_SIZE 64 43#define MAX_REF_PIC_NUM 24 44#define REFS_PER_FRAME 3 45#define REF_FRAMES 8 46#define MV_MEM_UNIT 0x240 47#define ADAPT_PROB_SIZE 0xf80 48 49enum FRAME_TYPE { 50 KEY_FRAME = 0, 51 INTER_FRAME = 1, 52 FRAME_TYPES, 53}; 54 55/* VP9 Workspace layout */ 56#define MPRED_MV_BUF_SIZE 0x120000 57 58#define IPP_SIZE 0x4000 59#define SAO_ABV_SIZE 0x30000 60#define SAO_VB_SIZE 0x30000 61#define SH_TM_RPS_SIZE 0x800 62#define VPS_SIZE 0x800 63#define SPS_SIZE 0x800 64#define PPS_SIZE 0x2000 65#define SAO_UP_SIZE 0x2800 66#define SWAP_BUF_SIZE 0x800 67#define SWAP_BUF2_SIZE 0x800 68#define SCALELUT_SIZE 0x8000 69#define DBLK_PARA_SIZE 0x80000 70#define DBLK_DATA_SIZE 0x80000 71#define SEG_MAP_SIZE 0xd800 72#define PROB_SIZE 0x5000 73#define COUNT_SIZE 0x3000 74#define MMU_VBH_SIZE 0x5000 75#define MPRED_ABV_SIZE 0x10000 76#define MPRED_MV_SIZE (MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM) 77#define RPM_BUF_SIZE 0x100 78#define LMEM_SIZE 0x800 79 80#define IPP_OFFSET 0x00 81#define SAO_ABV_OFFSET (IPP_OFFSET + IPP_SIZE) 82#define SAO_VB_OFFSET (SAO_ABV_OFFSET + SAO_ABV_SIZE) 83#define SH_TM_RPS_OFFSET (SAO_VB_OFFSET + SAO_VB_SIZE) 84#define VPS_OFFSET (SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE) 85#define SPS_OFFSET (VPS_OFFSET + VPS_SIZE) 86#define PPS_OFFSET (SPS_OFFSET + SPS_SIZE) 87#define SAO_UP_OFFSET (PPS_OFFSET + PPS_SIZE) 88#define SWAP_BUF_OFFSET (SAO_UP_OFFSET + SAO_UP_SIZE) 89#define SWAP_BUF2_OFFSET (SWAP_BUF_OFFSET + SWAP_BUF_SIZE) 90#define SCALELUT_OFFSET (SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE) 91#define DBLK_PARA_OFFSET (SCALELUT_OFFSET + SCALELUT_SIZE) 92#define DBLK_DATA_OFFSET (DBLK_PARA_OFFSET + DBLK_PARA_SIZE) 93#define SEG_MAP_OFFSET (DBLK_DATA_OFFSET + DBLK_DATA_SIZE) 94#define PROB_OFFSET (SEG_MAP_OFFSET + SEG_MAP_SIZE) 95#define COUNT_OFFSET (PROB_OFFSET + PROB_SIZE) 96#define MMU_VBH_OFFSET (COUNT_OFFSET + COUNT_SIZE) 97#define MPRED_ABV_OFFSET (MMU_VBH_OFFSET + MMU_VBH_SIZE) 98#define MPRED_MV_OFFSET (MPRED_ABV_OFFSET + MPRED_ABV_SIZE) 99#define RPM_OFFSET (MPRED_MV_OFFSET + MPRED_MV_SIZE) 100#define LMEM_OFFSET (RPM_OFFSET + RPM_BUF_SIZE) 101 102#define SIZE_WORKSPACE ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K) 103 104#define NONE -1 105#define INTRA_FRAME 0 106#define LAST_FRAME 1 107#define GOLDEN_FRAME 2 108#define ALTREF_FRAME 3 109#define MAX_REF_FRAMES 4 110 111/* 112 * Defines, declarations, sub-functions for vp9 de-block loop 113 filter Thr/Lvl table update 114 * - struct segmentation is for loop filter only (removed something) 115 * - function "vp9_loop_filter_init" and "vp9_loop_filter_frame_init" will 116 be instantiated in C_Entry 117 * - vp9_loop_filter_init run once before decoding start 118 * - vp9_loop_filter_frame_init run before every frame decoding start 119 * - set video format to VP9 is in vp9_loop_filter_init 120 */ 121#define MAX_LOOP_FILTER 63 122#define MAX_REF_LF_DELTAS 4 123#define MAX_MODE_LF_DELTAS 2 124#define SEGMENT_DELTADATA 0 125#define SEGMENT_ABSDATA 1 126#define MAX_SEGMENTS 8 127 128/* VP9 PROB processing defines */ 129#define VP9_PARTITION_START 0 130#define VP9_PARTITION_SIZE_STEP (3 * 4) 131#define VP9_PARTITION_ONE_SIZE (4 * VP9_PARTITION_SIZE_STEP) 132#define VP9_PARTITION_KEY_START 0 133#define VP9_PARTITION_P_START VP9_PARTITION_ONE_SIZE 134#define VP9_PARTITION_SIZE (2 * VP9_PARTITION_ONE_SIZE) 135#define VP9_SKIP_START (VP9_PARTITION_START + VP9_PARTITION_SIZE) 136#define VP9_SKIP_SIZE 4 /* only use 3*/ 137#define VP9_TX_MODE_START (VP9_SKIP_START + VP9_SKIP_SIZE) 138#define VP9_TX_MODE_8_0_OFFSET 0 139#define VP9_TX_MODE_8_1_OFFSET 1 140#define VP9_TX_MODE_16_0_OFFSET 2 141#define VP9_TX_MODE_16_1_OFFSET 4 142#define VP9_TX_MODE_32_0_OFFSET 6 143#define VP9_TX_MODE_32_1_OFFSET 9 144#define VP9_TX_MODE_SIZE 12 145#define VP9_COEF_START (VP9_TX_MODE_START + VP9_TX_MODE_SIZE) 146#define VP9_COEF_BAND_0_OFFSET 0 147#define VP9_COEF_BAND_1_OFFSET (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1) 148#define VP9_COEF_BAND_2_OFFSET (VP9_COEF_BAND_1_OFFSET + 6 * 3) 149#define VP9_COEF_BAND_3_OFFSET (VP9_COEF_BAND_2_OFFSET + 6 * 3) 150#define VP9_COEF_BAND_4_OFFSET (VP9_COEF_BAND_3_OFFSET + 6 * 3) 151#define VP9_COEF_BAND_5_OFFSET (VP9_COEF_BAND_4_OFFSET + 6 * 3) 152#define VP9_COEF_SIZE_ONE_SET 100 /* ((3 + 5 * 6) * 3 + 1 padding)*/ 153#define VP9_COEF_4X4_START (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET) 154#define VP9_COEF_8X8_START (VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET) 155#define VP9_COEF_16X16_START (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET) 156#define VP9_COEF_32X32_START (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET) 157#define VP9_COEF_SIZE_PLANE (2 * VP9_COEF_SIZE_ONE_SET) 158#define VP9_COEF_SIZE (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET) 159#define VP9_INTER_MODE_START (VP9_COEF_START + VP9_COEF_SIZE) 160#define VP9_INTER_MODE_SIZE 24 /* only use 21 (# * 7)*/ 161#define VP9_INTERP_START (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE) 162#define VP9_INTERP_SIZE 8 163#define VP9_INTRA_INTER_START (VP9_INTERP_START + VP9_INTERP_SIZE) 164#define VP9_INTRA_INTER_SIZE 4 165#define VP9_INTERP_INTRA_INTER_START VP9_INTERP_START 166#define VP9_INTERP_INTRA_INTER_SIZE (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE) 167#define VP9_COMP_INTER_START \ 168 (VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE) 169#define VP9_COMP_INTER_SIZE 5 170#define VP9_COMP_REF_START (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE) 171#define VP9_COMP_REF_SIZE 5 172#define VP9_SINGLE_REF_START (VP9_COMP_REF_START + VP9_COMP_REF_SIZE) 173#define VP9_SINGLE_REF_SIZE 10 174#define VP9_REF_MODE_START VP9_COMP_INTER_START 175#define VP9_REF_MODE_SIZE \ 176 (VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE) 177#define VP9_IF_Y_MODE_START (VP9_REF_MODE_START + VP9_REF_MODE_SIZE) 178#define VP9_IF_Y_MODE_SIZE 36 179#define VP9_IF_UV_MODE_START (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE) 180#define VP9_IF_UV_MODE_SIZE 92 /* only use 90*/ 181#define VP9_MV_JOINTS_START (VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE) 182#define VP9_MV_JOINTS_SIZE 3 183#define VP9_MV_SIGN_0_START (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE) 184#define VP9_MV_SIGN_0_SIZE 1 185#define VP9_MV_CLASSES_0_START (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE) 186#define VP9_MV_CLASSES_0_SIZE 10 187#define VP9_MV_CLASS0_0_START \ 188 (VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE) 189#define VP9_MV_CLASS0_0_SIZE 1 190#define VP9_MV_BITS_0_START (VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE) 191#define VP9_MV_BITS_0_SIZE 10 192#define VP9_MV_SIGN_1_START (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE) 193#define VP9_MV_SIGN_1_SIZE 1 194#define VP9_MV_CLASSES_1_START \ 195 (VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE) 196#define VP9_MV_CLASSES_1_SIZE 10 197#define VP9_MV_CLASS0_1_START \ 198 (VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE) 199#define VP9_MV_CLASS0_1_SIZE 1 200#define VP9_MV_BITS_1_START \ 201 (VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE) 202#define VP9_MV_BITS_1_SIZE 10 203#define VP9_MV_CLASS0_FP_0_START \ 204 (VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE) 205#define VP9_MV_CLASS0_FP_0_SIZE 9 206#define VP9_MV_CLASS0_FP_1_START \ 207 (VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE) 208#define VP9_MV_CLASS0_FP_1_SIZE 9 209#define VP9_MV_CLASS0_HP_0_START \ 210 (VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE) 211#define VP9_MV_CLASS0_HP_0_SIZE 2 212#define VP9_MV_CLASS0_HP_1_START \ 213 (VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE) 214#define VP9_MV_CLASS0_HP_1_SIZE 2 215#define VP9_MV_START VP9_MV_JOINTS_START 216#define VP9_MV_SIZE 72 /*only use 69*/ 217 218#define VP9_TOTAL_SIZE (VP9_MV_START + VP9_MV_SIZE) 219 220/* VP9 COUNT mem processing defines */ 221#define VP9_COEF_COUNT_START 0 222#define VP9_COEF_COUNT_BAND_0_OFFSET 0 223#define VP9_COEF_COUNT_BAND_1_OFFSET \ 224 (VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5) 225#define VP9_COEF_COUNT_BAND_2_OFFSET \ 226 (VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5) 227#define VP9_COEF_COUNT_BAND_3_OFFSET \ 228 (VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5) 229#define VP9_COEF_COUNT_BAND_4_OFFSET \ 230 (VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5) 231#define VP9_COEF_COUNT_BAND_5_OFFSET \ 232 (VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5) 233#define VP9_COEF_COUNT_SIZE_ONE_SET 165 /* ((3 + 5 * 6) * 5 */ 234#define VP9_COEF_COUNT_4X4_START \ 235 (VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET) 236#define VP9_COEF_COUNT_8X8_START \ 237 (VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET) 238#define VP9_COEF_COUNT_16X16_START \ 239 (VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET) 240#define VP9_COEF_COUNT_32X32_START \ 241 (VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET) 242#define VP9_COEF_COUNT_SIZE_PLANE (2 * VP9_COEF_COUNT_SIZE_ONE_SET) 243#define VP9_COEF_COUNT_SIZE (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET) 244 245#define VP9_INTRA_INTER_COUNT_START \ 246 (VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE) 247#define VP9_INTRA_INTER_COUNT_SIZE (4 * 2) 248#define VP9_COMP_INTER_COUNT_START \ 249 (VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE) 250#define VP9_COMP_INTER_COUNT_SIZE (5 * 2) 251#define VP9_COMP_REF_COUNT_START \ 252 (VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE) 253#define VP9_COMP_REF_COUNT_SIZE (5 * 2) 254#define VP9_SINGLE_REF_COUNT_START \ 255 (VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE) 256#define VP9_SINGLE_REF_COUNT_SIZE (10 * 2) 257#define VP9_TX_MODE_COUNT_START \ 258 (VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE) 259#define VP9_TX_MODE_COUNT_SIZE (12 * 2) 260#define VP9_SKIP_COUNT_START \ 261 (VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE) 262#define VP9_SKIP_COUNT_SIZE (3 * 2) 263#define VP9_MV_SIGN_0_COUNT_START \ 264 (VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE) 265#define VP9_MV_SIGN_0_COUNT_SIZE (1 * 2) 266#define VP9_MV_SIGN_1_COUNT_START \ 267 (VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE) 268#define VP9_MV_SIGN_1_COUNT_SIZE (1 * 2) 269#define VP9_MV_BITS_0_COUNT_START \ 270 (VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE) 271#define VP9_MV_BITS_0_COUNT_SIZE (10 * 2) 272#define VP9_MV_BITS_1_COUNT_START \ 273 (VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE) 274#define VP9_MV_BITS_1_COUNT_SIZE (10 * 2) 275#define VP9_MV_CLASS0_HP_0_COUNT_START \ 276 (VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE) 277#define VP9_MV_CLASS0_HP_0_COUNT_SIZE (2 * 2) 278#define VP9_MV_CLASS0_HP_1_COUNT_START \ 279 (VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE) 280#define VP9_MV_CLASS0_HP_1_COUNT_SIZE (2 * 2) 281 282/* Start merge_tree */ 283#define VP9_INTER_MODE_COUNT_START \ 284 (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE) 285#define VP9_INTER_MODE_COUNT_SIZE (7 * 4) 286#define VP9_IF_Y_MODE_COUNT_START \ 287 (VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE) 288#define VP9_IF_Y_MODE_COUNT_SIZE (10 * 4) 289#define VP9_IF_UV_MODE_COUNT_START \ 290 (VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE) 291#define VP9_IF_UV_MODE_COUNT_SIZE (10 * 10) 292#define VP9_PARTITION_P_COUNT_START \ 293 (VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE) 294#define VP9_PARTITION_P_COUNT_SIZE (4 * 4 * 4) 295#define VP9_INTERP_COUNT_START \ 296 (VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE) 297#define VP9_INTERP_COUNT_SIZE (4 * 3) 298#define VP9_MV_JOINTS_COUNT_START \ 299 (VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE) 300#define VP9_MV_JOINTS_COUNT_SIZE (1 * 4) 301#define VP9_MV_CLASSES_0_COUNT_START \ 302 (VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE) 303#define VP9_MV_CLASSES_0_COUNT_SIZE (1 * 11) 304#define VP9_MV_CLASS0_0_COUNT_START \ 305 (VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE) 306#define VP9_MV_CLASS0_0_COUNT_SIZE (1 * 2) 307#define VP9_MV_CLASSES_1_COUNT_START \ 308 (VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE) 309#define VP9_MV_CLASSES_1_COUNT_SIZE (1 * 11) 310#define VP9_MV_CLASS0_1_COUNT_START \ 311 (VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE) 312#define VP9_MV_CLASS0_1_COUNT_SIZE (1 * 2) 313#define VP9_MV_CLASS0_FP_0_COUNT_START \ 314 (VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE) 315#define VP9_MV_CLASS0_FP_0_COUNT_SIZE (3 * 4) 316#define VP9_MV_CLASS0_FP_1_COUNT_START \ 317 (VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE) 318#define VP9_MV_CLASS0_FP_1_COUNT_SIZE (3 * 4) 319 320#define DC_PRED 0 /* Average of above and left pixels */ 321#define V_PRED 1 /* Vertical */ 322#define H_PRED 2 /* Horizontal */ 323#define D45_PRED 3 /* Directional 45 deg = round(arctan(1/1) * 180/pi) */ 324#define D135_PRED 4 /* Directional 135 deg = 180 - 45 */ 325#define D117_PRED 5 /* Directional 117 deg = 180 - 63 */ 326#define D153_PRED 6 /* Directional 153 deg = 180 - 27 */ 327#define D207_PRED 7 /* Directional 207 deg = 180 + 27 */ 328#define D63_PRED 8 /* Directional 63 deg = round(arctan(2/1) * 180/pi) */ 329#define TM_PRED 9 /* True-motion */ 330 331/* Use a static inline to avoid possible side effect from num being reused */ 332static inline int round_power_of_two(int value, int num) 333{ 334 return (value + (1 << (num - 1))) >> num; 335} 336 337#define MODE_MV_COUNT_SAT 20 338static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { 339 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 340 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 341}; 342 343union rpm_param { 344 struct { 345 u16 data[RPM_BUF_SIZE]; 346 } l; 347 struct { 348 u16 profile; 349 u16 show_existing_frame; 350 u16 frame_to_show_idx; 351 u16 frame_type; /*1 bit*/ 352 u16 show_frame; /*1 bit*/ 353 u16 error_resilient_mode; /*1 bit*/ 354 u16 intra_only; /*1 bit*/ 355 u16 display_size_present; /*1 bit*/ 356 u16 reset_frame_context; 357 u16 refresh_frame_flags; 358 u16 width; 359 u16 height; 360 u16 display_width; 361 u16 display_height; 362 u16 ref_info; 363 u16 same_frame_size; 364 u16 mode_ref_delta_enabled; 365 u16 ref_deltas[4]; 366 u16 mode_deltas[2]; 367 u16 filter_level; 368 u16 sharpness_level; 369 u16 bit_depth; 370 u16 seg_quant_info[8]; 371 u16 seg_enabled; 372 u16 seg_abs_delta; 373 /* bit 15: feature enabled; bit 8, sign; bit[5:0], data */ 374 u16 seg_lf_info[8]; 375 } p; 376}; 377 378enum SEG_LVL_FEATURES { 379 SEG_LVL_ALT_Q = 0, /* Use alternate Quantizer */ 380 SEG_LVL_ALT_LF = 1, /* Use alternate loop filter value */ 381 SEG_LVL_REF_FRAME = 2, /* Optional Segment reference frame */ 382 SEG_LVL_SKIP = 3, /* Optional Segment (0,0) + skip mode */ 383 SEG_LVL_MAX = 4 /* Number of features supported */ 384}; 385 386struct segmentation { 387 u8 enabled; 388 u8 update_map; 389 u8 update_data; 390 u8 abs_delta; 391 u8 temporal_update; 392 s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; 393 unsigned int feature_mask[MAX_SEGMENTS]; 394}; 395 396struct loop_filter_thresh { 397 u8 mblim; 398 u8 lim; 399 u8 hev_thr; 400}; 401 402struct loop_filter_info_n { 403 struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; 404 u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; 405}; 406 407struct loopfilter { 408 int filter_level; 409 410 int sharpness_level; 411 int last_sharpness_level; 412 413 u8 mode_ref_delta_enabled; 414 u8 mode_ref_delta_update; 415 416 /*0 = Intra, Last, GF, ARF*/ 417 signed char ref_deltas[MAX_REF_LF_DELTAS]; 418 signed char last_ref_deltas[MAX_REF_LF_DELTAS]; 419 420 /*0 = ZERO_MV, MV*/ 421 signed char mode_deltas[MAX_MODE_LF_DELTAS]; 422 signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; 423}; 424 425struct vp9_frame { 426 struct list_head list; 427 struct vb2_v4l2_buffer *vbuf; 428 int index; 429 int intra_only; 430 int show; 431 int type; 432 int done; 433 unsigned int width; 434 unsigned int height; 435}; 436 437struct codec_vp9 { 438 /* VP9 context lock */ 439 struct mutex lock; 440 441 /* Common part with the HEVC decoder */ 442 struct codec_hevc_common common; 443 444 /* Buffer for the VP9 Workspace */ 445 void *workspace_vaddr; 446 dma_addr_t workspace_paddr; 447 448 /* Contains many information parsed from the bitstream */ 449 union rpm_param rpm_param; 450 451 /* Whether we detected the bitstream as 10-bit */ 452 int is_10bit; 453 454 /* Coded resolution reported by the hardware */ 455 u32 width, height; 456 457 /* All ref frames used by the HW at a given time */ 458 struct list_head ref_frames_list; 459 u32 frames_num; 460 461 /* In case of downsampling (decoding with FBC but outputting in NV12M), 462 * we need to allocate additional buffers for FBC. 463 */ 464 void *fbc_buffer_vaddr[MAX_REF_PIC_NUM]; 465 dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM]; 466 467 int ref_frame_map[REF_FRAMES]; 468 int next_ref_frame_map[REF_FRAMES]; 469 struct vp9_frame *frame_refs[REFS_PER_FRAME]; 470 471 u32 lcu_total; 472 473 /* loop filter */ 474 int default_filt_lvl; 475 struct loop_filter_info_n lfi; 476 struct loopfilter lf; 477 struct segmentation seg_4lf; 478 479 struct vp9_frame *cur_frame; 480 struct vp9_frame *prev_frame; 481}; 482 483static int div_r32(s64 m, int n) 484{ 485 s64 qu = div_s64(m, n); 486 487 return (int)qu; 488} 489 490static int clip_prob(int p) 491{ 492 return clamp_val(p, 1, 255); 493} 494 495static int segfeature_active(struct segmentation *seg, int segment_id, 496 enum SEG_LVL_FEATURES feature_id) 497{ 498 return seg->enabled && 499 (seg->feature_mask[segment_id] & (1 << feature_id)); 500} 501 502static int get_segdata(struct segmentation *seg, int segment_id, 503 enum SEG_LVL_FEATURES feature_id) 504{ 505 return seg->feature_data[segment_id][feature_id]; 506} 507 508static void vp9_update_sharpness(struct loop_filter_info_n *lfi, 509 int sharpness_lvl) 510{ 511 int lvl; 512 513 /* For each possible value for the loop filter fill out limits*/ 514 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { 515 /* Set loop filter parameters that control sharpness.*/ 516 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + 517 (sharpness_lvl > 4)); 518 519 if (sharpness_lvl > 0) { 520 if (block_inside_limit > (9 - sharpness_lvl)) 521 block_inside_limit = (9 - sharpness_lvl); 522 } 523 524 if (block_inside_limit < 1) 525 block_inside_limit = 1; 526 527 lfi->lfthr[lvl].lim = (u8)block_inside_limit; 528 lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) + 529 block_inside_limit); 530 } 531} 532 533/* Instantiate this function once when decode is started */ 534static void 535vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9) 536{ 537 struct loop_filter_info_n *lfi = &vp9->lfi; 538 struct loopfilter *lf = &vp9->lf; 539 struct segmentation *seg_4lf = &vp9->seg_4lf; 540 int i; 541 542 memset(lfi, 0, sizeof(struct loop_filter_info_n)); 543 memset(lf, 0, sizeof(struct loopfilter)); 544 memset(seg_4lf, 0, sizeof(struct segmentation)); 545 lf->sharpness_level = 0; 546 vp9_update_sharpness(lfi, lf->sharpness_level); 547 lf->last_sharpness_level = lf->sharpness_level; 548 549 for (i = 0; i < 32; i++) { 550 unsigned int thr; 551 552 thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | 553 (lfi->lfthr[i * 2 + 1].mblim & 0xff); 554 thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | 555 (lfi->lfthr[i * 2].mblim & 0xff); 556 557 amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); 558 } 559 560 if (core->platform->revision >= VDEC_REVISION_SM1) 561 amvdec_write_dos(core, HEVC_DBLK_CFGB, 562 (0x3 << 14) | /* dw fifo thres r and b */ 563 (0x3 << 12) | /* dw fifo thres r or b */ 564 (0x3 << 10) | /* dw fifo thres not r/b */ 565 BIT(0)); /* VP9 video format */ 566 else if (core->platform->revision >= VDEC_REVISION_G12A) 567 /* VP9 video format */ 568 amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0)); 569 else 570 amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001); 571} 572 573static void 574vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg, 575 struct loop_filter_info_n *lfi, 576 struct loopfilter *lf, int default_filt_lvl) 577{ 578 int i; 579 int seg_id; 580 581 /* 582 * n_shift is the multiplier for lf_deltas 583 * the multiplier is: 584 * - 1 for when filter_lvl is between 0 and 31 585 * - 2 when filter_lvl is between 32 and 63 586 */ 587 const int scale = 1 << (default_filt_lvl >> 5); 588 589 /* update limits if sharpness has changed */ 590 if (lf->last_sharpness_level != lf->sharpness_level) { 591 vp9_update_sharpness(lfi, lf->sharpness_level); 592 lf->last_sharpness_level = lf->sharpness_level; 593 594 /* Write to register */ 595 for (i = 0; i < 32; i++) { 596 unsigned int thr; 597 598 thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | 599 (lfi->lfthr[i * 2 + 1].mblim & 0xff); 600 thr = (thr << 16) | 601 ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | 602 (lfi->lfthr[i * 2].mblim & 0xff); 603 604 amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); 605 } 606 } 607 608 for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { 609 int lvl_seg = default_filt_lvl; 610 611 if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { 612 const int data = get_segdata(seg, seg_id, 613 SEG_LVL_ALT_LF); 614 lvl_seg = clamp_t(int, 615 seg->abs_delta == SEGMENT_ABSDATA ? 616 data : default_filt_lvl + data, 617 0, MAX_LOOP_FILTER); 618 } 619 620 if (!lf->mode_ref_delta_enabled) { 621 /* 622 * We could get rid of this if we assume that deltas 623 * are set to zero when not in use. 624 * encoder always uses deltas 625 */ 626 memset(lfi->lvl[seg_id], lvl_seg, 627 sizeof(lfi->lvl[seg_id])); 628 } else { 629 int ref, mode; 630 const int intra_lvl = 631 lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; 632 lfi->lvl[seg_id][INTRA_FRAME][0] = 633 clamp_val(intra_lvl, 0, MAX_LOOP_FILTER); 634 635 for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { 636 for (mode = 0; mode < MAX_MODE_LF_DELTAS; 637 ++mode) { 638 const int inter_lvl = 639 lvl_seg + 640 lf->ref_deltas[ref] * scale + 641 lf->mode_deltas[mode] * scale; 642 lfi->lvl[seg_id][ref][mode] = 643 clamp_val(inter_lvl, 0, 644 MAX_LOOP_FILTER); 645 } 646 } 647 } 648 } 649 650 for (i = 0; i < 16; i++) { 651 unsigned int level; 652 653 level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) | 654 ((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) | 655 ((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) | 656 (lfi->lvl[i >> 1][0][i & 1] & 0x3f); 657 if (!default_filt_lvl) 658 level = 0; 659 660 amvdec_write_dos(core, HEVC_DBLK_CFGA, level); 661 } 662} 663 664static void codec_vp9_flush_output(struct amvdec_session *sess) 665{ 666 struct codec_vp9 *vp9 = sess->priv; 667 struct vp9_frame *tmp, *n; 668 669 mutex_lock(&vp9->lock); 670 list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { 671 if (!tmp->done) { 672 if (tmp->show) 673 amvdec_dst_buf_done(sess, tmp->vbuf, 674 V4L2_FIELD_NONE); 675 else 676 v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); 677 678 vp9->frames_num--; 679 } 680 681 list_del(&tmp->list); 682 kfree(tmp); 683 } 684 mutex_unlock(&vp9->lock); 685} 686 687static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess) 688{ 689 struct codec_vp9 *vp9 = sess->priv; 690 691 if (!vp9) 692 return 0; 693 694 return vp9->frames_num; 695} 696 697static int codec_vp9_alloc_workspace(struct amvdec_core *core, 698 struct codec_vp9 *vp9) 699{ 700 /* Allocate some memory for the VP9 decoder's state */ 701 vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE, 702 &vp9->workspace_paddr, 703 GFP_KERNEL); 704 if (!vp9->workspace_vaddr) { 705 dev_err(core->dev, "Failed to allocate VP9 Workspace\n"); 706 return -ENOMEM; 707 } 708 709 return 0; 710} 711 712static void codec_vp9_setup_workspace(struct amvdec_session *sess, 713 struct codec_vp9 *vp9) 714{ 715 struct amvdec_core *core = sess->core; 716 u32 revision = core->platform->revision; 717 dma_addr_t wkaddr = vp9->workspace_paddr; 718 719 amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET); 720 amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET); 721 amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET); 722 amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET); 723 amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET); 724 725 amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER, 726 wkaddr + SWAP_BUF_OFFSET); 727 amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2, 728 wkaddr + SWAP_BUF2_OFFSET); 729 amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET); 730 731 if (core->platform->revision >= VDEC_REVISION_G12A) 732 amvdec_write_dos(core, HEVC_DBLK_CFGE, 733 wkaddr + DBLK_PARA_OFFSET); 734 735 amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET); 736 amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET); 737 amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET); 738 amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET); 739 amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET); 740 amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET); 741 742 if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) { 743 amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR, 744 wkaddr + MMU_VBH_OFFSET); 745 amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR, 746 wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2)); 747 748 if (revision >= VDEC_REVISION_G12A) 749 amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR, 750 vp9->common.mmu_map_paddr); 751 else 752 amvdec_write_dos(core, VP9_MMU_MAP_BUFFER, 753 vp9->common.mmu_map_paddr); 754 } 755} 756 757static int codec_vp9_start(struct amvdec_session *sess) 758{ 759 struct amvdec_core *core = sess->core; 760 struct codec_vp9 *vp9; 761 u32 val; 762 int i; 763 int ret; 764 765 vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL); 766 if (!vp9) 767 return -ENOMEM; 768 769 ret = codec_vp9_alloc_workspace(core, vp9); 770 if (ret) 771 goto free_vp9; 772 773 codec_vp9_setup_workspace(sess, vp9); 774 amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0)); 775 /* stream_fifo_hole */ 776 if (core->platform->revision >= VDEC_REVISION_G12A) 777 amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29)); 778 779 val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff; 780 val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0); 781 amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val); 782 amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0)); 783 amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) | 784 (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0)); 785 amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0)); 786 amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0)); 787 amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001); 788 789 amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0); 790 791 amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16)); 792 for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i) 793 amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, 794 vdec_hevc_parser_cmd[i]); 795 796 amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0); 797 amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1); 798 amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2); 799 amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL, 800 BIT(5) | BIT(2) | BIT(0)); 801 802 amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0)); 803 amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1)); 804 805 amvdec_write_dos(core, VP9_WAIT_FLAG, 1); 806 807 /* clear mailbox interrupt */ 808 amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1); 809 /* enable mailbox interrupt */ 810 amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1); 811 /* disable PSCALE for hardware sharing */ 812 amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0); 813 /* Let the uCode do all the parsing */ 814 amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8); 815 816 amvdec_write_dos(core, DECODE_STOP_POS, 0); 817 amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE); 818 819 pr_debug("decode_count: %u; decode_size: %u\n", 820 amvdec_read_dos(core, HEVC_DECODE_COUNT), 821 amvdec_read_dos(core, HEVC_DECODE_SIZE)); 822 823 vp9_loop_filter_init(core, vp9); 824 825 INIT_LIST_HEAD(&vp9->ref_frames_list); 826 mutex_init(&vp9->lock); 827 memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map)); 828 memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map)); 829 for (i = 0; i < REFS_PER_FRAME; ++i) 830 vp9->frame_refs[i] = NULL; 831 sess->priv = vp9; 832 833 return 0; 834 835free_vp9: 836 kfree(vp9); 837 return ret; 838} 839 840static int codec_vp9_stop(struct amvdec_session *sess) 841{ 842 struct amvdec_core *core = sess->core; 843 struct codec_vp9 *vp9 = sess->priv; 844 845 mutex_lock(&vp9->lock); 846 if (vp9->workspace_vaddr) 847 dma_free_coherent(core->dev, SIZE_WORKSPACE, 848 vp9->workspace_vaddr, 849 vp9->workspace_paddr); 850 851 codec_hevc_free_fbc_buffers(sess, &vp9->common); 852 mutex_unlock(&vp9->lock); 853 854 return 0; 855} 856 857/* 858 * Program LAST & GOLDEN frames into the motion compensation reference cache 859 * controller 860 */ 861static void codec_vp9_set_mcrcc(struct amvdec_session *sess) 862{ 863 struct amvdec_core *core = sess->core; 864 struct codec_vp9 *vp9 = sess->priv; 865 u32 val; 866 867 /* Reset mcrcc */ 868 amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x2); 869 /* Disable on I-frame */ 870 if (vp9->cur_frame->type == KEY_FRAME || vp9->cur_frame->intra_only) { 871 amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x0); 872 return; 873 } 874 875 amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, BIT(1)); 876 val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff; 877 val |= (val << 16); 878 amvdec_write_dos(core, HEVCD_MCRCC_CTL2, val); 879 val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff; 880 val |= (val << 16); 881 amvdec_write_dos(core, HEVCD_MCRCC_CTL3, val); 882 883 /* Enable mcrcc progressive-mode */ 884 amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0xff0); 885} 886 887static void codec_vp9_set_sao(struct amvdec_session *sess, 888 struct vb2_buffer *vb) 889{ 890 struct amvdec_core *core = sess->core; 891 struct codec_vp9 *vp9 = sess->priv; 892 893 dma_addr_t buf_y_paddr; 894 dma_addr_t buf_u_v_paddr; 895 u32 val; 896 897 if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit)) 898 buf_y_paddr = 899 vp9->common.fbc_buffer_paddr[vb->index]; 900 else 901 buf_y_paddr = 902 vb2_dma_contig_plane_dma_addr(vb, 0); 903 904 if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { 905 val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200; 906 amvdec_write_dos(core, HEVC_SAO_CTRL5, val); 907 amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr); 908 } 909 910 if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) { 911 buf_y_paddr = 912 vb2_dma_contig_plane_dma_addr(vb, 0); 913 buf_u_v_paddr = 914 vb2_dma_contig_plane_dma_addr(vb, 1); 915 amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr); 916 amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr); 917 amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr); 918 amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr); 919 } 920 921 if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, 922 vp9->is_10bit)) { 923 amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR, 924 vp9->common.mmu_header_paddr[vb->index]); 925 /* use HEVC_CM_HEADER_START_ADDR */ 926 amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10)); 927 } 928 929 amvdec_write_dos(core, HEVC_SAO_Y_LENGTH, 930 amvdec_get_output_size(sess)); 931 amvdec_write_dos(core, HEVC_SAO_C_LENGTH, 932 (amvdec_get_output_size(sess) / 2)); 933 934 if (core->platform->revision >= VDEC_REVISION_G12A) { 935 amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB, 936 BIT(4) | BIT(5) | BIT(8) | BIT(9)); 937 /* enable first, compressed write */ 938 if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) 939 amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8)); 940 941 /* enable second, uncompressed write */ 942 if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) 943 amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9)); 944 945 /* dblk pipeline mode=1 for performance */ 946 if (sess->width >= 1280) 947 amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4)); 948 949 pr_debug("HEVC_DBLK_CFGB: %08X\n", 950 amvdec_read_dos(core, HEVC_DBLK_CFGB)); 951 } 952 953 val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0; 954 val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */ 955 if (core->platform->revision < VDEC_REVISION_G12A) { 956 val &= ~0x3; 957 if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) 958 val |= BIT(0); /* disable cm compression */ 959 /* TOFIX: Handle Amlogic Framebuffer compression */ 960 } 961 962 amvdec_write_dos(core, HEVC_SAO_CTRL1, val); 963 pr_debug("HEVC_SAO_CTRL1: %08X\n", val); 964 965 /* no downscale for NV12 */ 966 val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000; 967 amvdec_write_dos(core, HEVC_SAO_CTRL5, val); 968 969 val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30; 970 val |= 0xf; 971 val &= ~BIT(12); /* NV12 */ 972 amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val); 973} 974 975static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9, 976 struct vp9_frame *frame) 977{ 978 return vp9->workspace_paddr + MPRED_MV_OFFSET + 979 (frame->index * MPRED_MV_BUF_SIZE); 980} 981 982static void codec_vp9_set_mpred_mv(struct amvdec_core *core, 983 struct codec_vp9 *vp9) 984{ 985 int mpred_mv_rd_end_addr; 986 int use_prev_frame_mvs = vp9->prev_frame->width == 987 vp9->cur_frame->width && 988 vp9->prev_frame->height == 989 vp9->cur_frame->height && 990 !vp9->prev_frame->intra_only && 991 vp9->prev_frame->show && 992 vp9->prev_frame->type != KEY_FRAME; 993 994 amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412); 995 amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR, 996 vp9->workspace_paddr + MPRED_ABV_OFFSET); 997 998 amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); 999 if (use_prev_frame_mvs) 1000 amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); 1001 1002 amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR, 1003 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); 1004 amvdec_write_dos(core, HEVC_MPRED_MV_WPTR, 1005 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); 1006 1007 amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR, 1008 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); 1009 amvdec_write_dos(core, HEVC_MPRED_MV_RPTR, 1010 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); 1011 1012 mpred_mv_rd_end_addr = 1013 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) + 1014 (vp9->lcu_total * MV_MEM_UNIT); 1015 amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr); 1016} 1017 1018static void codec_vp9_update_next_ref(struct codec_vp9 *vp9) 1019{ 1020 union rpm_param *param = &vp9->rpm_param; 1021 u32 buf_idx = vp9->cur_frame->index; 1022 int ref_index = 0; 1023 int refresh_frame_flags; 1024 int mask; 1025 1026 refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 1027 0xff : param->p.refresh_frame_flags; 1028 1029 for (mask = refresh_frame_flags; mask; mask >>= 1) { 1030 pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index); 1031 if (mask & 1) 1032 vp9->next_ref_frame_map[ref_index] = buf_idx; 1033 else 1034 vp9->next_ref_frame_map[ref_index] = 1035 vp9->ref_frame_map[ref_index]; 1036 1037 ++ref_index; 1038 } 1039 1040 for (; ref_index < REF_FRAMES; ++ref_index) 1041 vp9->next_ref_frame_map[ref_index] = 1042 vp9->ref_frame_map[ref_index]; 1043} 1044 1045static void codec_vp9_save_refs(struct codec_vp9 *vp9) 1046{ 1047 union rpm_param *param = &vp9->rpm_param; 1048 int i; 1049 1050 for (i = 0; i < REFS_PER_FRAME; ++i) { 1051 const int ref = (param->p.ref_info >> 1052 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; 1053 1054 if (vp9->ref_frame_map[ref] < 0) 1055 continue; 1056 1057 pr_warn("%s: FIXME, would need to save ref %d\n", 1058 __func__, vp9->ref_frame_map[ref]); 1059 } 1060} 1061 1062static void codec_vp9_update_ref(struct codec_vp9 *vp9) 1063{ 1064 union rpm_param *param = &vp9->rpm_param; 1065 int ref_index = 0; 1066 int mask; 1067 int refresh_frame_flags; 1068 1069 if (!vp9->cur_frame) 1070 return; 1071 1072 refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 1073 0xff : param->p.refresh_frame_flags; 1074 1075 for (mask = refresh_frame_flags; mask; mask >>= 1) { 1076 vp9->ref_frame_map[ref_index] = 1077 vp9->next_ref_frame_map[ref_index]; 1078 ++ref_index; 1079 } 1080 1081 if (param->p.show_existing_frame) 1082 return; 1083 1084 for (; ref_index < REF_FRAMES; ++ref_index) 1085 vp9->ref_frame_map[ref_index] = 1086 vp9->next_ref_frame_map[ref_index]; 1087} 1088 1089static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9, 1090 int idx) 1091{ 1092 struct vp9_frame *frame; 1093 1094 list_for_each_entry(frame, &vp9->ref_frames_list, list) { 1095 if (frame->index == idx) 1096 return frame; 1097 } 1098 1099 return NULL; 1100} 1101 1102static void codec_vp9_sync_ref(struct codec_vp9 *vp9) 1103{ 1104 union rpm_param *param = &vp9->rpm_param; 1105 int i; 1106 1107 for (i = 0; i < REFS_PER_FRAME; ++i) { 1108 const int ref = (param->p.ref_info >> 1109 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; 1110 const int idx = vp9->ref_frame_map[ref]; 1111 1112 vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx); 1113 if (!vp9->frame_refs[i]) 1114 pr_warn("%s: couldn't find VP9 ref %d\n", __func__, 1115 idx); 1116 } 1117} 1118 1119static void codec_vp9_set_refs(struct amvdec_session *sess, 1120 struct codec_vp9 *vp9) 1121{ 1122 struct amvdec_core *core = sess->core; 1123 int i; 1124 1125 for (i = 0; i < REFS_PER_FRAME; ++i) { 1126 struct vp9_frame *frame = vp9->frame_refs[i]; 1127 int id_y; 1128 int id_u_v; 1129 1130 if (!frame) 1131 continue; 1132 1133 if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { 1134 id_y = frame->index; 1135 id_u_v = id_y; 1136 } else { 1137 id_y = frame->index * 2; 1138 id_u_v = id_y + 1; 1139 } 1140 1141 amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR, 1142 (id_u_v << 16) | (id_u_v << 8) | id_y); 1143 } 1144} 1145 1146static void codec_vp9_set_mc(struct amvdec_session *sess, 1147 struct codec_vp9 *vp9) 1148{ 1149 struct amvdec_core *core = sess->core; 1150 u32 scale = 0; 1151 u32 sz; 1152 int i; 1153 1154 amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1); 1155 codec_vp9_set_refs(sess, vp9); 1156 amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1157 (16 << 8) | 1); 1158 codec_vp9_set_refs(sess, vp9); 1159 1160 amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2)); 1161 for (i = 0; i < REFS_PER_FRAME; ++i) { 1162 if (!vp9->frame_refs[i]) 1163 continue; 1164 1165 if (vp9->frame_refs[i]->width != vp9->width || 1166 vp9->frame_refs[i]->height != vp9->height) 1167 scale = 1; 1168 1169 sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width, 1170 vp9->frame_refs[i]->height); 1171 1172 amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, 1173 vp9->frame_refs[i]->width); 1174 amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, 1175 vp9->frame_refs[i]->height); 1176 amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, 1177 (vp9->frame_refs[i]->width << 14) / 1178 vp9->width); 1179 amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, 1180 (vp9->frame_refs[i]->height << 14) / 1181 vp9->height); 1182 amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5); 1183 } 1184 1185 amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale); 1186} 1187 1188static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess) 1189{ 1190 struct codec_vp9 *vp9 = sess->priv; 1191 union rpm_param *param = &vp9->rpm_param; 1192 struct vb2_v4l2_buffer *vbuf; 1193 struct vp9_frame *new_frame; 1194 1195 new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL); 1196 if (!new_frame) 1197 return NULL; 1198 1199 vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); 1200 if (!vbuf) { 1201 dev_err(sess->core->dev, "No dst buffer available\n"); 1202 kfree(new_frame); 1203 return NULL; 1204 } 1205 1206 while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) { 1207 struct vb2_v4l2_buffer *old_vbuf = vbuf; 1208 1209 vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); 1210 v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf); 1211 if (!vbuf) { 1212 dev_err(sess->core->dev, "No dst buffer available\n"); 1213 kfree(new_frame); 1214 return NULL; 1215 } 1216 } 1217 1218 new_frame->vbuf = vbuf; 1219 new_frame->index = vbuf->vb2_buf.index; 1220 new_frame->intra_only = param->p.intra_only; 1221 new_frame->show = param->p.show_frame; 1222 new_frame->type = param->p.frame_type; 1223 new_frame->width = vp9->width; 1224 new_frame->height = vp9->height; 1225 list_add_tail(&new_frame->list, &vp9->ref_frames_list); 1226 vp9->frames_num++; 1227 1228 return new_frame; 1229} 1230 1231static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9) 1232{ 1233 union rpm_param *param = &vp9->rpm_param; 1234 1235 if (!param->p.show_existing_frame) 1236 return; 1237 1238 pr_debug("showing frame %u\n", param->p.frame_to_show_idx); 1239} 1240 1241static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess) 1242{ 1243 struct codec_vp9 *vp9 = sess->priv; 1244 struct vp9_frame *tmp; 1245 1246 list_for_each_entry(tmp, &vp9->ref_frames_list, list) { 1247 if (tmp->show) 1248 continue; 1249 1250 pr_debug("rm noshow: %u\n", tmp->index); 1251 v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); 1252 list_del(&tmp->list); 1253 kfree(tmp); 1254 vp9->frames_num--; 1255 return; 1256 } 1257} 1258 1259static void codec_vp9_process_frame(struct amvdec_session *sess) 1260{ 1261 struct amvdec_core *core = sess->core; 1262 struct codec_vp9 *vp9 = sess->priv; 1263 union rpm_param *param = &vp9->rpm_param; 1264 int intra_only; 1265 1266 if (!param->p.show_frame) 1267 codec_vp9_rm_noshow_frame(sess); 1268 1269 vp9->cur_frame = codec_vp9_get_new_frame(sess); 1270 if (!vp9->cur_frame) 1271 return; 1272 1273 pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n", 1274 vp9->cur_frame->index, 1275 param->p.frame_type, param->p.show_existing_frame, 1276 param->p.show_frame, param->p.intra_only); 1277 1278 if (param->p.frame_type != KEY_FRAME) 1279 codec_vp9_sync_ref(vp9); 1280 codec_vp9_update_next_ref(vp9); 1281 codec_vp9_show_existing_frame(vp9); 1282 1283 if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, 1284 vp9->is_10bit)) 1285 codec_hevc_fill_mmu_map(sess, &vp9->common, 1286 &vp9->cur_frame->vbuf->vb2_buf); 1287 1288 intra_only = param->p.show_frame ? 0 : param->p.intra_only; 1289 1290 /* clear mpred (for keyframe only) */ 1291 if (param->p.frame_type != KEY_FRAME && !intra_only) { 1292 codec_vp9_set_mc(sess, vp9); 1293 codec_vp9_set_mpred_mv(core, vp9); 1294 } else { 1295 amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); 1296 } 1297 1298 amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE, 1299 (vp9->height << 16) | vp9->width); 1300 codec_vp9_set_mcrcc(sess); 1301 codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf); 1302 1303 vp9_loop_filter_frame_init(core, &vp9->seg_4lf, 1304 &vp9->lfi, &vp9->lf, 1305 vp9->default_filt_lvl); 1306 1307 /* ask uCode to start decoding */ 1308 amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE); 1309} 1310 1311static void codec_vp9_process_lf(struct codec_vp9 *vp9) 1312{ 1313 union rpm_param *param = &vp9->rpm_param; 1314 int i; 1315 1316 vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled; 1317 vp9->lf.sharpness_level = param->p.sharpness_level; 1318 vp9->default_filt_lvl = param->p.filter_level; 1319 vp9->seg_4lf.enabled = param->p.seg_enabled; 1320 vp9->seg_4lf.abs_delta = param->p.seg_abs_delta; 1321 1322 for (i = 0; i < 4; i++) 1323 vp9->lf.ref_deltas[i] = param->p.ref_deltas[i]; 1324 1325 for (i = 0; i < 2; i++) 1326 vp9->lf.mode_deltas[i] = param->p.mode_deltas[i]; 1327 1328 for (i = 0; i < MAX_SEGMENTS; i++) 1329 vp9->seg_4lf.feature_mask[i] = 1330 (param->p.seg_lf_info[i] & 0x8000) ? 1331 (1 << SEG_LVL_ALT_LF) : 0; 1332 1333 for (i = 0; i < MAX_SEGMENTS; i++) 1334 vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] = 1335 (param->p.seg_lf_info[i] & 0x100) ? 1336 -(param->p.seg_lf_info[i] & 0x3f) 1337 : (param->p.seg_lf_info[i] & 0x3f); 1338} 1339 1340static void codec_vp9_resume(struct amvdec_session *sess) 1341{ 1342 struct codec_vp9 *vp9 = sess->priv; 1343 1344 mutex_lock(&vp9->lock); 1345 if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) { 1346 mutex_unlock(&vp9->lock); 1347 amvdec_abort(sess); 1348 return; 1349 } 1350 1351 codec_vp9_setup_workspace(sess, vp9); 1352 codec_hevc_setup_decode_head(sess, vp9->is_10bit); 1353 codec_vp9_process_lf(vp9); 1354 codec_vp9_process_frame(sess); 1355 1356 mutex_unlock(&vp9->lock); 1357} 1358 1359/* 1360 * The RPM section within the workspace contains 1361 * many information regarding the parsed bitstream 1362 */ 1363static void codec_vp9_fetch_rpm(struct amvdec_session *sess) 1364{ 1365 struct codec_vp9 *vp9 = sess->priv; 1366 u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET; 1367 int i, j; 1368 1369 for (i = 0; i < RPM_BUF_SIZE; i += 4) 1370 for (j = 0; j < 4; j++) 1371 vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j]; 1372} 1373 1374static int codec_vp9_process_rpm(struct codec_vp9 *vp9) 1375{ 1376 union rpm_param *param = &vp9->rpm_param; 1377 int src_changed = 0; 1378 int is_10bit = 0; 1379 int pic_width_64 = ALIGN(param->p.width, 64); 1380 int pic_height_32 = ALIGN(param->p.height, 32); 1381 int pic_width_lcu = (pic_width_64 % LCU_SIZE) ? 1382 pic_width_64 / LCU_SIZE + 1 1383 : pic_width_64 / LCU_SIZE; 1384 int pic_height_lcu = (pic_height_32 % LCU_SIZE) ? 1385 pic_height_32 / LCU_SIZE + 1 1386 : pic_height_32 / LCU_SIZE; 1387 vp9->lcu_total = pic_width_lcu * pic_height_lcu; 1388 1389 if (param->p.bit_depth == 10) 1390 is_10bit = 1; 1391 1392 if (vp9->width != param->p.width || vp9->height != param->p.height || 1393 vp9->is_10bit != is_10bit) 1394 src_changed = 1; 1395 1396 vp9->width = param->p.width; 1397 vp9->height = param->p.height; 1398 vp9->is_10bit = is_10bit; 1399 1400 pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n", 1401 vp9->width, vp9->height, is_10bit, src_changed); 1402 1403 return src_changed; 1404} 1405 1406static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame) 1407{ 1408 int i; 1409 1410 for (i = 0; i < REF_FRAMES; ++i) 1411 if (vp9->ref_frame_map[i] == frame->index) 1412 return true; 1413 1414 return false; 1415} 1416 1417static void codec_vp9_show_frame(struct amvdec_session *sess) 1418{ 1419 struct codec_vp9 *vp9 = sess->priv; 1420 struct vp9_frame *tmp, *n; 1421 1422 list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { 1423 if (!tmp->show || tmp == vp9->cur_frame) 1424 continue; 1425 1426 if (!tmp->done) { 1427 pr_debug("Doning %u\n", tmp->index); 1428 amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE); 1429 tmp->done = 1; 1430 vp9->frames_num--; 1431 } 1432 1433 if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame) 1434 continue; 1435 1436 pr_debug("deleting %d\n", tmp->index); 1437 list_del(&tmp->list); 1438 kfree(tmp); 1439 } 1440} 1441 1442static void vp9_tree_merge_probs(unsigned int *prev_prob, 1443 unsigned int *cur_prob, 1444 int coef_node_start, int tree_left, 1445 int tree_right, 1446 int tree_i, int node) 1447{ 1448 int prob_32, prob_res, prob_shift; 1449 int pre_prob, new_prob; 1450 int den, m_count, get_prob, factor; 1451 1452 prob_32 = prev_prob[coef_node_start / 4 * 2]; 1453 prob_res = coef_node_start & 3; 1454 prob_shift = prob_res * 8; 1455 pre_prob = (prob_32 >> prob_shift) & 0xff; 1456 1457 den = tree_left + tree_right; 1458 1459 if (den == 0) { 1460 new_prob = pre_prob; 1461 } else { 1462 m_count = min(den, MODE_MV_COUNT_SAT); 1463 get_prob = 1464 clip_prob(div_r32(((int64_t)tree_left * 256 + 1465 (den >> 1)), 1466 den)); 1467 1468 /* weighted_prob */ 1469 factor = count_to_update_factor[m_count]; 1470 new_prob = round_power_of_two(pre_prob * (256 - factor) + 1471 get_prob * factor, 8); 1472 } 1473 1474 cur_prob[coef_node_start / 4 * 2] = 1475 (cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | 1476 (new_prob << prob_shift); 1477} 1478 1479static void adapt_coef_probs_cxt(unsigned int *prev_prob, 1480 unsigned int *cur_prob, 1481 unsigned int *count, 1482 int update_factor, 1483 int cxt_num, 1484 int coef_cxt_start, 1485 int coef_count_cxt_start) 1486{ 1487 int prob_32, prob_res, prob_shift; 1488 int pre_prob, new_prob; 1489 int num, den, m_count, get_prob, factor; 1490 int node, coef_node_start; 1491 int count_sat = 24; 1492 int cxt; 1493 1494 for (cxt = 0; cxt < cxt_num; cxt++) { 1495 const int n0 = count[coef_count_cxt_start]; 1496 const int n1 = count[coef_count_cxt_start + 1]; 1497 const int n2 = count[coef_count_cxt_start + 2]; 1498 const int neob = count[coef_count_cxt_start + 3]; 1499 const int nneob = count[coef_count_cxt_start + 4]; 1500 const unsigned int branch_ct[3][2] = { 1501 { neob, nneob }, 1502 { n0, n1 + n2 }, 1503 { n1, n2 } 1504 }; 1505 1506 coef_node_start = coef_cxt_start; 1507 for (node = 0 ; node < 3 ; node++) { 1508 prob_32 = prev_prob[coef_node_start / 4 * 2]; 1509 prob_res = coef_node_start & 3; 1510 prob_shift = prob_res * 8; 1511 pre_prob = (prob_32 >> prob_shift) & 0xff; 1512 1513 /* get binary prob */ 1514 num = branch_ct[node][0]; 1515 den = branch_ct[node][0] + branch_ct[node][1]; 1516 m_count = min(den, count_sat); 1517 1518 get_prob = (den == 0) ? 1519 128u : 1520 clip_prob(div_r32(((int64_t)num * 256 + 1521 (den >> 1)), den)); 1522 1523 factor = update_factor * m_count / count_sat; 1524 new_prob = 1525 round_power_of_two(pre_prob * (256 - factor) + 1526 get_prob * factor, 8); 1527 1528 cur_prob[coef_node_start / 4 * 2] = 1529 (cur_prob[coef_node_start / 4 * 2] & 1530 (~(0xff << prob_shift))) | 1531 (new_prob << prob_shift); 1532 1533 coef_node_start += 1; 1534 } 1535 1536 coef_cxt_start = coef_cxt_start + 3; 1537 coef_count_cxt_start = coef_count_cxt_start + 5; 1538 } 1539} 1540 1541static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc, 1542 unsigned int *prev_prob, unsigned int *cur_prob, 1543 unsigned int *count) 1544{ 1545 int tx_size, coef_tx_size_start, coef_count_tx_size_start; 1546 int plane, coef_plane_start, coef_count_plane_start; 1547 int type, coef_type_start, coef_count_type_start; 1548 int band, coef_band_start, coef_count_band_start; 1549 int cxt_num; 1550 int coef_cxt_start, coef_count_cxt_start; 1551 int node, coef_node_start, coef_count_node_start; 1552 1553 int tree_i, tree_left, tree_right; 1554 int mvd_i; 1555 1556 int update_factor = cur_kf ? 112 : (prev_kf ? 128 : 112); 1557 1558 int prob_32; 1559 int prob_res; 1560 int prob_shift; 1561 int pre_prob; 1562 1563 int den; 1564 int get_prob; 1565 int m_count; 1566 int factor; 1567 1568 int new_prob; 1569 1570 for (tx_size = 0 ; tx_size < 4 ; tx_size++) { 1571 coef_tx_size_start = VP9_COEF_START + 1572 tx_size * 4 * VP9_COEF_SIZE_ONE_SET; 1573 coef_count_tx_size_start = VP9_COEF_COUNT_START + 1574 tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET; 1575 coef_plane_start = coef_tx_size_start; 1576 coef_count_plane_start = coef_count_tx_size_start; 1577 1578 for (plane = 0 ; plane < 2 ; plane++) { 1579 coef_type_start = coef_plane_start; 1580 coef_count_type_start = coef_count_plane_start; 1581 1582 for (type = 0 ; type < 2 ; type++) { 1583 coef_band_start = coef_type_start; 1584 coef_count_band_start = coef_count_type_start; 1585 1586 for (band = 0 ; band < 6 ; band++) { 1587 if (band == 0) 1588 cxt_num = 3; 1589 else 1590 cxt_num = 6; 1591 coef_cxt_start = coef_band_start; 1592 coef_count_cxt_start = 1593 coef_count_band_start; 1594 1595 adapt_coef_probs_cxt(prev_prob, 1596 cur_prob, 1597 count, 1598 update_factor, 1599 cxt_num, 1600 coef_cxt_start, 1601 coef_count_cxt_start); 1602 1603 if (band == 0) { 1604 coef_band_start += 10; 1605 coef_count_band_start += 15; 1606 } else { 1607 coef_band_start += 18; 1608 coef_count_band_start += 30; 1609 } 1610 } 1611 coef_type_start += VP9_COEF_SIZE_ONE_SET; 1612 coef_count_type_start += 1613 VP9_COEF_COUNT_SIZE_ONE_SET; 1614 } 1615 1616 coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET; 1617 coef_count_plane_start += 1618 2 * VP9_COEF_COUNT_SIZE_ONE_SET; 1619 } 1620 } 1621 1622 if (cur_kf == 0) { 1623 /* mode_mv_merge_probs - merge_intra_inter_prob */ 1624 for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START; 1625 coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START + 1626 VP9_MV_CLASS0_HP_1_COUNT_SIZE); 1627 coef_count_node_start += 2) { 1628 if (coef_count_node_start == 1629 VP9_INTRA_INTER_COUNT_START) 1630 coef_node_start = VP9_INTRA_INTER_START; 1631 else if (coef_count_node_start == 1632 VP9_COMP_INTER_COUNT_START) 1633 coef_node_start = VP9_COMP_INTER_START; 1634 else if (coef_count_node_start == 1635 VP9_TX_MODE_COUNT_START) 1636 coef_node_start = VP9_TX_MODE_START; 1637 else if (coef_count_node_start == 1638 VP9_SKIP_COUNT_START) 1639 coef_node_start = VP9_SKIP_START; 1640 else if (coef_count_node_start == 1641 VP9_MV_SIGN_0_COUNT_START) 1642 coef_node_start = VP9_MV_SIGN_0_START; 1643 else if (coef_count_node_start == 1644 VP9_MV_SIGN_1_COUNT_START) 1645 coef_node_start = VP9_MV_SIGN_1_START; 1646 else if (coef_count_node_start == 1647 VP9_MV_BITS_0_COUNT_START) 1648 coef_node_start = VP9_MV_BITS_0_START; 1649 else if (coef_count_node_start == 1650 VP9_MV_BITS_1_COUNT_START) 1651 coef_node_start = VP9_MV_BITS_1_START; 1652 else /* node_start == VP9_MV_CLASS0_HP_0_COUNT_START */ 1653 coef_node_start = VP9_MV_CLASS0_HP_0_START; 1654 1655 den = count[coef_count_node_start] + 1656 count[coef_count_node_start + 1]; 1657 1658 prob_32 = prev_prob[coef_node_start / 4 * 2]; 1659 prob_res = coef_node_start & 3; 1660 prob_shift = prob_res * 8; 1661 pre_prob = (prob_32 >> prob_shift) & 0xff; 1662 1663 if (den == 0) { 1664 new_prob = pre_prob; 1665 } else { 1666 m_count = min(den, MODE_MV_COUNT_SAT); 1667 get_prob = 1668 clip_prob(div_r32(((int64_t) 1669 count[coef_count_node_start] * 256 + 1670 (den >> 1)), 1671 den)); 1672 1673 /* weighted prob */ 1674 factor = count_to_update_factor[m_count]; 1675 new_prob = 1676 round_power_of_two(pre_prob * 1677 (256 - factor) + 1678 get_prob * factor, 1679 8); 1680 } 1681 1682 cur_prob[coef_node_start / 4 * 2] = 1683 (cur_prob[coef_node_start / 4 * 2] & 1684 (~(0xff << prob_shift))) | 1685 (new_prob << prob_shift); 1686 1687 coef_node_start = coef_node_start + 1; 1688 } 1689 1690 coef_node_start = VP9_INTER_MODE_START; 1691 coef_count_node_start = VP9_INTER_MODE_COUNT_START; 1692 for (tree_i = 0 ; tree_i < 7 ; tree_i++) { 1693 for (node = 0 ; node < 3 ; node++) { 1694 unsigned int start = coef_count_node_start; 1695 1696 switch (node) { 1697 case 2: 1698 tree_left = count[start + 1]; 1699 tree_right = count[start + 3]; 1700 break; 1701 case 1: 1702 tree_left = count[start + 0]; 1703 tree_right = count[start + 1] + 1704 count[start + 3]; 1705 break; 1706 default: 1707 tree_left = count[start + 2]; 1708 tree_right = count[start + 0] + 1709 count[start + 1] + 1710 count[start + 3]; 1711 break; 1712 } 1713 1714 vp9_tree_merge_probs(prev_prob, cur_prob, 1715 coef_node_start, 1716 tree_left, tree_right, 1717 tree_i, node); 1718 1719 coef_node_start = coef_node_start + 1; 1720 } 1721 1722 coef_count_node_start = coef_count_node_start + 4; 1723 } 1724 1725 coef_node_start = VP9_IF_Y_MODE_START; 1726 coef_count_node_start = VP9_IF_Y_MODE_COUNT_START; 1727 for (tree_i = 0 ; tree_i < 14 ; tree_i++) { 1728 for (node = 0 ; node < 9 ; node++) { 1729 unsigned int start = coef_count_node_start; 1730 1731 switch (node) { 1732 case 8: 1733 tree_left = 1734 count[start + D153_PRED]; 1735 tree_right = 1736 count[start + D207_PRED]; 1737 break; 1738 case 7: 1739 tree_left = 1740 count[start + D63_PRED]; 1741 tree_right = 1742 count[start + D207_PRED] + 1743 count[start + D153_PRED]; 1744 break; 1745 case 6: 1746 tree_left = 1747 count[start + D45_PRED]; 1748 tree_right = 1749 count[start + D207_PRED] + 1750 count[start + D153_PRED] + 1751 count[start + D63_PRED]; 1752 break; 1753 case 5: 1754 tree_left = 1755 count[start + D135_PRED]; 1756 tree_right = 1757 count[start + D117_PRED]; 1758 break; 1759 case 4: 1760 tree_left = 1761 count[start + H_PRED]; 1762 tree_right = 1763 count[start + D117_PRED] + 1764 count[start + D135_PRED]; 1765 break; 1766 case 3: 1767 tree_left = 1768 count[start + H_PRED] + 1769 count[start + D117_PRED] + 1770 count[start + D135_PRED]; 1771 tree_right = 1772 count[start + D45_PRED] + 1773 count[start + D207_PRED] + 1774 count[start + D153_PRED] + 1775 count[start + D63_PRED]; 1776 break; 1777 case 2: 1778 tree_left = 1779 count[start + V_PRED]; 1780 tree_right = 1781 count[start + H_PRED] + 1782 count[start + D117_PRED] + 1783 count[start + D135_PRED] + 1784 count[start + D45_PRED] + 1785 count[start + D207_PRED] + 1786 count[start + D153_PRED] + 1787 count[start + D63_PRED]; 1788 break; 1789 case 1: 1790 tree_left = 1791 count[start + TM_PRED]; 1792 tree_right = 1793 count[start + V_PRED] + 1794 count[start + H_PRED] + 1795 count[start + D117_PRED] + 1796 count[start + D135_PRED] + 1797 count[start + D45_PRED] + 1798 count[start + D207_PRED] + 1799 count[start + D153_PRED] + 1800 count[start + D63_PRED]; 1801 break; 1802 default: 1803 tree_left = 1804 count[start + DC_PRED]; 1805 tree_right = 1806 count[start + TM_PRED] + 1807 count[start + V_PRED] + 1808 count[start + H_PRED] + 1809 count[start + D117_PRED] + 1810 count[start + D135_PRED] + 1811 count[start + D45_PRED] + 1812 count[start + D207_PRED] + 1813 count[start + D153_PRED] + 1814 count[start + D63_PRED]; 1815 break; 1816 } 1817 1818 vp9_tree_merge_probs(prev_prob, cur_prob, 1819 coef_node_start, 1820 tree_left, tree_right, 1821 tree_i, node); 1822 1823 coef_node_start = coef_node_start + 1; 1824 } 1825 coef_count_node_start = coef_count_node_start + 10; 1826 } 1827 1828 coef_node_start = VP9_PARTITION_P_START; 1829 coef_count_node_start = VP9_PARTITION_P_COUNT_START; 1830 for (tree_i = 0 ; tree_i < 16 ; tree_i++) { 1831 for (node = 0 ; node < 3 ; node++) { 1832 unsigned int start = coef_count_node_start; 1833 1834 switch (node) { 1835 case 2: 1836 tree_left = count[start + 2]; 1837 tree_right = count[start + 3]; 1838 break; 1839 case 1: 1840 tree_left = count[start + 1]; 1841 tree_right = count[start + 2] + 1842 count[start + 3]; 1843 break; 1844 default: 1845 tree_left = count[start + 0]; 1846 tree_right = count[start + 1] + 1847 count[start + 2] + 1848 count[start + 3]; 1849 break; 1850 } 1851 1852 vp9_tree_merge_probs(prev_prob, cur_prob, 1853 coef_node_start, 1854 tree_left, tree_right, 1855 tree_i, node); 1856 1857 coef_node_start = coef_node_start + 1; 1858 } 1859 1860 coef_count_node_start = coef_count_node_start + 4; 1861 } 1862 1863 coef_node_start = VP9_INTERP_START; 1864 coef_count_node_start = VP9_INTERP_COUNT_START; 1865 for (tree_i = 0 ; tree_i < 4 ; tree_i++) { 1866 for (node = 0 ; node < 2 ; node++) { 1867 unsigned int start = coef_count_node_start; 1868 1869 switch (node) { 1870 case 1: 1871 tree_left = count[start + 1]; 1872 tree_right = count[start + 2]; 1873 break; 1874 default: 1875 tree_left = count[start + 0]; 1876 tree_right = count[start + 1] + 1877 count[start + 2]; 1878 break; 1879 } 1880 1881 vp9_tree_merge_probs(prev_prob, cur_prob, 1882 coef_node_start, 1883 tree_left, tree_right, 1884 tree_i, node); 1885 1886 coef_node_start = coef_node_start + 1; 1887 } 1888 coef_count_node_start = coef_count_node_start + 3; 1889 } 1890 1891 coef_node_start = VP9_MV_JOINTS_START; 1892 coef_count_node_start = VP9_MV_JOINTS_COUNT_START; 1893 for (tree_i = 0 ; tree_i < 1 ; tree_i++) { 1894 for (node = 0 ; node < 3 ; node++) { 1895 unsigned int start = coef_count_node_start; 1896 1897 switch (node) { 1898 case 2: 1899 tree_left = count[start + 2]; 1900 tree_right = count[start + 3]; 1901 break; 1902 case 1: 1903 tree_left = count[start + 1]; 1904 tree_right = count[start + 2] + 1905 count[start + 3]; 1906 break; 1907 default: 1908 tree_left = count[start + 0]; 1909 tree_right = count[start + 1] + 1910 count[start + 2] + 1911 count[start + 3]; 1912 break; 1913 } 1914 1915 vp9_tree_merge_probs(prev_prob, cur_prob, 1916 coef_node_start, 1917 tree_left, tree_right, 1918 tree_i, node); 1919 1920 coef_node_start = coef_node_start + 1; 1921 } 1922 coef_count_node_start = coef_count_node_start + 4; 1923 } 1924 1925 for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) { 1926 coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START : 1927 VP9_MV_CLASSES_0_START; 1928 coef_count_node_start = mvd_i ? 1929 VP9_MV_CLASSES_1_COUNT_START : 1930 VP9_MV_CLASSES_0_COUNT_START; 1931 tree_i = 0; 1932 for (node = 0; node < 10; node++) { 1933 unsigned int start = coef_count_node_start; 1934 1935 switch (node) { 1936 case 9: 1937 tree_left = count[start + 9]; 1938 tree_right = count[start + 10]; 1939 break; 1940 case 8: 1941 tree_left = count[start + 7]; 1942 tree_right = count[start + 8]; 1943 break; 1944 case 7: 1945 tree_left = count[start + 7] + 1946 count[start + 8]; 1947 tree_right = count[start + 9] + 1948 count[start + 10]; 1949 break; 1950 case 6: 1951 tree_left = count[start + 6]; 1952 tree_right = count[start + 7] + 1953 count[start + 8] + 1954 count[start + 9] + 1955 count[start + 10]; 1956 break; 1957 case 5: 1958 tree_left = count[start + 4]; 1959 tree_right = count[start + 5]; 1960 break; 1961 case 4: 1962 tree_left = count[start + 4] + 1963 count[start + 5]; 1964 tree_right = count[start + 6] + 1965 count[start + 7] + 1966 count[start + 8] + 1967 count[start + 9] + 1968 count[start + 10]; 1969 break; 1970 case 3: 1971 tree_left = count[start + 2]; 1972 tree_right = count[start + 3]; 1973 break; 1974 case 2: 1975 tree_left = count[start + 2] + 1976 count[start + 3]; 1977 tree_right = count[start + 4] + 1978 count[start + 5] + 1979 count[start + 6] + 1980 count[start + 7] + 1981 count[start + 8] + 1982 count[start + 9] + 1983 count[start + 10]; 1984 break; 1985 case 1: 1986 tree_left = count[start + 1]; 1987 tree_right = count[start + 2] + 1988 count[start + 3] + 1989 count[start + 4] + 1990 count[start + 5] + 1991 count[start + 6] + 1992 count[start + 7] + 1993 count[start + 8] + 1994 count[start + 9] + 1995 count[start + 10]; 1996 break; 1997 default: 1998 tree_left = count[start + 0]; 1999 tree_right = count[start + 1] + 2000 count[start + 2] + 2001 count[start + 3] + 2002 count[start + 4] + 2003 count[start + 5] + 2004 count[start + 6] + 2005 count[start + 7] + 2006 count[start + 8] + 2007 count[start + 9] + 2008 count[start + 10]; 2009 break; 2010 } 2011 2012 vp9_tree_merge_probs(prev_prob, cur_prob, 2013 coef_node_start, 2014 tree_left, tree_right, 2015 tree_i, node); 2016 2017 coef_node_start = coef_node_start + 1; 2018 } 2019 2020 coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START : 2021 VP9_MV_CLASS0_0_START; 2022 coef_count_node_start = mvd_i ? 2023 VP9_MV_CLASS0_1_COUNT_START : 2024 VP9_MV_CLASS0_0_COUNT_START; 2025 tree_i = 0; 2026 node = 0; 2027 tree_left = count[coef_count_node_start + 0]; 2028 tree_right = count[coef_count_node_start + 1]; 2029 2030 vp9_tree_merge_probs(prev_prob, cur_prob, 2031 coef_node_start, 2032 tree_left, tree_right, 2033 tree_i, node); 2034 coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START : 2035 VP9_MV_CLASS0_FP_0_START; 2036 coef_count_node_start = mvd_i ? 2037 VP9_MV_CLASS0_FP_1_COUNT_START : 2038 VP9_MV_CLASS0_FP_0_COUNT_START; 2039 2040 for (tree_i = 0; tree_i < 3; tree_i++) { 2041 for (node = 0; node < 3; node++) { 2042 unsigned int start = 2043 coef_count_node_start; 2044 switch (node) { 2045 case 2: 2046 tree_left = count[start + 2]; 2047 tree_right = count[start + 3]; 2048 break; 2049 case 1: 2050 tree_left = count[start + 1]; 2051 tree_right = count[start + 2] + 2052 count[start + 3]; 2053 break; 2054 default: 2055 tree_left = count[start + 0]; 2056 tree_right = count[start + 1] + 2057 count[start + 2] + 2058 count[start + 3]; 2059 break; 2060 } 2061 2062 vp9_tree_merge_probs(prev_prob, 2063 cur_prob, 2064 coef_node_start, 2065 tree_left, 2066 tree_right, 2067 tree_i, node); 2068 2069 coef_node_start = coef_node_start + 1; 2070 } 2071 coef_count_node_start = 2072 coef_count_node_start + 4; 2073 } 2074 } 2075 } 2076} 2077 2078static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess) 2079{ 2080 struct amvdec_core *core = sess->core; 2081 struct codec_vp9 *vp9 = sess->priv; 2082 u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG); 2083 u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG); 2084 int i; 2085 2086 if (!vp9) 2087 return IRQ_HANDLED; 2088 2089 mutex_lock(&vp9->lock); 2090 if (dec_status != VP9_HEAD_PARSER_DONE) { 2091 dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n", 2092 dec_status); 2093 amvdec_abort(sess); 2094 goto unlock; 2095 } 2096 2097 pr_debug("ISR: %08X;%08X\n", dec_status, prob_status); 2098 sess->keyframe_found = 1; 2099 2100 if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) { 2101 /* VP9_REQ_ADAPT_PROB */ 2102 u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr + 2103 PROB_OFFSET) + 2104 ((prob_status >> 8) * 0x1000); 2105 u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr + 2106 PROB_OFFSET) + 0x4000; 2107 u8 *count_b = (u8 *)vp9->workspace_vaddr + 2108 COUNT_OFFSET; 2109 int last_frame_type = vp9->prev_frame ? 2110 vp9->prev_frame->type : 2111 KEY_FRAME; 2112 2113 adapt_coef_probs(last_frame_type == KEY_FRAME, 2114 vp9->cur_frame->type == KEY_FRAME ? 1 : 0, 2115 prob_status >> 8, 2116 (unsigned int *)prev_prob_b, 2117 (unsigned int *)cur_prob_b, 2118 (unsigned int *)count_b); 2119 2120 memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE); 2121 amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0); 2122 } 2123 2124 /* Invalidate first 3 refs */ 2125 for (i = 0; i < REFS_PER_FRAME ; ++i) 2126 vp9->frame_refs[i] = NULL; 2127 2128 vp9->prev_frame = vp9->cur_frame; 2129 codec_vp9_update_ref(vp9); 2130 2131 codec_vp9_fetch_rpm(sess); 2132 if (codec_vp9_process_rpm(vp9)) { 2133 amvdec_src_change(sess, vp9->width, vp9->height, 16); 2134 2135 /* No frame is actually processed */ 2136 vp9->cur_frame = NULL; 2137 2138 /* Show the remaining frame */ 2139 codec_vp9_show_frame(sess); 2140 2141 /* FIXME: Save refs for resized frame */ 2142 if (vp9->frames_num) 2143 codec_vp9_save_refs(vp9); 2144 2145 goto unlock; 2146 } 2147 2148 codec_vp9_process_lf(vp9); 2149 codec_vp9_process_frame(sess); 2150 codec_vp9_show_frame(sess); 2151 2152unlock: 2153 mutex_unlock(&vp9->lock); 2154 return IRQ_HANDLED; 2155} 2156 2157static irqreturn_t codec_vp9_isr(struct amvdec_session *sess) 2158{ 2159 return IRQ_WAKE_THREAD; 2160} 2161 2162struct amvdec_codec_ops codec_vp9_ops = { 2163 .start = codec_vp9_start, 2164 .stop = codec_vp9_stop, 2165 .isr = codec_vp9_isr, 2166 .threaded_isr = codec_vp9_threaded_isr, 2167 .num_pending_bufs = codec_vp9_num_pending_bufs, 2168 .drain = codec_vp9_flush_output, 2169 .resume = codec_vp9_resume, 2170}; 2171