1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com>
4 * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
5 */
6
7#include <media/v4l2-mem2mem.h>
8#include <media/videobuf2-dma-contig.h>
9
10#include "dos_regs.h"
11#include "hevc_regs.h"
12#include "codec_vp9.h"
13#include "vdec_helpers.h"
14#include "codec_hevc_common.h"
15
/*
 * HEVC reg mapping
 *
 * Each name below is an alias for one of the HEVC_ASSIST_SCRATCH_*
 * registers. The plain constants listed directly under an alias are
 * presumably values exchanged through that register (TODO confirm against
 * the firmware interface).
 */
#define VP9_DEC_STATUS_REG		HEVC_ASSIST_SCRATCH_0
#define VP9_10B_DECODE_SLICE		5
#define VP9_HEAD_PARSER_DONE		0xf0

#define VP9_RPM_BUFFER			HEVC_ASSIST_SCRATCH_1
#define VP9_SHORT_TERM_RPS		HEVC_ASSIST_SCRATCH_2
#define VP9_ADAPT_PROB_REG		HEVC_ASSIST_SCRATCH_3
#define VP9_MMU_MAP_BUFFER		HEVC_ASSIST_SCRATCH_4
#define VP9_PPS_BUFFER			HEVC_ASSIST_SCRATCH_5
#define VP9_SAO_UP			HEVC_ASSIST_SCRATCH_6
#define VP9_STREAM_SWAP_BUFFER		HEVC_ASSIST_SCRATCH_7
#define VP9_STREAM_SWAP_BUFFER2		HEVC_ASSIST_SCRATCH_8
#define VP9_PROB_SWAP_BUFFER		HEVC_ASSIST_SCRATCH_9
#define VP9_COUNT_SWAP_BUFFER		HEVC_ASSIST_SCRATCH_A
#define VP9_SEG_MAP_BUFFER		HEVC_ASSIST_SCRATCH_B
#define VP9_SCALELUT			HEVC_ASSIST_SCRATCH_D
#define VP9_WAIT_FLAG			HEVC_ASSIST_SCRATCH_E
#define LMEM_DUMP_ADR			HEVC_ASSIST_SCRATCH_F
#define NAL_SEARCH_CTL			HEVC_ASSIST_SCRATCH_I

#define VP9_DECODE_MODE			HEVC_ASSIST_SCRATCH_J
#define DECODE_MODE_SINGLE		0

#define DECODE_STOP_POS			HEVC_ASSIST_SCRATCH_K
#define HEVC_DECODE_COUNT		HEVC_ASSIST_SCRATCH_M
#define HEVC_DECODE_SIZE		HEVC_ASSIST_SCRATCH_N
40
/* VP9 Constants */
#define LCU_SIZE	64
/* Number of reference picture slots (sizes the per-picture buffer arrays) */
#define MAX_REF_PIC_NUM	24
/* Number of entries in codec_vp9.frame_refs */
#define REFS_PER_FRAME	3
/* Number of entries in the (next_)ref_frame_map arrays */
#define REF_FRAMES	8
#define MV_MEM_UNIT	0x240
#define ADAPT_PROB_SIZE	0xf80
48
/* Frame types; FRAME_TYPES is the number of valid types, not a type itself */
enum FRAME_TYPE {
	KEY_FRAME,	/* 0 */
	INTER_FRAME,	/* 1 */
	FRAME_TYPES,	/* 2 */
};
54
/*
 * VP9 Workspace layout
 *
 * The workspace is a single buffer split into back-to-back regions. Every
 * region is described by a <NAME>_SIZE / <NAME>_OFFSET pair, and each offset
 * is the previous region's offset plus the previous region's size.
 */

/* Unit of the MPRED_MV region; multiplied by MAX_REF_PIC_NUM below */
#define MPRED_MV_BUF_SIZE	0x120000

#define IPP_SIZE		0x4000
#define IPP_OFFSET		0x00

#define SAO_ABV_SIZE		0x30000
#define SAO_ABV_OFFSET		(IPP_OFFSET + IPP_SIZE)

#define SAO_VB_SIZE		0x30000
#define SAO_VB_OFFSET		(SAO_ABV_OFFSET + SAO_ABV_SIZE)

#define SH_TM_RPS_SIZE		0x800
#define SH_TM_RPS_OFFSET	(SAO_VB_OFFSET + SAO_VB_SIZE)

#define VPS_SIZE		0x800
#define VPS_OFFSET		(SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE)

#define SPS_SIZE		0x800
#define SPS_OFFSET		(VPS_OFFSET + VPS_SIZE)

#define PPS_SIZE		0x2000
#define PPS_OFFSET		(SPS_OFFSET + SPS_SIZE)

#define SAO_UP_SIZE		0x2800
#define SAO_UP_OFFSET		(PPS_OFFSET + PPS_SIZE)

#define SWAP_BUF_SIZE		0x800
#define SWAP_BUF_OFFSET		(SAO_UP_OFFSET + SAO_UP_SIZE)

#define SWAP_BUF2_SIZE		0x800
#define SWAP_BUF2_OFFSET	(SWAP_BUF_OFFSET + SWAP_BUF_SIZE)

#define SCALELUT_SIZE		0x8000
#define SCALELUT_OFFSET		(SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE)

#define DBLK_PARA_SIZE		0x80000
#define DBLK_PARA_OFFSET	(SCALELUT_OFFSET + SCALELUT_SIZE)

#define DBLK_DATA_SIZE		0x80000
#define DBLK_DATA_OFFSET	(DBLK_PARA_OFFSET + DBLK_PARA_SIZE)

#define SEG_MAP_SIZE		0xd800
#define SEG_MAP_OFFSET		(DBLK_DATA_OFFSET + DBLK_DATA_SIZE)

#define PROB_SIZE		0x5000
#define PROB_OFFSET		(SEG_MAP_OFFSET + SEG_MAP_SIZE)

#define COUNT_SIZE		0x3000
#define COUNT_OFFSET		(PROB_OFFSET + PROB_SIZE)

#define MMU_VBH_SIZE		0x5000
#define MMU_VBH_OFFSET		(COUNT_OFFSET + COUNT_SIZE)

#define MPRED_ABV_SIZE		0x10000
#define MPRED_ABV_OFFSET	(MMU_VBH_OFFSET + MMU_VBH_SIZE)

#define MPRED_MV_SIZE		(MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM)
#define MPRED_MV_OFFSET		(MPRED_ABV_OFFSET + MPRED_ABV_SIZE)

#define RPM_BUF_SIZE		0x100
#define RPM_OFFSET		(MPRED_MV_OFFSET + MPRED_MV_SIZE)

#define LMEM_SIZE		0x800
#define LMEM_OFFSET		(RPM_OFFSET + RPM_BUF_SIZE)

/* Total allocation: end of the last region, rounded up to 64 KiB */
#define SIZE_WORKSPACE	ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K)
103
/*
 * Reference frame identifiers.
 *
 * INTRA_FRAME..ALTREF_FRAME index the per-reference loop filter tables
 * (ref_deltas[] and the second dimension of loop_filter_info_n.lvl), and
 * MAX_REF_FRAMES is the number of such entries.
 *
 * NONE is parenthesized so the negative literal cannot merge with an
 * adjacent operator when the macro is expanded inside an expression.
 */
#define NONE		(-1)
#define INTRA_FRAME	0
#define LAST_FRAME	1
#define GOLDEN_FRAME	2
#define ALTREF_FRAME	3
#define MAX_REF_FRAMES	4
110
/*
 * Defines, declarations and sub-functions for the VP9 de-block loop filter
 * Thr/Lvl table update:
 * - struct segmentation is for the loop filter only (some fields were
 *   removed)
 * - the functions "vp9_loop_filter_init" and "vp9_loop_filter_frame_init"
 *   will be instantiated in C_Entry
 * - vp9_loop_filter_init runs once, before decoding starts
 * - vp9_loop_filter_frame_init runs before the decoding of every frame
 * - setting the video format to VP9 is done in vp9_loop_filter_init
 */
/* Highest loop filter level; the threshold table has MAX_LOOP_FILTER + 1 rows */
#define MAX_LOOP_FILTER			63
/* Number of per-reference and per-mode loop filter deltas */
#define MAX_REF_LF_DELTAS		4
#define MAX_MODE_LF_DELTAS		2
/* Values compared against segmentation.abs_delta */
#define SEGMENT_DELTADATA		0
#define SEGMENT_ABSDATA			1
/* Number of segments tracked per frame */
#define MAX_SEGMENTS			8
127
/*
 * VP9 PROB processing defines
 *
 * The probability groups are laid out back to back: each <GROUP>_START is
 * the START of the previous group plus that group's SIZE. A few *_START /
 * *_SIZE aliases span several consecutive groups at once.
 */

/* Partition: the KEY set starts at 0, the P set follows right after it */
#define VP9_PARTITION_START		0
#define VP9_PARTITION_SIZE_STEP		(3 * 4)
#define VP9_PARTITION_ONE_SIZE		(4 * VP9_PARTITION_SIZE_STEP)
#define VP9_PARTITION_KEY_START		0
#define VP9_PARTITION_P_START		VP9_PARTITION_ONE_SIZE
#define VP9_PARTITION_SIZE		(2 * VP9_PARTITION_ONE_SIZE)

/* Skip: 4 entries reserved, only 3 used */
#define VP9_SKIP_START \
	(VP9_PARTITION_START + VP9_PARTITION_SIZE)
#define VP9_SKIP_SIZE			4

/* TX mode, with the offsets of its sub-tables */
#define VP9_TX_MODE_START		(VP9_SKIP_START + VP9_SKIP_SIZE)
#define VP9_TX_MODE_8_0_OFFSET		0
#define VP9_TX_MODE_8_1_OFFSET		1
#define VP9_TX_MODE_16_0_OFFSET		2
#define VP9_TX_MODE_16_1_OFFSET		4
#define VP9_TX_MODE_32_0_OFFSET		6
#define VP9_TX_MODE_32_1_OFFSET		9
#define VP9_TX_MODE_SIZE		12

/*
 * Coefficients: one set is (3 + 5 * 6) * 3 entries plus 1 entry of padding
 * (inserted after band 0), i.e. 100 entries.
 */
#define VP9_COEF_START \
	(VP9_TX_MODE_START + VP9_TX_MODE_SIZE)
#define VP9_COEF_BAND_0_OFFSET		0
#define VP9_COEF_BAND_1_OFFSET		(VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1)
#define VP9_COEF_BAND_2_OFFSET		(VP9_COEF_BAND_1_OFFSET + 6 * 3)
#define VP9_COEF_BAND_3_OFFSET		(VP9_COEF_BAND_2_OFFSET + 6 * 3)
#define VP9_COEF_BAND_4_OFFSET		(VP9_COEF_BAND_3_OFFSET + 6 * 3)
#define VP9_COEF_BAND_5_OFFSET		(VP9_COEF_BAND_4_OFFSET + 6 * 3)
#define VP9_COEF_SIZE_ONE_SET		100
#define VP9_COEF_4X4_START \
	(VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_8X8_START \
	(VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_16X16_START \
	(VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_32X32_START \
	(VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_SIZE_PLANE		(2 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_SIZE \
	(4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET)

/* Inter mode: 24 entries reserved, only 21 used (# * 7) */
#define VP9_INTER_MODE_START		(VP9_COEF_START + VP9_COEF_SIZE)
#define VP9_INTER_MODE_SIZE		24

/* Interp filter and intra/inter, plus an alias covering both */
#define VP9_INTERP_START \
	(VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE)
#define VP9_INTERP_SIZE			8
#define VP9_INTRA_INTER_START		(VP9_INTERP_START + VP9_INTERP_SIZE)
#define VP9_INTRA_INTER_SIZE		4
#define VP9_INTERP_INTRA_INTER_START	VP9_INTERP_START
#define VP9_INTERP_INTRA_INTER_SIZE \
	(VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE)

/* Comp inter, comp ref and single ref, plus an alias covering all three */
#define VP9_COMP_INTER_START \
	(VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE)
#define VP9_COMP_INTER_SIZE		5
#define VP9_COMP_REF_START \
	(VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE)
#define VP9_COMP_REF_SIZE		5
#define VP9_SINGLE_REF_START \
	(VP9_COMP_REF_START + VP9_COMP_REF_SIZE)
#define VP9_SINGLE_REF_SIZE		10
#define VP9_REF_MODE_START		VP9_COMP_INTER_START
#define VP9_REF_MODE_SIZE \
	(VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE)

/* IF Y mode and IF UV mode (92 entries reserved for UV, only 90 used) */
#define VP9_IF_Y_MODE_START \
	(VP9_REF_MODE_START + VP9_REF_MODE_SIZE)
#define VP9_IF_Y_MODE_SIZE		36
#define VP9_IF_UV_MODE_START \
	(VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE)
#define VP9_IF_UV_MODE_SIZE		92

/* Motion vectors */
#define VP9_MV_JOINTS_START \
	(VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE)
#define VP9_MV_JOINTS_SIZE		3
#define VP9_MV_SIGN_0_START \
	(VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE)
#define VP9_MV_SIGN_0_SIZE		1
#define VP9_MV_CLASSES_0_START \
	(VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE)
#define VP9_MV_CLASSES_0_SIZE		10
#define VP9_MV_CLASS0_0_START \
	(VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE)
#define VP9_MV_CLASS0_0_SIZE		1
#define VP9_MV_BITS_0_START \
	(VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE)
#define VP9_MV_BITS_0_SIZE		10
#define VP9_MV_SIGN_1_START \
	(VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE)
#define VP9_MV_SIGN_1_SIZE		1
#define VP9_MV_CLASSES_1_START \
	(VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE)
#define VP9_MV_CLASSES_1_SIZE		10
#define VP9_MV_CLASS0_1_START \
	(VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE)
#define VP9_MV_CLASS0_1_SIZE		1
#define VP9_MV_BITS_1_START \
	(VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE)
#define VP9_MV_BITS_1_SIZE		10
#define VP9_MV_CLASS0_FP_0_START \
	(VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE)
#define VP9_MV_CLASS0_FP_0_SIZE		9
#define VP9_MV_CLASS0_FP_1_START \
	(VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE)
#define VP9_MV_CLASS0_FP_1_SIZE		9
#define VP9_MV_CLASS0_HP_0_START \
	(VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE)
#define VP9_MV_CLASS0_HP_0_SIZE		2
#define VP9_MV_CLASS0_HP_1_START \
	(VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE)
#define VP9_MV_CLASS0_HP_1_SIZE		2

/* Whole motion vector area: 72 entries reserved, only 69 used */
#define VP9_MV_START			VP9_MV_JOINTS_START
#define VP9_MV_SIZE			72

#define VP9_TOTAL_SIZE			(VP9_MV_START + VP9_MV_SIZE)
219
/*
 * VP9 COUNT mem processing defines
 *
 * Same principle as the PROB layout: the groups are laid out back to back
 * and each <GROUP>_COUNT_START is the previous group's START plus its SIZE.
 */

/* Coefficient counts: one set is (3 + 5 * 6) * 5 = 165 entries */
#define VP9_COEF_COUNT_START		0
#define VP9_COEF_COUNT_BAND_0_OFFSET	0
#define VP9_COEF_COUNT_BAND_1_OFFSET	(VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5)
#define VP9_COEF_COUNT_BAND_2_OFFSET	(VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_3_OFFSET	(VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_4_OFFSET	(VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_5_OFFSET	(VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_SIZE_ONE_SET	165
#define VP9_COEF_COUNT_4X4_START \
	(VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_8X8_START \
	(VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_16X16_START \
	(VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_32X32_START \
	(VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_SIZE_PLANE \
	(2 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_SIZE \
	(4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET)

/* Groups sized as (number of entries) * 2 */
#define VP9_INTRA_INTER_COUNT_START \
	(VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE)
#define VP9_INTRA_INTER_COUNT_SIZE	(4 * 2)
#define VP9_COMP_INTER_COUNT_START \
	(VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE)
#define VP9_COMP_INTER_COUNT_SIZE	(5 * 2)
#define VP9_COMP_REF_COUNT_START \
	(VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE)
#define VP9_COMP_REF_COUNT_SIZE		(5 * 2)
#define VP9_SINGLE_REF_COUNT_START \
	(VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE)
#define VP9_SINGLE_REF_COUNT_SIZE	(10 * 2)
#define VP9_TX_MODE_COUNT_START \
	(VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE)
#define VP9_TX_MODE_COUNT_SIZE		(12 * 2)
#define VP9_SKIP_COUNT_START \
	(VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE)
#define VP9_SKIP_COUNT_SIZE		(3 * 2)
#define VP9_MV_SIGN_0_COUNT_START \
	(VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE)
#define VP9_MV_SIGN_0_COUNT_SIZE	(1 * 2)
#define VP9_MV_SIGN_1_COUNT_START \
	(VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE)
#define VP9_MV_SIGN_1_COUNT_SIZE	(1 * 2)
#define VP9_MV_BITS_0_COUNT_START \
	(VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE)
#define VP9_MV_BITS_0_COUNT_SIZE	(10 * 2)
#define VP9_MV_BITS_1_COUNT_START \
	(VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE)
#define VP9_MV_BITS_1_COUNT_SIZE	(10 * 2)
#define VP9_MV_CLASS0_HP_0_COUNT_START \
	(VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE)
#define VP9_MV_CLASS0_HP_0_COUNT_SIZE	(2 * 2)
#define VP9_MV_CLASS0_HP_1_COUNT_START \
	(VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE)
#define VP9_MV_CLASS0_HP_1_COUNT_SIZE	(2 * 2)

/* Start merge_tree */
#define VP9_INTER_MODE_COUNT_START \
	(VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE)
#define VP9_INTER_MODE_COUNT_SIZE	(7 * 4)
#define VP9_IF_Y_MODE_COUNT_START \
	(VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE)
#define VP9_IF_Y_MODE_COUNT_SIZE	(10 * 4)
#define VP9_IF_UV_MODE_COUNT_START \
	(VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE)
#define VP9_IF_UV_MODE_COUNT_SIZE	(10 * 10)
#define VP9_PARTITION_P_COUNT_START \
	(VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE)
#define VP9_PARTITION_P_COUNT_SIZE	(4 * 4 * 4)
#define VP9_INTERP_COUNT_START \
	(VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE)
#define VP9_INTERP_COUNT_SIZE		(4 * 3)
#define VP9_MV_JOINTS_COUNT_START \
	(VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE)
#define VP9_MV_JOINTS_COUNT_SIZE	(1 * 4)
#define VP9_MV_CLASSES_0_COUNT_START \
	(VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE)
#define VP9_MV_CLASSES_0_COUNT_SIZE	(1 * 11)
#define VP9_MV_CLASS0_0_COUNT_START \
	(VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE)
#define VP9_MV_CLASS0_0_COUNT_SIZE	(1 * 2)
#define VP9_MV_CLASSES_1_COUNT_START \
	(VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE)
#define VP9_MV_CLASSES_1_COUNT_SIZE	(1 * 11)
#define VP9_MV_CLASS0_1_COUNT_START \
	(VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE)
#define VP9_MV_CLASS0_1_COUNT_SIZE	(1 * 2)
#define VP9_MV_CLASS0_FP_0_COUNT_START \
	(VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE)
#define VP9_MV_CLASS0_FP_0_COUNT_SIZE	(3 * 4)
#define VP9_MV_CLASS0_FP_1_COUNT_START \
	(VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE)
#define VP9_MV_CLASS0_FP_1_COUNT_SIZE	(3 * 4)
319
/* Intra prediction modes */
#define DC_PRED		0 /* average of the above and left pixels */
#define V_PRED		1 /* vertical */
#define H_PRED		2 /* horizontal */
#define D45_PRED	3 /* 45 degrees: round(arctan(1/1) * 180/pi) */
#define D135_PRED	4 /* 135 degrees: 180 - 45 */
#define D117_PRED	5 /* 117 degrees: 180 - 63 */
#define D153_PRED	6 /* 153 degrees: 180 - 27 */
#define D207_PRED	7 /* 207 degrees: 180 + 27 */
#define D63_PRED	8 /* 63 degrees: round(arctan(2/1) * 180/pi) */
#define TM_PRED		9 /* true-motion */
330
/*
 * Divide @value by 2^@num, rounding to the nearest integer (halves are
 * rounded up).
 *
 * This is a static inline rather than a macro to avoid possible side
 * effects from @num being evaluated more than once.
 *
 * A shift count of zero or less performs no scaling and returns @value
 * unchanged; this avoids the undefined negative shift in "1 << (num - 1)".
 * @num must be smaller than the width of int.
 */
static inline int round_power_of_two(int value, int num)
{
	if (num <= 0)
		return value;

	return (value + (1 << (num - 1))) >> num;
}
336
/*
 * Update factors indexed by a count in [0, MODE_MV_COUNT_SAT].
 * Entry i equals 128 * i / MODE_MV_COUNT_SAT, rounded down.
 */
#define MODE_MV_COUNT_SAT 20
static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
	  0,   6,  12,  19,  25,  32,  38,
	 44,  51,  57,  64,  70,  76,  83,
	 89,  96, 102, 108, 115, 121, 128,
};
342
343union rpm_param {
344	struct {
345		u16 data[RPM_BUF_SIZE];
346	} l;
347	struct {
348		u16 profile;
349		u16 show_existing_frame;
350		u16 frame_to_show_idx;
351		u16 frame_type; /*1 bit*/
352		u16 show_frame; /*1 bit*/
353		u16 error_resilient_mode; /*1 bit*/
354		u16 intra_only; /*1 bit*/
355		u16 display_size_present; /*1 bit*/
356		u16 reset_frame_context;
357		u16 refresh_frame_flags;
358		u16 width;
359		u16 height;
360		u16 display_width;
361		u16 display_height;
362		u16 ref_info;
363		u16 same_frame_size;
364		u16 mode_ref_delta_enabled;
365		u16 ref_deltas[4];
366		u16 mode_deltas[2];
367		u16 filter_level;
368		u16 sharpness_level;
369		u16 bit_depth;
370		u16 seg_quant_info[8];
371		u16 seg_enabled;
372		u16 seg_abs_delta;
373		/* bit 15: feature enabled; bit 8, sign; bit[5:0], data */
374		u16 seg_lf_info[8];
375	} p;
376};
377
/*
 * Segment-level features. The enumerator value is also the bit position of
 * the feature in segmentation.feature_mask[].
 */
enum SEG_LVL_FEATURES {
	SEG_LVL_ALT_Q,		/* 0: use alternate quantizer */
	SEG_LVL_ALT_LF,		/* 1: use alternate loop filter value */
	SEG_LVL_REF_FRAME,	/* 2: optional segment reference frame */
	SEG_LVL_SKIP,		/* 3: optional segment (0,0) + skip mode */
	SEG_LVL_MAX		/* 4: number of features supported */
};
385
386struct segmentation {
387	u8 enabled;
388	u8 update_map;
389	u8 update_data;
390	u8 abs_delta;
391	u8 temporal_update;
392	s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
393	unsigned int feature_mask[MAX_SEGMENTS];
394};
395
396struct loop_filter_thresh {
397	u8 mblim;
398	u8 lim;
399	u8 hev_thr;
400};
401
402struct loop_filter_info_n {
403	struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
404	u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
405};
406
407struct loopfilter {
408	int filter_level;
409
410	int sharpness_level;
411	int last_sharpness_level;
412
413	u8 mode_ref_delta_enabled;
414	u8 mode_ref_delta_update;
415
416	/*0 = Intra, Last, GF, ARF*/
417	signed char ref_deltas[MAX_REF_LF_DELTAS];
418	signed char last_ref_deltas[MAX_REF_LF_DELTAS];
419
420	/*0 = ZERO_MV, MV*/
421	signed char mode_deltas[MAX_MODE_LF_DELTAS];
422	signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
423};
424
425struct vp9_frame {
426	struct list_head list;
427	struct vb2_v4l2_buffer *vbuf;
428	int index;
429	int intra_only;
430	int show;
431	int type;
432	int done;
433	unsigned int width;
434	unsigned int height;
435};
436
437struct codec_vp9 {
438	/* VP9 context lock */
439	struct mutex lock;
440
441	/* Common part with the HEVC decoder */
442	struct codec_hevc_common common;
443
444	/* Buffer for the VP9 Workspace */
445	void      *workspace_vaddr;
446	dma_addr_t workspace_paddr;
447
448	/* Contains many information parsed from the bitstream */
449	union rpm_param rpm_param;
450
451	/* Whether we detected the bitstream as 10-bit */
452	int is_10bit;
453
454	/* Coded resolution reported by the hardware */
455	u32 width, height;
456
457	/* All ref frames used by the HW at a given time */
458	struct list_head ref_frames_list;
459	u32 frames_num;
460
461	/* In case of downsampling (decoding with FBC but outputting in NV12M),
462	 * we need to allocate additional buffers for FBC.
463	 */
464	void      *fbc_buffer_vaddr[MAX_REF_PIC_NUM];
465	dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM];
466
467	int ref_frame_map[REF_FRAMES];
468	int next_ref_frame_map[REF_FRAMES];
469	struct vp9_frame *frame_refs[REFS_PER_FRAME];
470
471	u32 lcu_total;
472
473	/* loop filter */
474	int default_filt_lvl;
475	struct loop_filter_info_n lfi;
476	struct loopfilter lf;
477	struct segmentation seg_4lf;
478
479	struct vp9_frame *cur_frame;
480	struct vp9_frame *prev_frame;
481};
482
483static int div_r32(s64 m, int n)
484{
485	s64 qu = div_s64(m, n);
486
487	return (int)qu;
488}
489
/* Limit the probability @p to the [1, 255] range */
static int clip_prob(int p)
{
	if (p < 1)
		return 1;
	if (p > 255)
		return 255;

	return p;
}
494
495static int segfeature_active(struct segmentation *seg, int segment_id,
496			     enum SEG_LVL_FEATURES feature_id)
497{
498	return seg->enabled &&
499		(seg->feature_mask[segment_id] & (1 << feature_id));
500}
501
502static int get_segdata(struct segmentation *seg, int segment_id,
503		       enum SEG_LVL_FEATURES feature_id)
504{
505	return seg->feature_data[segment_id][feature_id];
506}
507
508static void vp9_update_sharpness(struct loop_filter_info_n *lfi,
509				 int sharpness_lvl)
510{
511	int lvl;
512
513	/* For each possible value for the loop filter fill out limits*/
514	for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
515		/* Set loop filter parameters that control sharpness.*/
516		int block_inside_limit = lvl >> ((sharpness_lvl > 0) +
517					(sharpness_lvl > 4));
518
519		if (sharpness_lvl > 0) {
520			if (block_inside_limit > (9 - sharpness_lvl))
521				block_inside_limit = (9 - sharpness_lvl);
522		}
523
524		if (block_inside_limit < 1)
525			block_inside_limit = 1;
526
527		lfi->lfthr[lvl].lim = (u8)block_inside_limit;
528		lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) +
529				block_inside_limit);
530	}
531}
532
533/* Instantiate this function once when decode is started */
534static void
535vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9)
536{
537	struct loop_filter_info_n *lfi = &vp9->lfi;
538	struct loopfilter *lf = &vp9->lf;
539	struct segmentation *seg_4lf = &vp9->seg_4lf;
540	int i;
541
542	memset(lfi, 0, sizeof(struct loop_filter_info_n));
543	memset(lf, 0, sizeof(struct loopfilter));
544	memset(seg_4lf, 0, sizeof(struct segmentation));
545	lf->sharpness_level = 0;
546	vp9_update_sharpness(lfi, lf->sharpness_level);
547	lf->last_sharpness_level = lf->sharpness_level;
548
549	for (i = 0; i < 32; i++) {
550		unsigned int thr;
551
552		thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
553			(lfi->lfthr[i * 2 + 1].mblim & 0xff);
554		thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
555			(lfi->lfthr[i * 2].mblim & 0xff);
556
557		amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
558	}
559
560	if (core->platform->revision >= VDEC_REVISION_SM1)
561		amvdec_write_dos(core, HEVC_DBLK_CFGB,
562				 (0x3 << 14) | /* dw fifo thres r and b */
563				 (0x3 << 12) | /* dw fifo thres r or b */
564				 (0x3 << 10) | /* dw fifo thres not r/b */
565				 BIT(0)); /* VP9 video format */
566	else if (core->platform->revision >= VDEC_REVISION_G12A)
567		/* VP9 video format */
568		amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0));
569	else
570		amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001);
571}
572
573static void
574vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg,
575			   struct loop_filter_info_n *lfi,
576			   struct loopfilter *lf, int default_filt_lvl)
577{
578	int i;
579	int seg_id;
580
581	/*
582	 * n_shift is the multiplier for lf_deltas
583	 * the multiplier is:
584	 * - 1 for when filter_lvl is between 0 and 31
585	 * - 2 when filter_lvl is between 32 and 63
586	 */
587	const int scale = 1 << (default_filt_lvl >> 5);
588
589	/* update limits if sharpness has changed */
590	if (lf->last_sharpness_level != lf->sharpness_level) {
591		vp9_update_sharpness(lfi, lf->sharpness_level);
592		lf->last_sharpness_level = lf->sharpness_level;
593
594		/* Write to register */
595		for (i = 0; i < 32; i++) {
596			unsigned int thr;
597
598			thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
599			      (lfi->lfthr[i * 2 + 1].mblim & 0xff);
600			thr = (thr << 16) |
601			      ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
602			      (lfi->lfthr[i * 2].mblim & 0xff);
603
604			amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
605		}
606	}
607
608	for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
609		int lvl_seg = default_filt_lvl;
610
611		if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
612			const int data = get_segdata(seg, seg_id,
613						SEG_LVL_ALT_LF);
614			lvl_seg = clamp_t(int,
615					  seg->abs_delta == SEGMENT_ABSDATA ?
616						data : default_filt_lvl + data,
617					  0, MAX_LOOP_FILTER);
618		}
619
620		if (!lf->mode_ref_delta_enabled) {
621			/*
622			 * We could get rid of this if we assume that deltas
623			 * are set to zero when not in use.
624			 * encoder always uses deltas
625			 */
626			memset(lfi->lvl[seg_id], lvl_seg,
627			       sizeof(lfi->lvl[seg_id]));
628		} else {
629			int ref, mode;
630			const int intra_lvl =
631				lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
632			lfi->lvl[seg_id][INTRA_FRAME][0] =
633				clamp_val(intra_lvl, 0, MAX_LOOP_FILTER);
634
635			for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
636				for (mode = 0; mode < MAX_MODE_LF_DELTAS;
637				     ++mode) {
638					const int inter_lvl =
639						lvl_seg +
640						lf->ref_deltas[ref] * scale +
641						lf->mode_deltas[mode] * scale;
642					lfi->lvl[seg_id][ref][mode] =
643						clamp_val(inter_lvl, 0,
644							  MAX_LOOP_FILTER);
645				}
646			}
647		}
648	}
649
650	for (i = 0; i < 16; i++) {
651		unsigned int level;
652
653		level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) |
654			((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) |
655			((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) |
656			(lfi->lvl[i >> 1][0][i & 1] & 0x3f);
657		if (!default_filt_lvl)
658			level = 0;
659
660		amvdec_write_dos(core, HEVC_DBLK_CFGA, level);
661	}
662}
663
664static void codec_vp9_flush_output(struct amvdec_session *sess)
665{
666	struct codec_vp9 *vp9 = sess->priv;
667	struct vp9_frame *tmp, *n;
668
669	mutex_lock(&vp9->lock);
670	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
671		if (!tmp->done) {
672			if (tmp->show)
673				amvdec_dst_buf_done(sess, tmp->vbuf,
674						    V4L2_FIELD_NONE);
675			else
676				v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf);
677
678			vp9->frames_num--;
679		}
680
681		list_del(&tmp->list);
682		kfree(tmp);
683	}
684	mutex_unlock(&vp9->lock);
685}
686
687static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess)
688{
689	struct codec_vp9 *vp9 = sess->priv;
690
691	if (!vp9)
692		return 0;
693
694	return vp9->frames_num;
695}
696
697static int codec_vp9_alloc_workspace(struct amvdec_core *core,
698				     struct codec_vp9 *vp9)
699{
700	/* Allocate some memory for the VP9 decoder's state */
701	vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE,
702						  &vp9->workspace_paddr,
703						  GFP_KERNEL);
704	if (!vp9->workspace_vaddr) {
705		dev_err(core->dev, "Failed to allocate VP9 Workspace\n");
706		return -ENOMEM;
707	}
708
709	return 0;
710}
711
712static void codec_vp9_setup_workspace(struct amvdec_session *sess,
713				      struct codec_vp9 *vp9)
714{
715	struct amvdec_core *core = sess->core;
716	u32 revision = core->platform->revision;
717	dma_addr_t wkaddr = vp9->workspace_paddr;
718
719	amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET);
720	amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET);
721	amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET);
722	amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET);
723	amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET);
724
725	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER,
726			 wkaddr + SWAP_BUF_OFFSET);
727	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2,
728			 wkaddr + SWAP_BUF2_OFFSET);
729	amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET);
730
731	if (core->platform->revision >= VDEC_REVISION_G12A)
732		amvdec_write_dos(core, HEVC_DBLK_CFGE,
733				 wkaddr + DBLK_PARA_OFFSET);
734
735	amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET);
736	amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET);
737	amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET);
738	amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET);
739	amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET);
740	amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET);
741
742	if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) {
743		amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR,
744				 wkaddr + MMU_VBH_OFFSET);
745		amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR,
746				 wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2));
747
748		if (revision >= VDEC_REVISION_G12A)
749			amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR,
750					 vp9->common.mmu_map_paddr);
751		else
752			amvdec_write_dos(core, VP9_MMU_MAP_BUFFER,
753					 vp9->common.mmu_map_paddr);
754	}
755}
756
757static int codec_vp9_start(struct amvdec_session *sess)
758{
759	struct amvdec_core *core = sess->core;
760	struct codec_vp9 *vp9;
761	u32 val;
762	int i;
763	int ret;
764
765	vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL);
766	if (!vp9)
767		return -ENOMEM;
768
769	ret = codec_vp9_alloc_workspace(core, vp9);
770	if (ret)
771		goto free_vp9;
772
773	codec_vp9_setup_workspace(sess, vp9);
774	amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0));
775	/* stream_fifo_hole */
776	if (core->platform->revision >= VDEC_REVISION_G12A)
777		amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29));
778
779	val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff;
780	val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0);
781	amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val);
782	amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0));
783	amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) |
784			 (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0));
785	amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0));
786	amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0));
787	amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001);
788
789	amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0);
790
791	amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16));
792	for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i)
793		amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE,
794				 vdec_hevc_parser_cmd[i]);
795
796	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0);
797	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1);
798	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2);
799	amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL,
800			 BIT(5) | BIT(2) | BIT(0));
801
802	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0));
803	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1));
804
805	amvdec_write_dos(core, VP9_WAIT_FLAG, 1);
806
807	/* clear mailbox interrupt */
808	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1);
809	/* enable mailbox interrupt */
810	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1);
811	/* disable PSCALE for hardware sharing */
812	amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0);
813	/* Let the uCode do all the parsing */
814	amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8);
815
816	amvdec_write_dos(core, DECODE_STOP_POS, 0);
817	amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE);
818
819	pr_debug("decode_count: %u; decode_size: %u\n",
820		 amvdec_read_dos(core, HEVC_DECODE_COUNT),
821		 amvdec_read_dos(core, HEVC_DECODE_SIZE));
822
823	vp9_loop_filter_init(core, vp9);
824
825	INIT_LIST_HEAD(&vp9->ref_frames_list);
826	mutex_init(&vp9->lock);
827	memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map));
828	memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map));
829	for (i = 0; i < REFS_PER_FRAME; ++i)
830		vp9->frame_refs[i] = NULL;
831	sess->priv = vp9;
832
833	return 0;
834
835free_vp9:
836	kfree(vp9);
837	return ret;
838}
839
840static int codec_vp9_stop(struct amvdec_session *sess)
841{
842	struct amvdec_core *core = sess->core;
843	struct codec_vp9 *vp9 = sess->priv;
844
845	mutex_lock(&vp9->lock);
846	if (vp9->workspace_vaddr)
847		dma_free_coherent(core->dev, SIZE_WORKSPACE,
848				  vp9->workspace_vaddr,
849				  vp9->workspace_paddr);
850
851	codec_hevc_free_fbc_buffers(sess, &vp9->common);
852	mutex_unlock(&vp9->lock);
853
854	return 0;
855}
856
857/*
858 * Program LAST & GOLDEN frames into the motion compensation reference cache
859 * controller
860 */
861static void codec_vp9_set_mcrcc(struct amvdec_session *sess)
862{
863	struct amvdec_core *core = sess->core;
864	struct codec_vp9 *vp9 = sess->priv;
865	u32 val;
866
867	/* Reset mcrcc */
868	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x2);
869	/* Disable on I-frame */
870	if (vp9->cur_frame->type == KEY_FRAME || vp9->cur_frame->intra_only) {
871		amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x0);
872		return;
873	}
874
875	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, BIT(1));
876	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
877	val |= (val << 16);
878	amvdec_write_dos(core, HEVCD_MCRCC_CTL2, val);
879	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
880	val |= (val << 16);
881	amvdec_write_dos(core, HEVCD_MCRCC_CTL3, val);
882
883	/* Enable mcrcc progressive-mode */
884	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0xff0);
885}
886
887static void codec_vp9_set_sao(struct amvdec_session *sess,
888			      struct vb2_buffer *vb)
889{
890	struct amvdec_core *core = sess->core;
891	struct codec_vp9 *vp9 = sess->priv;
892
893	dma_addr_t buf_y_paddr;
894	dma_addr_t buf_u_v_paddr;
895	u32 val;
896
897	if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit))
898		buf_y_paddr =
899			vp9->common.fbc_buffer_paddr[vb->index];
900	else
901		buf_y_paddr =
902		       vb2_dma_contig_plane_dma_addr(vb, 0);
903
904	if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
905		val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200;
906		amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
907		amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr);
908	}
909
910	if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) {
911		buf_y_paddr =
912		       vb2_dma_contig_plane_dma_addr(vb, 0);
913		buf_u_v_paddr =
914		       vb2_dma_contig_plane_dma_addr(vb, 1);
915		amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr);
916		amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr);
917		amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr);
918		amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr);
919	}
920
921	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
922			       vp9->is_10bit)) {
923		amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR,
924				 vp9->common.mmu_header_paddr[vb->index]);
925		/* use HEVC_CM_HEADER_START_ADDR */
926		amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10));
927	}
928
929	amvdec_write_dos(core, HEVC_SAO_Y_LENGTH,
930			 amvdec_get_output_size(sess));
931	amvdec_write_dos(core, HEVC_SAO_C_LENGTH,
932			 (amvdec_get_output_size(sess) / 2));
933
934	if (core->platform->revision >= VDEC_REVISION_G12A) {
935		amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB,
936				      BIT(4) | BIT(5) | BIT(8) | BIT(9));
937		/* enable first, compressed write */
938		if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
939			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8));
940
941		/* enable second, uncompressed write */
942		if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M)
943			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9));
944
945		/* dblk pipeline mode=1 for performance */
946		if (sess->width >= 1280)
947			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4));
948
949		pr_debug("HEVC_DBLK_CFGB: %08X\n",
950			 amvdec_read_dos(core, HEVC_DBLK_CFGB));
951	}
952
953	val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0;
954	val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */
955	if (core->platform->revision < VDEC_REVISION_G12A) {
956		val &= ~0x3;
957		if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
958			val |= BIT(0); /* disable cm compression */
959		/* TOFIX: Handle Amlogic Framebuffer compression */
960	}
961
962	amvdec_write_dos(core, HEVC_SAO_CTRL1, val);
963	pr_debug("HEVC_SAO_CTRL1: %08X\n", val);
964
965	/* no downscale for NV12 */
966	val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000;
967	amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
968
969	val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30;
970	val |= 0xf;
971	val &= ~BIT(12); /* NV12 */
972	amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val);
973}
974
975static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9,
976					       struct vp9_frame *frame)
977{
978	return vp9->workspace_paddr + MPRED_MV_OFFSET +
979	       (frame->index * MPRED_MV_BUF_SIZE);
980}
981
982static void codec_vp9_set_mpred_mv(struct amvdec_core *core,
983				   struct codec_vp9 *vp9)
984{
985	int mpred_mv_rd_end_addr;
986	int use_prev_frame_mvs = vp9->prev_frame->width ==
987					vp9->cur_frame->width &&
988				 vp9->prev_frame->height ==
989					vp9->cur_frame->height &&
990				 !vp9->prev_frame->intra_only &&
991				 vp9->prev_frame->show &&
992				 vp9->prev_frame->type != KEY_FRAME;
993
994	amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412);
995	amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR,
996			 vp9->workspace_paddr + MPRED_ABV_OFFSET);
997
998	amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
999	if (use_prev_frame_mvs)
1000		amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
1001
1002	amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR,
1003			 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame));
1004	amvdec_write_dos(core, HEVC_MPRED_MV_WPTR,
1005			 codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame));
1006
1007	amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR,
1008			 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame));
1009	amvdec_write_dos(core, HEVC_MPRED_MV_RPTR,
1010			 codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame));
1011
1012	mpred_mv_rd_end_addr =
1013			codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) +
1014			(vp9->lcu_total * MV_MEM_UNIT);
1015	amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr);
1016}
1017
1018static void codec_vp9_update_next_ref(struct codec_vp9 *vp9)
1019{
1020	union rpm_param *param = &vp9->rpm_param;
1021	u32 buf_idx = vp9->cur_frame->index;
1022	int ref_index = 0;
1023	int refresh_frame_flags;
1024	int mask;
1025
1026	refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ?
1027				0xff : param->p.refresh_frame_flags;
1028
1029	for (mask = refresh_frame_flags; mask; mask >>= 1) {
1030		pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index);
1031		if (mask & 1)
1032			vp9->next_ref_frame_map[ref_index] = buf_idx;
1033		else
1034			vp9->next_ref_frame_map[ref_index] =
1035				vp9->ref_frame_map[ref_index];
1036
1037		++ref_index;
1038	}
1039
1040	for (; ref_index < REF_FRAMES; ++ref_index)
1041		vp9->next_ref_frame_map[ref_index] =
1042			vp9->ref_frame_map[ref_index];
1043}
1044
1045static void codec_vp9_save_refs(struct codec_vp9 *vp9)
1046{
1047	union rpm_param *param = &vp9->rpm_param;
1048	int i;
1049
1050	for (i = 0; i < REFS_PER_FRAME; ++i) {
1051		const int ref = (param->p.ref_info >>
1052				 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7;
1053
1054		if (vp9->ref_frame_map[ref] < 0)
1055			continue;
1056
1057		pr_warn("%s: FIXME, would need to save ref %d\n",
1058			__func__, vp9->ref_frame_map[ref]);
1059	}
1060}
1061
1062static void codec_vp9_update_ref(struct codec_vp9 *vp9)
1063{
1064	union rpm_param *param = &vp9->rpm_param;
1065	int ref_index = 0;
1066	int mask;
1067	int refresh_frame_flags;
1068
1069	if (!vp9->cur_frame)
1070		return;
1071
1072	refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ?
1073				0xff : param->p.refresh_frame_flags;
1074
1075	for (mask = refresh_frame_flags; mask; mask >>= 1) {
1076		vp9->ref_frame_map[ref_index] =
1077			vp9->next_ref_frame_map[ref_index];
1078		++ref_index;
1079	}
1080
1081	if (param->p.show_existing_frame)
1082		return;
1083
1084	for (; ref_index < REF_FRAMES; ++ref_index)
1085		vp9->ref_frame_map[ref_index] =
1086			vp9->next_ref_frame_map[ref_index];
1087}
1088
1089static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9,
1090						    int idx)
1091{
1092	struct vp9_frame *frame;
1093
1094	list_for_each_entry(frame, &vp9->ref_frames_list, list) {
1095		if (frame->index == idx)
1096			return frame;
1097	}
1098
1099	return NULL;
1100}
1101
1102static void codec_vp9_sync_ref(struct codec_vp9 *vp9)
1103{
1104	union rpm_param *param = &vp9->rpm_param;
1105	int i;
1106
1107	for (i = 0; i < REFS_PER_FRAME; ++i) {
1108		const int ref = (param->p.ref_info >>
1109				 (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7;
1110		const int idx = vp9->ref_frame_map[ref];
1111
1112		vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx);
1113		if (!vp9->frame_refs[i])
1114			pr_warn("%s: couldn't find VP9 ref %d\n", __func__,
1115				idx);
1116	}
1117}
1118
1119static void codec_vp9_set_refs(struct amvdec_session *sess,
1120			       struct codec_vp9 *vp9)
1121{
1122	struct amvdec_core *core = sess->core;
1123	int i;
1124
1125	for (i = 0; i < REFS_PER_FRAME; ++i) {
1126		struct vp9_frame *frame = vp9->frame_refs[i];
1127		int id_y;
1128		int id_u_v;
1129
1130		if (!frame)
1131			continue;
1132
1133		if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
1134			id_y = frame->index;
1135			id_u_v = id_y;
1136		} else {
1137			id_y = frame->index * 2;
1138			id_u_v = id_y + 1;
1139		}
1140
1141		amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR,
1142				 (id_u_v << 16) | (id_u_v << 8) | id_y);
1143	}
1144}
1145
1146static void codec_vp9_set_mc(struct amvdec_session *sess,
1147			     struct codec_vp9 *vp9)
1148{
1149	struct amvdec_core *core = sess->core;
1150	u32 scale = 0;
1151	u32 sz;
1152	int i;
1153
1154	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1);
1155	codec_vp9_set_refs(sess, vp9);
1156	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR,
1157			 (16 << 8) | 1);
1158	codec_vp9_set_refs(sess, vp9);
1159
1160	amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2));
1161	for (i = 0; i < REFS_PER_FRAME; ++i) {
1162		if (!vp9->frame_refs[i])
1163			continue;
1164
1165		if (vp9->frame_refs[i]->width != vp9->width ||
1166		    vp9->frame_refs[i]->height != vp9->height)
1167			scale = 1;
1168
1169		sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width,
1170					    vp9->frame_refs[i]->height);
1171
1172		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1173				 vp9->frame_refs[i]->width);
1174		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1175				 vp9->frame_refs[i]->height);
1176		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1177				 (vp9->frame_refs[i]->width << 14) /
1178				 vp9->width);
1179		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
1180				 (vp9->frame_refs[i]->height << 14) /
1181				 vp9->height);
1182		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5);
1183	}
1184
1185	amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale);
1186}
1187
1188static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess)
1189{
1190	struct codec_vp9 *vp9 = sess->priv;
1191	union rpm_param *param = &vp9->rpm_param;
1192	struct vb2_v4l2_buffer *vbuf;
1193	struct vp9_frame *new_frame;
1194
1195	new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL);
1196	if (!new_frame)
1197		return NULL;
1198
1199	vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
1200	if (!vbuf) {
1201		dev_err(sess->core->dev, "No dst buffer available\n");
1202		kfree(new_frame);
1203		return NULL;
1204	}
1205
1206	while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) {
1207		struct vb2_v4l2_buffer *old_vbuf = vbuf;
1208
1209		vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
1210		v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf);
1211		if (!vbuf) {
1212			dev_err(sess->core->dev, "No dst buffer available\n");
1213			kfree(new_frame);
1214			return NULL;
1215		}
1216	}
1217
1218	new_frame->vbuf = vbuf;
1219	new_frame->index = vbuf->vb2_buf.index;
1220	new_frame->intra_only = param->p.intra_only;
1221	new_frame->show = param->p.show_frame;
1222	new_frame->type = param->p.frame_type;
1223	new_frame->width = vp9->width;
1224	new_frame->height = vp9->height;
1225	list_add_tail(&new_frame->list, &vp9->ref_frames_list);
1226	vp9->frames_num++;
1227
1228	return new_frame;
1229}
1230
1231static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9)
1232{
1233	union rpm_param *param = &vp9->rpm_param;
1234
1235	if (!param->p.show_existing_frame)
1236		return;
1237
1238	pr_debug("showing frame %u\n", param->p.frame_to_show_idx);
1239}
1240
1241static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess)
1242{
1243	struct codec_vp9 *vp9 = sess->priv;
1244	struct vp9_frame *tmp;
1245
1246	list_for_each_entry(tmp, &vp9->ref_frames_list, list) {
1247		if (tmp->show)
1248			continue;
1249
1250		pr_debug("rm noshow: %u\n", tmp->index);
1251		v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf);
1252		list_del(&tmp->list);
1253		kfree(tmp);
1254		vp9->frames_num--;
1255		return;
1256	}
1257}
1258
1259static void codec_vp9_process_frame(struct amvdec_session *sess)
1260{
1261	struct amvdec_core *core = sess->core;
1262	struct codec_vp9 *vp9 = sess->priv;
1263	union rpm_param *param = &vp9->rpm_param;
1264	int intra_only;
1265
1266	if (!param->p.show_frame)
1267		codec_vp9_rm_noshow_frame(sess);
1268
1269	vp9->cur_frame = codec_vp9_get_new_frame(sess);
1270	if (!vp9->cur_frame)
1271		return;
1272
1273	pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n",
1274		 vp9->cur_frame->index,
1275		 param->p.frame_type, param->p.show_existing_frame,
1276		 param->p.show_frame, param->p.intra_only);
1277
1278	if (param->p.frame_type != KEY_FRAME)
1279		codec_vp9_sync_ref(vp9);
1280	codec_vp9_update_next_ref(vp9);
1281	codec_vp9_show_existing_frame(vp9);
1282
1283	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
1284			       vp9->is_10bit))
1285		codec_hevc_fill_mmu_map(sess, &vp9->common,
1286					&vp9->cur_frame->vbuf->vb2_buf);
1287
1288	intra_only = param->p.show_frame ? 0 : param->p.intra_only;
1289
1290	/* clear mpred (for keyframe only) */
1291	if (param->p.frame_type != KEY_FRAME && !intra_only) {
1292		codec_vp9_set_mc(sess, vp9);
1293		codec_vp9_set_mpred_mv(core, vp9);
1294	} else {
1295		amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
1296	}
1297
1298	amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE,
1299			 (vp9->height << 16) | vp9->width);
1300	codec_vp9_set_mcrcc(sess);
1301	codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf);
1302
1303	vp9_loop_filter_frame_init(core, &vp9->seg_4lf,
1304				   &vp9->lfi, &vp9->lf,
1305				   vp9->default_filt_lvl);
1306
1307	/* ask uCode to start decoding */
1308	amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE);
1309}
1310
1311static void codec_vp9_process_lf(struct codec_vp9 *vp9)
1312{
1313	union rpm_param *param = &vp9->rpm_param;
1314	int i;
1315
1316	vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled;
1317	vp9->lf.sharpness_level = param->p.sharpness_level;
1318	vp9->default_filt_lvl = param->p.filter_level;
1319	vp9->seg_4lf.enabled = param->p.seg_enabled;
1320	vp9->seg_4lf.abs_delta = param->p.seg_abs_delta;
1321
1322	for (i = 0; i < 4; i++)
1323		vp9->lf.ref_deltas[i] = param->p.ref_deltas[i];
1324
1325	for (i = 0; i < 2; i++)
1326		vp9->lf.mode_deltas[i] = param->p.mode_deltas[i];
1327
1328	for (i = 0; i < MAX_SEGMENTS; i++)
1329		vp9->seg_4lf.feature_mask[i] =
1330			(param->p.seg_lf_info[i] & 0x8000) ?
1331				(1 << SEG_LVL_ALT_LF) : 0;
1332
1333	for (i = 0; i < MAX_SEGMENTS; i++)
1334		vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] =
1335			(param->p.seg_lf_info[i] & 0x100) ?
1336				-(param->p.seg_lf_info[i] & 0x3f)
1337				: (param->p.seg_lf_info[i] & 0x3f);
1338}
1339
1340static void codec_vp9_resume(struct amvdec_session *sess)
1341{
1342	struct codec_vp9 *vp9 = sess->priv;
1343
1344	mutex_lock(&vp9->lock);
1345	if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) {
1346		mutex_unlock(&vp9->lock);
1347		amvdec_abort(sess);
1348		return;
1349	}
1350
1351	codec_vp9_setup_workspace(sess, vp9);
1352	codec_hevc_setup_decode_head(sess, vp9->is_10bit);
1353	codec_vp9_process_lf(vp9);
1354	codec_vp9_process_frame(sess);
1355
1356	mutex_unlock(&vp9->lock);
1357}
1358
1359/*
1360 * The RPM section within the workspace contains
1361 * many information regarding the parsed bitstream
1362 */
1363static void codec_vp9_fetch_rpm(struct amvdec_session *sess)
1364{
1365	struct codec_vp9 *vp9 = sess->priv;
1366	u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET;
1367	int i, j;
1368
1369	for (i = 0; i < RPM_BUF_SIZE; i += 4)
1370		for (j = 0; j < 4; j++)
1371			vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j];
1372}
1373
1374static int codec_vp9_process_rpm(struct codec_vp9 *vp9)
1375{
1376	union rpm_param *param = &vp9->rpm_param;
1377	int src_changed = 0;
1378	int is_10bit = 0;
1379	int pic_width_64 = ALIGN(param->p.width, 64);
1380	int pic_height_32 = ALIGN(param->p.height, 32);
1381	int pic_width_lcu  = (pic_width_64 % LCU_SIZE) ?
1382				pic_width_64 / LCU_SIZE  + 1
1383				: pic_width_64 / LCU_SIZE;
1384	int pic_height_lcu = (pic_height_32 % LCU_SIZE) ?
1385				pic_height_32 / LCU_SIZE + 1
1386				: pic_height_32 / LCU_SIZE;
1387	vp9->lcu_total = pic_width_lcu * pic_height_lcu;
1388
1389	if (param->p.bit_depth == 10)
1390		is_10bit = 1;
1391
1392	if (vp9->width != param->p.width || vp9->height != param->p.height ||
1393	    vp9->is_10bit != is_10bit)
1394		src_changed = 1;
1395
1396	vp9->width = param->p.width;
1397	vp9->height = param->p.height;
1398	vp9->is_10bit = is_10bit;
1399
1400	pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n",
1401		 vp9->width, vp9->height, is_10bit, src_changed);
1402
1403	return src_changed;
1404}
1405
1406static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame)
1407{
1408	int i;
1409
1410	for (i = 0; i < REF_FRAMES; ++i)
1411		if (vp9->ref_frame_map[i] == frame->index)
1412			return true;
1413
1414	return false;
1415}
1416
1417static void codec_vp9_show_frame(struct amvdec_session *sess)
1418{
1419	struct codec_vp9 *vp9 = sess->priv;
1420	struct vp9_frame *tmp, *n;
1421
1422	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
1423		if (!tmp->show || tmp == vp9->cur_frame)
1424			continue;
1425
1426		if (!tmp->done) {
1427			pr_debug("Doning %u\n", tmp->index);
1428			amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE);
1429			tmp->done = 1;
1430			vp9->frames_num--;
1431		}
1432
1433		if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame)
1434			continue;
1435
1436		pr_debug("deleting %d\n", tmp->index);
1437		list_del(&tmp->list);
1438		kfree(tmp);
1439	}
1440}
1441
1442static void vp9_tree_merge_probs(unsigned int *prev_prob,
1443				 unsigned int *cur_prob,
1444				 int coef_node_start, int tree_left,
1445				 int tree_right,
1446				 int tree_i, int node)
1447{
1448	int prob_32, prob_res, prob_shift;
1449	int pre_prob, new_prob;
1450	int den, m_count, get_prob, factor;
1451
1452	prob_32 = prev_prob[coef_node_start / 4 * 2];
1453	prob_res = coef_node_start & 3;
1454	prob_shift = prob_res * 8;
1455	pre_prob = (prob_32 >> prob_shift) & 0xff;
1456
1457	den = tree_left + tree_right;
1458
1459	if (den == 0) {
1460		new_prob = pre_prob;
1461	} else {
1462		m_count = min(den, MODE_MV_COUNT_SAT);
1463		get_prob =
1464			clip_prob(div_r32(((int64_t)tree_left * 256 +
1465					   (den >> 1)),
1466					  den));
1467
1468		/* weighted_prob */
1469		factor = count_to_update_factor[m_count];
1470		new_prob = round_power_of_two(pre_prob * (256 - factor) +
1471					      get_prob * factor, 8);
1472	}
1473
1474	cur_prob[coef_node_start / 4 * 2] =
1475		(cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) |
1476		(new_prob << prob_shift);
1477}
1478
1479static void adapt_coef_probs_cxt(unsigned int *prev_prob,
1480				 unsigned int *cur_prob,
1481				 unsigned int *count,
1482				 int update_factor,
1483				 int cxt_num,
1484				 int coef_cxt_start,
1485				 int coef_count_cxt_start)
1486{
1487	int prob_32, prob_res, prob_shift;
1488	int pre_prob, new_prob;
1489	int num, den, m_count, get_prob, factor;
1490	int node, coef_node_start;
1491	int count_sat = 24;
1492	int cxt;
1493
1494	for (cxt = 0; cxt < cxt_num; cxt++) {
1495		const int n0 = count[coef_count_cxt_start];
1496		const int n1 = count[coef_count_cxt_start + 1];
1497		const int n2 = count[coef_count_cxt_start + 2];
1498		const int neob = count[coef_count_cxt_start + 3];
1499		const int nneob = count[coef_count_cxt_start + 4];
1500		const unsigned int branch_ct[3][2] = {
1501			{ neob, nneob },
1502			{ n0, n1 + n2 },
1503			{ n1, n2 }
1504		};
1505
1506		coef_node_start = coef_cxt_start;
1507		for (node = 0 ; node < 3 ; node++) {
1508			prob_32 = prev_prob[coef_node_start / 4 * 2];
1509			prob_res = coef_node_start & 3;
1510			prob_shift = prob_res * 8;
1511			pre_prob = (prob_32 >> prob_shift) & 0xff;
1512
1513			/* get binary prob */
1514			num = branch_ct[node][0];
1515			den = branch_ct[node][0] + branch_ct[node][1];
1516			m_count = min(den, count_sat);
1517
1518			get_prob = (den == 0) ?
1519					128u :
1520					clip_prob(div_r32(((int64_t)num * 256 +
1521							  (den >> 1)), den));
1522
1523			factor = update_factor * m_count / count_sat;
1524			new_prob =
1525				round_power_of_two(pre_prob * (256 - factor) +
1526						   get_prob * factor, 8);
1527
1528			cur_prob[coef_node_start / 4 * 2] =
1529				(cur_prob[coef_node_start / 4 * 2] &
1530				 (~(0xff << prob_shift))) |
1531				(new_prob << prob_shift);
1532
1533			coef_node_start += 1;
1534		}
1535
1536		coef_cxt_start = coef_cxt_start + 3;
1537		coef_count_cxt_start = coef_count_cxt_start + 5;
1538	}
1539}
1540
1541static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc,
1542			     unsigned int *prev_prob, unsigned int *cur_prob,
1543			     unsigned int *count)
1544{
1545	int tx_size, coef_tx_size_start, coef_count_tx_size_start;
1546	int plane, coef_plane_start, coef_count_plane_start;
1547	int type, coef_type_start, coef_count_type_start;
1548	int band, coef_band_start, coef_count_band_start;
1549	int cxt_num;
1550	int coef_cxt_start, coef_count_cxt_start;
1551	int node, coef_node_start, coef_count_node_start;
1552
1553	int tree_i, tree_left, tree_right;
1554	int mvd_i;
1555
1556	int update_factor = cur_kf ? 112 : (prev_kf ? 128 : 112);
1557
1558	int prob_32;
1559	int prob_res;
1560	int prob_shift;
1561	int pre_prob;
1562
1563	int den;
1564	int get_prob;
1565	int m_count;
1566	int factor;
1567
1568	int new_prob;
1569
1570	for (tx_size = 0 ; tx_size < 4 ; tx_size++) {
1571		coef_tx_size_start = VP9_COEF_START +
1572				tx_size * 4 * VP9_COEF_SIZE_ONE_SET;
1573		coef_count_tx_size_start = VP9_COEF_COUNT_START +
1574				tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET;
1575		coef_plane_start = coef_tx_size_start;
1576		coef_count_plane_start = coef_count_tx_size_start;
1577
1578		for (plane = 0 ; plane < 2 ; plane++) {
1579			coef_type_start = coef_plane_start;
1580			coef_count_type_start = coef_count_plane_start;
1581
1582			for (type = 0 ; type < 2 ; type++) {
1583				coef_band_start = coef_type_start;
1584				coef_count_band_start = coef_count_type_start;
1585
1586				for (band = 0 ; band < 6 ; band++) {
1587					if (band == 0)
1588						cxt_num = 3;
1589					else
1590						cxt_num = 6;
1591					coef_cxt_start = coef_band_start;
1592					coef_count_cxt_start =
1593						coef_count_band_start;
1594
1595					adapt_coef_probs_cxt(prev_prob,
1596							     cur_prob,
1597							     count,
1598							     update_factor,
1599							     cxt_num,
1600							     coef_cxt_start,
1601							coef_count_cxt_start);
1602
1603					if (band == 0) {
1604						coef_band_start += 10;
1605						coef_count_band_start += 15;
1606					} else {
1607						coef_band_start += 18;
1608						coef_count_band_start += 30;
1609					}
1610				}
1611				coef_type_start += VP9_COEF_SIZE_ONE_SET;
1612				coef_count_type_start +=
1613					VP9_COEF_COUNT_SIZE_ONE_SET;
1614			}
1615
1616			coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET;
1617			coef_count_plane_start +=
1618				2 * VP9_COEF_COUNT_SIZE_ONE_SET;
1619		}
1620	}
1621
1622	if (cur_kf == 0) {
1623		/* mode_mv_merge_probs - merge_intra_inter_prob */
1624		for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START;
1625		     coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START +
1626					      VP9_MV_CLASS0_HP_1_COUNT_SIZE);
1627		     coef_count_node_start += 2) {
1628			if (coef_count_node_start ==
1629					VP9_INTRA_INTER_COUNT_START)
1630				coef_node_start = VP9_INTRA_INTER_START;
1631			else if (coef_count_node_start ==
1632					VP9_COMP_INTER_COUNT_START)
1633				coef_node_start = VP9_COMP_INTER_START;
1634			else if (coef_count_node_start ==
1635					VP9_TX_MODE_COUNT_START)
1636				coef_node_start = VP9_TX_MODE_START;
1637			else if (coef_count_node_start ==
1638					VP9_SKIP_COUNT_START)
1639				coef_node_start = VP9_SKIP_START;
1640			else if (coef_count_node_start ==
1641					VP9_MV_SIGN_0_COUNT_START)
1642				coef_node_start = VP9_MV_SIGN_0_START;
1643			else if (coef_count_node_start ==
1644					VP9_MV_SIGN_1_COUNT_START)
1645				coef_node_start = VP9_MV_SIGN_1_START;
1646			else if (coef_count_node_start ==
1647					VP9_MV_BITS_0_COUNT_START)
1648				coef_node_start = VP9_MV_BITS_0_START;
1649			else if (coef_count_node_start ==
1650					VP9_MV_BITS_1_COUNT_START)
1651				coef_node_start = VP9_MV_BITS_1_START;
1652			else /* node_start == VP9_MV_CLASS0_HP_0_COUNT_START */
1653				coef_node_start = VP9_MV_CLASS0_HP_0_START;
1654
1655			den = count[coef_count_node_start] +
1656			      count[coef_count_node_start + 1];
1657
1658			prob_32 = prev_prob[coef_node_start / 4 * 2];
1659			prob_res = coef_node_start & 3;
1660			prob_shift = prob_res * 8;
1661			pre_prob = (prob_32 >> prob_shift) & 0xff;
1662
1663			if (den == 0) {
1664				new_prob = pre_prob;
1665			} else {
1666				m_count = min(den, MODE_MV_COUNT_SAT);
1667				get_prob =
1668				clip_prob(div_r32(((int64_t)
1669					count[coef_count_node_start] * 256 +
1670					(den >> 1)),
1671					den));
1672
1673				/* weighted prob */
1674				factor = count_to_update_factor[m_count];
1675				new_prob =
1676					round_power_of_two(pre_prob *
1677							   (256 - factor) +
1678							   get_prob * factor,
1679							   8);
1680			}
1681
1682			cur_prob[coef_node_start / 4 * 2] =
1683				(cur_prob[coef_node_start / 4 * 2] &
1684				 (~(0xff << prob_shift))) |
1685				(new_prob << prob_shift);
1686
1687			coef_node_start = coef_node_start + 1;
1688		}
1689
1690		coef_node_start = VP9_INTER_MODE_START;
1691		coef_count_node_start = VP9_INTER_MODE_COUNT_START;
1692		for (tree_i = 0 ; tree_i < 7 ; tree_i++) {
1693			for (node = 0 ; node < 3 ; node++) {
1694				unsigned int start = coef_count_node_start;
1695
1696				switch (node) {
1697				case 2:
1698					tree_left = count[start + 1];
1699					tree_right = count[start + 3];
1700					break;
1701				case 1:
1702					tree_left = count[start + 0];
1703					tree_right = count[start + 1] +
1704						     count[start + 3];
1705					break;
1706				default:
1707					tree_left = count[start + 2];
1708					tree_right = count[start + 0] +
1709						     count[start + 1] +
1710						     count[start + 3];
1711					break;
1712				}
1713
1714				vp9_tree_merge_probs(prev_prob, cur_prob,
1715						     coef_node_start,
1716						     tree_left, tree_right,
1717						     tree_i, node);
1718
1719				coef_node_start = coef_node_start + 1;
1720			}
1721
1722			coef_count_node_start = coef_count_node_start + 4;
1723		}
1724
1725		coef_node_start = VP9_IF_Y_MODE_START;
1726		coef_count_node_start = VP9_IF_Y_MODE_COUNT_START;
1727		for (tree_i = 0 ; tree_i < 14 ; tree_i++) {
1728			for (node = 0 ; node < 9 ; node++) {
1729				unsigned int start = coef_count_node_start;
1730
1731				switch (node) {
1732				case 8:
1733					tree_left =
1734						count[start + D153_PRED];
1735					tree_right =
1736						count[start + D207_PRED];
1737					break;
1738				case 7:
1739					tree_left =
1740						count[start + D63_PRED];
1741					tree_right =
1742						count[start + D207_PRED] +
1743						count[start + D153_PRED];
1744					break;
1745				case 6:
1746					tree_left =
1747						count[start + D45_PRED];
1748					tree_right =
1749						count[start + D207_PRED] +
1750						count[start + D153_PRED] +
1751						count[start + D63_PRED];
1752					break;
1753				case 5:
1754					tree_left =
1755						count[start + D135_PRED];
1756					tree_right =
1757						count[start + D117_PRED];
1758					break;
1759				case 4:
1760					tree_left =
1761						count[start + H_PRED];
1762					tree_right =
1763						count[start + D117_PRED] +
1764						count[start + D135_PRED];
1765					break;
1766				case 3:
1767					tree_left =
1768						count[start + H_PRED] +
1769						count[start + D117_PRED] +
1770						count[start + D135_PRED];
1771					tree_right =
1772						count[start + D45_PRED] +
1773						count[start + D207_PRED] +
1774						count[start + D153_PRED] +
1775						count[start + D63_PRED];
1776					break;
1777				case 2:
1778					tree_left =
1779						count[start + V_PRED];
1780					tree_right =
1781						count[start + H_PRED] +
1782						count[start + D117_PRED] +
1783						count[start + D135_PRED] +
1784						count[start + D45_PRED] +
1785						count[start + D207_PRED] +
1786						count[start + D153_PRED] +
1787						count[start + D63_PRED];
1788					break;
1789				case 1:
1790					tree_left =
1791						count[start + TM_PRED];
1792					tree_right =
1793						count[start + V_PRED] +
1794						count[start + H_PRED] +
1795						count[start + D117_PRED] +
1796						count[start + D135_PRED] +
1797						count[start + D45_PRED] +
1798						count[start + D207_PRED] +
1799						count[start + D153_PRED] +
1800						count[start + D63_PRED];
1801					break;
1802				default:
1803					tree_left =
1804						count[start + DC_PRED];
1805					tree_right =
1806						count[start + TM_PRED] +
1807						count[start + V_PRED] +
1808						count[start + H_PRED] +
1809						count[start + D117_PRED] +
1810						count[start + D135_PRED] +
1811						count[start + D45_PRED] +
1812						count[start + D207_PRED] +
1813						count[start + D153_PRED] +
1814						count[start + D63_PRED];
1815					break;
1816				}
1817
1818				vp9_tree_merge_probs(prev_prob, cur_prob,
1819						     coef_node_start,
1820						     tree_left, tree_right,
1821						     tree_i, node);
1822
1823				coef_node_start = coef_node_start + 1;
1824			}
1825			coef_count_node_start = coef_count_node_start + 10;
1826		}
1827
1828		coef_node_start = VP9_PARTITION_P_START;
1829		coef_count_node_start = VP9_PARTITION_P_COUNT_START;
1830		for (tree_i = 0 ; tree_i < 16 ; tree_i++) {
1831			for (node = 0 ; node < 3 ; node++) {
1832				unsigned int start = coef_count_node_start;
1833
1834				switch (node) {
1835				case 2:
1836					tree_left = count[start + 2];
1837					tree_right = count[start + 3];
1838					break;
1839				case 1:
1840					tree_left = count[start + 1];
1841					tree_right = count[start + 2] +
1842						     count[start + 3];
1843					break;
1844				default:
1845					tree_left = count[start + 0];
1846					tree_right = count[start + 1] +
1847						     count[start + 2] +
1848						     count[start + 3];
1849					break;
1850				}
1851
1852				vp9_tree_merge_probs(prev_prob, cur_prob,
1853						     coef_node_start,
1854						     tree_left, tree_right,
1855						     tree_i, node);
1856
1857				coef_node_start = coef_node_start + 1;
1858			}
1859
1860			coef_count_node_start = coef_count_node_start + 4;
1861		}
1862
1863		coef_node_start = VP9_INTERP_START;
1864		coef_count_node_start = VP9_INTERP_COUNT_START;
1865		for (tree_i = 0 ; tree_i < 4 ; tree_i++) {
1866			for (node = 0 ; node < 2 ; node++) {
1867				unsigned int start = coef_count_node_start;
1868
1869				switch (node) {
1870				case 1:
1871					tree_left = count[start + 1];
1872					tree_right = count[start + 2];
1873					break;
1874				default:
1875					tree_left = count[start + 0];
1876					tree_right = count[start + 1] +
1877						     count[start + 2];
1878					break;
1879				}
1880
1881				vp9_tree_merge_probs(prev_prob, cur_prob,
1882						     coef_node_start,
1883						     tree_left, tree_right,
1884						     tree_i, node);
1885
1886				coef_node_start = coef_node_start + 1;
1887			}
1888			coef_count_node_start = coef_count_node_start + 3;
1889		}
1890
1891		coef_node_start = VP9_MV_JOINTS_START;
1892		coef_count_node_start = VP9_MV_JOINTS_COUNT_START;
1893		for (tree_i = 0 ; tree_i < 1 ; tree_i++) {
1894			for (node = 0 ; node < 3 ; node++) {
1895				unsigned int start = coef_count_node_start;
1896
1897				switch (node) {
1898				case 2:
1899					tree_left = count[start + 2];
1900					tree_right = count[start + 3];
1901					break;
1902				case 1:
1903					tree_left = count[start + 1];
1904					tree_right = count[start + 2] +
1905						     count[start + 3];
1906					break;
1907				default:
1908					tree_left = count[start + 0];
1909					tree_right = count[start + 1] +
1910						     count[start + 2] +
1911						     count[start + 3];
1912					break;
1913				}
1914
1915				vp9_tree_merge_probs(prev_prob, cur_prob,
1916						     coef_node_start,
1917						     tree_left, tree_right,
1918						     tree_i, node);
1919
1920				coef_node_start = coef_node_start + 1;
1921			}
1922			coef_count_node_start = coef_count_node_start + 4;
1923		}
1924
1925		for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) {
1926			coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START :
1927						  VP9_MV_CLASSES_0_START;
1928			coef_count_node_start = mvd_i ?
1929					VP9_MV_CLASSES_1_COUNT_START :
1930					VP9_MV_CLASSES_0_COUNT_START;
1931			tree_i = 0;
1932			for (node = 0; node < 10; node++) {
1933				unsigned int start = coef_count_node_start;
1934
1935				switch (node) {
1936				case 9:
1937					tree_left = count[start + 9];
1938					tree_right = count[start + 10];
1939					break;
1940				case 8:
1941					tree_left = count[start + 7];
1942					tree_right = count[start + 8];
1943					break;
1944				case 7:
1945					tree_left = count[start + 7] +
1946						     count[start + 8];
1947					tree_right = count[start + 9] +
1948						     count[start + 10];
1949					break;
1950				case 6:
1951					tree_left = count[start + 6];
1952					tree_right = count[start + 7] +
1953						     count[start + 8] +
1954						     count[start + 9] +
1955						     count[start + 10];
1956					break;
1957				case 5:
1958					tree_left = count[start + 4];
1959					tree_right = count[start + 5];
1960					break;
1961				case 4:
1962					tree_left = count[start + 4] +
1963						    count[start + 5];
1964					tree_right = count[start + 6] +
1965						     count[start + 7] +
1966						     count[start + 8] +
1967						     count[start + 9] +
1968						     count[start + 10];
1969					break;
1970				case 3:
1971					tree_left = count[start + 2];
1972					tree_right = count[start + 3];
1973					break;
1974				case 2:
1975					tree_left = count[start + 2] +
1976						    count[start + 3];
1977					tree_right = count[start + 4] +
1978						     count[start + 5] +
1979						     count[start + 6] +
1980						     count[start + 7] +
1981						     count[start + 8] +
1982						     count[start + 9] +
1983						     count[start + 10];
1984					break;
1985				case 1:
1986					tree_left = count[start + 1];
1987					tree_right = count[start + 2] +
1988						     count[start + 3] +
1989						     count[start + 4] +
1990						     count[start + 5] +
1991						     count[start + 6] +
1992						     count[start + 7] +
1993						     count[start + 8] +
1994						     count[start + 9] +
1995						     count[start + 10];
1996					break;
1997				default:
1998					tree_left = count[start + 0];
1999					tree_right = count[start + 1] +
2000						     count[start + 2] +
2001						     count[start + 3] +
2002						     count[start + 4] +
2003						     count[start + 5] +
2004						     count[start + 6] +
2005						     count[start + 7] +
2006						     count[start + 8] +
2007						     count[start + 9] +
2008						     count[start + 10];
2009					break;
2010				}
2011
2012				vp9_tree_merge_probs(prev_prob, cur_prob,
2013						     coef_node_start,
2014						     tree_left, tree_right,
2015						     tree_i, node);
2016
2017				coef_node_start = coef_node_start + 1;
2018			}
2019
2020			coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START :
2021						  VP9_MV_CLASS0_0_START;
2022			coef_count_node_start =	mvd_i ?
2023						VP9_MV_CLASS0_1_COUNT_START :
2024						VP9_MV_CLASS0_0_COUNT_START;
2025			tree_i = 0;
2026			node = 0;
2027			tree_left = count[coef_count_node_start + 0];
2028			tree_right = count[coef_count_node_start + 1];
2029
2030			vp9_tree_merge_probs(prev_prob, cur_prob,
2031					     coef_node_start,
2032					     tree_left, tree_right,
2033					     tree_i, node);
2034			coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START :
2035						  VP9_MV_CLASS0_FP_0_START;
2036			coef_count_node_start =	mvd_i ?
2037					VP9_MV_CLASS0_FP_1_COUNT_START :
2038					VP9_MV_CLASS0_FP_0_COUNT_START;
2039
2040			for (tree_i = 0; tree_i < 3; tree_i++) {
2041				for (node = 0; node < 3; node++) {
2042					unsigned int start =
2043						coef_count_node_start;
2044					switch (node) {
2045					case 2:
2046						tree_left = count[start + 2];
2047						tree_right = count[start + 3];
2048						break;
2049					case 1:
2050						tree_left = count[start + 1];
2051						tree_right = count[start + 2] +
2052							     count[start + 3];
2053						break;
2054					default:
2055						tree_left = count[start + 0];
2056						tree_right = count[start + 1] +
2057							     count[start + 2] +
2058							     count[start + 3];
2059						break;
2060					}
2061
2062					vp9_tree_merge_probs(prev_prob,
2063							     cur_prob,
2064							     coef_node_start,
2065							     tree_left,
2066							     tree_right,
2067							     tree_i, node);
2068
2069					coef_node_start = coef_node_start + 1;
2070				}
2071				coef_count_node_start =
2072					coef_count_node_start + 4;
2073			}
2074		}
2075	}
2076}
2077
2078static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess)
2079{
2080	struct amvdec_core *core = sess->core;
2081	struct codec_vp9 *vp9 = sess->priv;
2082	u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG);
2083	u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG);
2084	int i;
2085
2086	if (!vp9)
2087		return IRQ_HANDLED;
2088
2089	mutex_lock(&vp9->lock);
2090	if (dec_status != VP9_HEAD_PARSER_DONE) {
2091		dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n",
2092			dec_status);
2093		amvdec_abort(sess);
2094		goto unlock;
2095	}
2096
2097	pr_debug("ISR: %08X;%08X\n", dec_status, prob_status);
2098	sess->keyframe_found = 1;
2099
2100	if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) {
2101		/* VP9_REQ_ADAPT_PROB */
2102		u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr +
2103					 PROB_OFFSET) +
2104					((prob_status >> 8) * 0x1000);
2105		u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr +
2106					 PROB_OFFSET) + 0x4000;
2107		u8 *count_b = (u8 *)vp9->workspace_vaddr +
2108				   COUNT_OFFSET;
2109		int last_frame_type = vp9->prev_frame ?
2110						vp9->prev_frame->type :
2111						KEY_FRAME;
2112
2113		adapt_coef_probs(last_frame_type == KEY_FRAME,
2114				 vp9->cur_frame->type == KEY_FRAME ? 1 : 0,
2115				 prob_status >> 8,
2116				 (unsigned int *)prev_prob_b,
2117				 (unsigned int *)cur_prob_b,
2118				 (unsigned int *)count_b);
2119
2120		memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE);
2121		amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0);
2122	}
2123
2124	/* Invalidate first 3 refs */
2125	for (i = 0; i < REFS_PER_FRAME ; ++i)
2126		vp9->frame_refs[i] = NULL;
2127
2128	vp9->prev_frame = vp9->cur_frame;
2129	codec_vp9_update_ref(vp9);
2130
2131	codec_vp9_fetch_rpm(sess);
2132	if (codec_vp9_process_rpm(vp9)) {
2133		amvdec_src_change(sess, vp9->width, vp9->height, 16);
2134
2135		/* No frame is actually processed */
2136		vp9->cur_frame = NULL;
2137
2138		/* Show the remaining frame */
2139		codec_vp9_show_frame(sess);
2140
2141		/* FIXME: Save refs for resized frame */
2142		if (vp9->frames_num)
2143			codec_vp9_save_refs(vp9);
2144
2145		goto unlock;
2146	}
2147
2148	codec_vp9_process_lf(vp9);
2149	codec_vp9_process_frame(sess);
2150	codec_vp9_show_frame(sess);
2151
2152unlock:
2153	mutex_unlock(&vp9->lock);
2154	return IRQ_HANDLED;
2155}
2156
2157static irqreturn_t codec_vp9_isr(struct amvdec_session *sess)
2158{
2159	return IRQ_WAKE_THREAD;
2160}
2161
2162struct amvdec_codec_ops codec_vp9_ops = {
2163	.start = codec_vp9_start,
2164	.stop = codec_vp9_stop,
2165	.isr = codec_vp9_isr,
2166	.threaded_isr = codec_vp9_threaded_isr,
2167	.num_pending_bufs = codec_vp9_num_pending_bufs,
2168	.drain = codec_vp9_flush_output,
2169	.resume = codec_vp9_resume,
2170};
2171