1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2021 MediaTek Inc.
4 * Author: George Sun <george.sun@mediatek.com>
5 */
6
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <media/videobuf2-dma-contig.h>
10#include <media/v4l2-vp9.h>
11
12#include "../mtk_vcodec_dec.h"
13#include "../../common/mtk_vcodec_intr.h"
14#include "../vdec_drv_base.h"
15#include "../vdec_drv_if.h"
16#include "../vdec_vpu_if.h"
17
18/* reset_frame_context defined in VP9 spec */
19#define VP9_RESET_FRAME_CONTEXT_NONE0 0
20#define VP9_RESET_FRAME_CONTEXT_NONE1 1
21#define VP9_RESET_FRAME_CONTEXT_SPEC 2
22#define VP9_RESET_FRAME_CONTEXT_ALL 3
23
24#define VP9_TILE_BUF_SIZE 4096
25#define VP9_PROB_BUF_SIZE 2560
26#define VP9_COUNTS_BUF_SIZE 16384
27
28#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
29#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
30#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
31#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6)
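/*
 * For illustration: HDR_FLAG(KEY_FRAME) expands to
 * !!((hdr)->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME), so the macro relies on a
 * local variable named "hdr" being in scope at the call site; LF_FLAG() and
 * SEG_FLAG() work the same way for "lf" and "seg". VP9_BAND_6() gives the
 * number of coefficient probability contexts in a band: 3 for band 0 and 6
 * for bands 1..5.
 */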
32
33/*
34 * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
35 */
36struct vdec_vp9_slice_frame_ctx {
37	struct {
38		u8 probs[6][3];
39		u8 padding[2];
40	} coef_probs[4][2][2][6];
41
42	u8 y_mode_prob[4][16];
43	u8 switch_interp_prob[4][16];
44	u8 seg[32];  /* ignore */
45	u8 comp_inter_prob[16];
46	u8 comp_ref_prob[16];
47	u8 single_ref_prob[5][2];
48	u8 single_ref_prob_padding[6];
49
50	u8 joint[3];
51	u8 joint_padding[13];
52	struct {
53		u8 sign;
54		u8 classes[10];
55		u8 padding[5];
56	} sign_classes[2];
57	struct {
58		u8 class0[1];
59		u8 bits[10];
60		u8 padding[5];
61	} class0_bits[2];
62	struct {
63		u8 class0_fp[2][3];
64		u8 fp[3];
65		u8 class0_hp;
66		u8 hp;
67		u8 padding[5];
68	} class0_fp_hp[2];
69
70	u8 uv_mode_prob[10][16];
71	u8 uv_mode_prob_padding[2][16];
72
73	u8 partition_prob[16][4];
74
75	u8 inter_mode_probs[7][4];
76	u8 skip_probs[4];
77
78	u8 tx_p8x8[2][4];
79	u8 tx_p16x16[2][4];
80	u8 tx_p32x32[2][4];
81	u8 intra_inter_prob[8];
82};
83
84/*
85 * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
86 */
87struct vdec_vp9_slice_frame_counts {
88	union {
89		struct {
90			u32 band_0[3];
91			u32 padding0[1];
92			u32 band_1_5[5][6];
93			u32 padding1[2];
94		} eob_branch[4][2][2];
95		u32 eob_branch_space[256 * 4];
96	};
97
98	struct {
99		u32 band_0[3][4];
100		u32 band_1_5[5][6][4];
101	} coef_probs[4][2][2];
102
103	u32 intra_inter[4][2];
104	u32 comp_inter[5][2];
105	u32 comp_inter_padding[2];
106	u32 comp_ref[5][2];
107	u32 comp_ref_padding[2];
108	u32 single_ref[5][2][2];
109	u32 inter_mode[7][4];
110	u32 y_mode[4][12];
111	u32 uv_mode[10][10];
112	u32 partition[16][4];
113	u32 switchable_interp[4][4];
114
115	u32 tx_p8x8[2][2];
116	u32 tx_p16x16[2][4];
117	u32 tx_p32x32[2][4];
118
119	u32 skip[3][4];
120
121	u32 joint[4];
122
123	struct {
124		u32 sign[2];
125		u32 class0[2];
126		u32 classes[12];
127		u32 bits[10][2];
128		u32 padding[4];
129		u32 class0_fp[2][4];
130		u32 fp[4];
131		u32 class0_hp[2];
132		u32 hp[2];
133	} mvcomp[2];
134
135	u32 reserved[126][4];
136};
137
138/**
 * struct vdec_vp9_slice_counts_map - vp9 counts tables mapped into
 *                                    v4l2_vp9_frame_symbol_counts
141 * @skip:	skip counts.
142 * @y_mode:	Y prediction mode counts.
143 * @filter:	interpolation filter counts.
144 * @sign:	motion vector sign counts.
145 * @classes:	motion vector class counts.
146 * @class0:	motion vector class0 bit counts.
147 * @bits:	motion vector bits counts.
148 * @class0_fp:	motion vector class0 fractional bit counts.
149 * @fp:	motion vector fractional bit counts.
150 * @class0_hp:	motion vector class0 high precision fractional bit counts.
151 * @hp:	motion vector high precision fractional bit counts.
152 */
153struct vdec_vp9_slice_counts_map {
154	u32 skip[3][2];
155	u32 y_mode[4][10];
156	u32 filter[4][3];
157	u32 sign[2][2];
158	u32 classes[2][11];
159	u32 class0[2][2];
160	u32 bits[2][10][2];
161	u32 class0_fp[2][2][4];
162	u32 fp[2][4];
163	u32 class0_hp[2][2];
164	u32 hp[2][2];
165};
166
167/*
168 * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
169 *                                             used for decoding
170 */
171struct vdec_vp9_slice_uncompressed_header {
172	u8 profile;
173	u8 last_frame_type;
174	u8 frame_type;
175
176	u8 last_show_frame;
177	u8 show_frame;
178	u8 error_resilient_mode;
179
180	u8 bit_depth;
181	u8 padding0[1];
182	u16 last_frame_width;
183	u16 last_frame_height;
184	u16 frame_width;
185	u16 frame_height;
186
187	u8 intra_only;
188	u8 reset_frame_context;
189	u8 ref_frame_sign_bias[4];
190	u8 allow_high_precision_mv;
191	u8 interpolation_filter;
192
193	u8 refresh_frame_context;
194	u8 frame_parallel_decoding_mode;
195	u8 frame_context_idx;
196
197	/* loop_filter_params */
198	u8 loop_filter_level;
199	u8 loop_filter_sharpness;
200	u8 loop_filter_delta_enabled;
201	s8 loop_filter_ref_deltas[4];
202	s8 loop_filter_mode_deltas[2];
203
204	/* quantization_params */
205	u8 base_q_idx;
206	s8 delta_q_y_dc;
207	s8 delta_q_uv_dc;
208	s8 delta_q_uv_ac;
209
210	/* segmentation_params */
211	u8 segmentation_enabled;
212	u8 segmentation_update_map;
213	u8 segmentation_tree_probs[7];
214	u8 padding1[1];
	u8 segmentation_temporal_update;
216	u8 segmentation_pred_prob[3];
217	u8 segmentation_update_data;
218	u8 segmentation_abs_or_delta_update;
219	u8 feature_enabled[8];
220	s16 feature_value[8][4];
221
222	/* tile_info */
223	u8 tile_cols_log2;
224	u8 tile_rows_log2;
225	u8 padding2[2];
226
227	u16 uncompressed_header_size;
228	u16 header_size_in_bytes;
229
230	/* LAT OUT, CORE IN */
231	u32 dequant[8][4];
232};
233
234/*
235 * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
236 *                                           used for decoding.
237 */
238struct vdec_vp9_slice_compressed_header {
239	u8 tx_mode;
240	u8 ref_mode;
241	u8 comp_fixed_ref;
242	u8 comp_var_ref[2];
243	u8 padding[3];
244};
245
246/*
247 * struct vdec_vp9_slice_tiles - vp9 tile syntax
248 */
249struct vdec_vp9_slice_tiles {
250	u32 size[4][64];
251	u32 mi_rows[4];
252	u32 mi_cols[64];
253	u8 actual_rows;
254	u8 padding[7];
255};
256
257/*
258 * struct vdec_vp9_slice_reference - vp9 reference frame information
259 */
260struct vdec_vp9_slice_reference {
261	u16 frame_width;
262	u16 frame_height;
263	u8 bit_depth;
264	u8 subsampling_x;
265	u8 subsampling_y;
266	u8 padding;
267};
268
269/*
270 * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
271 */
272struct vdec_vp9_slice_frame {
273	struct vdec_vp9_slice_uncompressed_header uh;
274	struct vdec_vp9_slice_compressed_header ch;
275	struct vdec_vp9_slice_tiles tiles;
276	struct vdec_vp9_slice_reference ref[3];
277};
278
279/*
280 * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
281 */
282struct vdec_vp9_slice_init_vsi {
283	unsigned int architecture;
284	unsigned int reserved;
285	u64 core_vsi;
286	/* default frame context's position in MicroP */
287	u64 default_frame_ctx;
288};
289
290/*
291 * struct vdec_vp9_slice_mem - memory address and size
292 */
293struct vdec_vp9_slice_mem {
294	union {
295		u64 buf;
296		dma_addr_t dma_addr;
297	};
298	union {
299		size_t size;
300		dma_addr_t dma_addr_end;
301		u64 padding;
302	};
303};
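/*
 * Note on the unions above: @buf and @dma_addr alias each other, and the
 * second member normally carries a size; for the @trans buffer it is reused
 * as @dma_addr_end (see vdec_vp9_slice_setup_lat_buffer() and
 * vdec_vp9_slice_update_lat()).
 */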
304
305/*
306 * struct vdec_vp9_slice_bs - input buffer for decoding
307 */
308struct vdec_vp9_slice_bs {
309	struct vdec_vp9_slice_mem buf;
310	struct vdec_vp9_slice_mem frame;
311};
312
313/*
314 * struct vdec_vp9_slice_fb - frame buffer for decoding
315 */
316struct vdec_vp9_slice_fb {
317	struct vdec_vp9_slice_mem y;
318	struct vdec_vp9_slice_mem c;
319};
320
321/*
322 * struct vdec_vp9_slice_state - decoding state
323 */
324struct vdec_vp9_slice_state {
325	int err;
326	unsigned int full;
327	unsigned int timeout;
328	unsigned int perf;
329
330	unsigned int crc[12];
331};
332
333/**
334 * struct vdec_vp9_slice_vsi - exchange decoding information
335 *                             between Main CPU and MicroP
336 *
337 * @bs:	input buffer
338 * @fb:	output buffer
339 * @ref:	3 reference buffers
340 * @mv:	mv working buffer
341 * @seg:	segmentation working buffer
342 * @tile:	tile buffer
343 * @prob:	prob table buffer, used to set/update prob table
344 * @counts:	counts table buffer, used to update prob table
345 * @ube:	general buffer
346 * @trans:	trans buffer position in general buffer
347 * @err_map:	error buffer
348 * @row_info:	row info buffer
349 * @frame:	decoding syntax
350 * @state:	decoding state
351 */
352struct vdec_vp9_slice_vsi {
353	/* used in LAT stage */
354	struct vdec_vp9_slice_bs bs;
355	/* used in Core stage */
356	struct vdec_vp9_slice_fb fb;
357	struct vdec_vp9_slice_fb ref[3];
358
359	struct vdec_vp9_slice_mem mv[2];
360	struct vdec_vp9_slice_mem seg[2];
361	struct vdec_vp9_slice_mem tile;
362	struct vdec_vp9_slice_mem prob;
363	struct vdec_vp9_slice_mem counts;
364
365	/* LAT stage's output, Core stage's input */
366	struct vdec_vp9_slice_mem ube;
367	struct vdec_vp9_slice_mem trans;
368	struct vdec_vp9_slice_mem err_map;
369	struct vdec_vp9_slice_mem row_info;
370
371	/* decoding parameters */
372	struct vdec_vp9_slice_frame frame;
373
374	struct vdec_vp9_slice_state state;
375};
376
377/**
378 * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
379 *                             pass it from lat to core
380 *
381 * @vsi:	local vsi. copy to/from remote vsi before/after decoding
382 * @ref_idx:	reference buffer index
383 * @seq:	picture sequence
384 * @state:	decoding state
385 */
386struct vdec_vp9_slice_pfc {
387	struct vdec_vp9_slice_vsi vsi;
388
389	u64 ref_idx[3];
390
391	int seq;
392
393	/* LAT/Core CRC */
394	struct vdec_vp9_slice_state state[2];
395};
396
397/*
398 * enum vdec_vp9_slice_resolution_level
399 */
400enum vdec_vp9_slice_resolution_level {
401	VP9_RES_NONE,
402	VP9_RES_FHD,
403	VP9_RES_4K,
404	VP9_RES_8K,
405};
406
407/*
 * struct vdec_vp9_slice_ref - picture's width & height, kept so the
 *                             picture can be used later as a reference
410 */
411struct vdec_vp9_slice_ref {
412	unsigned int width;
413	unsigned int height;
414};
415
416/**
417 * struct vdec_vp9_slice_instance - represent one vp9 instance
418 *
419 * @ctx:		pointer to codec's context
420 * @vpu:		VPU instance
421 * @seq:		global picture sequence
422 * @level:		level of current resolution
423 * @width:		width of last picture
424 * @height:		height of last picture
425 * @frame_type:	frame_type of last picture
426 * @irq:		irq to Main CPU or MicroP
427 * @show_frame:	show_frame of last picture
428 * @dpb:		picture information (width/height) for reference
429 * @mv:		mv working buffer
430 * @seg:		segmentation working buffer
431 * @tile:		tile buffer
432 * @prob:		prob table buffer, used to set/update prob table
433 * @counts:		counts table buffer, used to update prob table
 * @frame_ctx:		4 frame contexts as defined by the VP9 spec
 * @frame_ctx_helper:	frame context in the v4l2 helper layout, used as
 *			scratch space when adapting probabilities
 * @dirty:		state of each frame context
 * @init_vsi:		vsi used to initialize the VP9 instance
 * @vsi:		vsi used for decoding/flush ...
 * @core_vsi:		vsi used for Core stage
 *
 * @sc_pfc:		per-frame context used for single core decoding
 * @counts_map:	staging buffers used to map @counts into @counts_helper
 * @counts_helper:	counts table in the v4l2 helper layout
444 */
445struct vdec_vp9_slice_instance {
446	struct mtk_vcodec_dec_ctx *ctx;
447	struct vdec_vpu_inst vpu;
448
449	int seq;
450
451	enum vdec_vp9_slice_resolution_level level;
452
453	/* for resolution change and get_pic_info */
454	unsigned int width;
455	unsigned int height;
456
457	/* for last_frame_type */
458	unsigned int frame_type;
459	unsigned int irq;
460
461	unsigned int show_frame;
462
463	/* maintain vp9 reference frame state */
464	struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];
465
466	/*
467	 * normal working buffers
	 * mv[0]/seg[0]/tile/prob/counts are used for LAT
	 * mv[1]/seg[1] are used for CORE
470	 */
471	struct mtk_vcodec_mem mv[2];
472	struct mtk_vcodec_mem seg[2];
473	struct mtk_vcodec_mem tile;
474	struct mtk_vcodec_mem prob;
475	struct mtk_vcodec_mem counts;
476
477	/* 4 prob tables */
478	struct vdec_vp9_slice_frame_ctx frame_ctx[4];
	/* helper frame context, used when adapting probabilities */
480	struct v4l2_vp9_frame_context frame_ctx_helper;
481	unsigned char dirty[4];
482
483	/* MicroP vsi */
484	union {
485		struct vdec_vp9_slice_init_vsi *init_vsi;
486		struct vdec_vp9_slice_vsi *vsi;
487	};
488	struct vdec_vp9_slice_vsi *core_vsi;
489
490	struct vdec_vp9_slice_pfc sc_pfc;
491	struct vdec_vp9_slice_counts_map counts_map;
492	struct v4l2_vp9_frame_symbol_counts counts_helper;
493};
494
495/*
 * All VP9 instances share this default frame context.
497 */
498static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
499static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);
500
501static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
502
503static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
504{
505	struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
506	struct vdec_vp9_slice_frame_ctx *frame_ctx;
507	struct mtk_vcodec_dec_ctx *ctx;
508	struct vdec_vp9_slice_init_vsi *vsi;
509	int ret = 0;
510
511	ctx = instance->ctx;
512	vsi = instance->vpu.vsi;
513	if (!ctx || !vsi)
514		return -EINVAL;
515
516	remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
517						     (u32)vsi->default_frame_ctx);
518	if (!remote_frame_ctx) {
519		mtk_vdec_err(ctx, "failed to map default frame ctx\n");
520		return -EINVAL;
521	}
522
523	mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
524	if (vdec_vp9_slice_default_frame_ctx)
525		goto out;
526
527	frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL);
528	if (!frame_ctx) {
529		ret = -ENOMEM;
530		goto out;
531	}
532
533	vdec_vp9_slice_default_frame_ctx = frame_ctx;
534
535out:
536	mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
537
538	return ret;
539}
540
541static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
542					       struct vdec_vp9_slice_vsi *vsi)
543{
544	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
545	enum vdec_vp9_slice_resolution_level level;
546	/* super blocks */
547	unsigned int max_sb_w;
548	unsigned int max_sb_h;
549	unsigned int max_w;
550	unsigned int max_h;
551	unsigned int w;
552	unsigned int h;
553	size_t size;
554	int ret;
555	int i;
556
557	w = vsi->frame.uh.frame_width;
558	h = vsi->frame.uh.frame_height;
559
560	if (w > VCODEC_DEC_4K_CODED_WIDTH ||
561	    h > VCODEC_DEC_4K_CODED_HEIGHT) {
562		return -EINVAL;
563	} else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
564		/* 4K */
565		level = VP9_RES_4K;
566		max_w = VCODEC_DEC_4K_CODED_WIDTH;
567		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
568	} else {
569		/* FHD */
570		level = VP9_RES_FHD;
571		max_w = MTK_VDEC_MAX_W;
572		max_h = MTK_VDEC_MAX_H;
573	}
574
575	if (level == instance->level)
576		return 0;
577
578	mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u",
579		       instance->level, level, w, h);
580
581	max_sb_w = DIV_ROUND_UP(max_w, 64);
582	max_sb_h = DIV_ROUND_UP(max_h, 64);
583	ret = -ENOMEM;
584
	/*
	 * LAT flush must wait until the core stage is idle;
	 * otherwise the core stage would use freed buffers.
	 */
589
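	/*
	 * Sizing sketch for the formulas below: both buffers scale with the
	 * number of 64x64 superblocks (576 bytes of MV data and 32 bytes of
	 * segmentation data per superblock, plus a little slack). Assuming
	 * the 4K coded size is 4096x2304, max_sb_w = 64 and max_sb_h = 36,
	 * so mv needs (64 * 36 + 2) * 576 bytes (~1.3 MB) and seg needs
	 * 64 * 36 * 32 + 256 bytes (~72 KB).
	 */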
590	size = (max_sb_w * max_sb_h + 2) * 576;
591	for (i = 0; i < 2; i++) {
592		if (instance->mv[i].va)
593			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
594		instance->mv[i].size = size;
595		if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
596			goto err;
597	}
598
599	size = (max_sb_w * max_sb_h * 32) + 256;
600	for (i = 0; i < 2; i++) {
601		if (instance->seg[i].va)
602			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
603		instance->seg[i].size = size;
604		if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
605			goto err;
606	}
607
608	if (!instance->tile.va) {
609		instance->tile.size = VP9_TILE_BUF_SIZE;
610		if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
611			goto err;
612	}
613
614	if (!instance->prob.va) {
615		instance->prob.size = VP9_PROB_BUF_SIZE;
616		if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
617			goto err;
618	}
619
620	if (!instance->counts.va) {
621		instance->counts.size = VP9_COUNTS_BUF_SIZE;
622		if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
623			goto err;
624	}
625
626	instance->level = level;
627	return 0;
628
629err:
630	instance->level = VP9_RES_NONE;
631	return ret;
632}
633
634static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
635{
636	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
637	int i;
638
639	for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
640		if (instance->mv[i].va)
641			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
642	}
643	for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
644		if (instance->seg[i].va)
645			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
646	}
647	if (instance->tile.va)
648		mtk_vcodec_mem_free(ctx, &instance->tile);
649	if (instance->prob.va)
650		mtk_vcodec_mem_free(ctx, &instance->prob);
651	if (instance->counts.va)
652		mtk_vcodec_mem_free(ctx, &instance->counts);
653
654	instance->level = VP9_RES_NONE;
655}
656
657static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
658					   struct vdec_vp9_slice_vsi *remote_vsi,
659					   int skip)
660{
661	struct vdec_vp9_slice_frame *rf;
662	struct vdec_vp9_slice_frame *f;
663
664	/*
665	 * compressed header
666	 * dequant
667	 * buffer position
668	 * decode state
669	 */
670	if (!skip) {
671		rf = &remote_vsi->frame;
672		f = &vsi->frame;
673		memcpy(&f->ch, &rf->ch, sizeof(f->ch));
674		memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
675		memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
676	}
677
678	memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
679}
680
681static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
682					 struct vdec_vp9_slice_vsi *remote_vsi)
683{
684	memcpy(remote_vsi, vsi, sizeof(*vsi));
685}
686
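/*
 * Return the first MI (8x8 block) position of tile @idx, mirroring the tile
 * boundary calculation in the VP9 spec: round the MI count up to superblocks,
 * split the superblocks evenly according to tile_log2, convert back to MI
 * units and clamp to mi_num. For example, with mi_num = 136 (a 1088-pixel
 * dimension) and tile_log2 = 1, idx 0/1/2 yield MI offsets 0/64/136.
 */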
687static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
688{
689	int sbs = (mi_num + 7) >> 3;
690	int offset = ((idx * sbs) >> tile_log2) << 3;
691
692	return min(offset, mi_num);
693}
694
695static
696int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance)
697{
698	struct vb2_v4l2_buffer *src;
699	struct vb2_v4l2_buffer *dst;
700
701	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
702	if (!src)
703		return -EINVAL;
704
705	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
706	if (!dst)
707		return -EINVAL;
708
709	v4l2_m2m_buf_copy_metadata(src, dst, true);
710
711	return 0;
712}
713
714static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
715						 struct vdec_lat_buf *lat_buf)
716{
717	struct vb2_v4l2_buffer *src;
718	struct vb2_v4l2_buffer *dst;
719
720	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
721	if (!src)
722		return -EINVAL;
723
724	lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
725
726	dst = &lat_buf->ts_info;
727	v4l2_m2m_buf_copy_metadata(src, dst, true);
728	return 0;
729}
730
731static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
732				     struct vdec_vp9_slice_uncompressed_header *uh,
733				     struct v4l2_ctrl_vp9_frame *hdr)
734{
735	int i;
736
737	uh->profile = hdr->profile;
738	uh->last_frame_type = instance->frame_type;
739	uh->frame_type = !HDR_FLAG(KEY_FRAME);
740	uh->last_show_frame = instance->show_frame;
741	uh->show_frame = HDR_FLAG(SHOW_FRAME);
742	uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
743	uh->bit_depth = hdr->bit_depth;
744	uh->last_frame_width = instance->width;
745	uh->last_frame_height = instance->height;
746	uh->frame_width = hdr->frame_width_minus_1 + 1;
747	uh->frame_height = hdr->frame_height_minus_1 + 1;
748	uh->intra_only = HDR_FLAG(INTRA_ONLY);
749	/* map v4l2 enum to values defined in VP9 spec for firmware */
750	switch (hdr->reset_frame_context) {
751	case V4L2_VP9_RESET_FRAME_CTX_NONE:
752		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
753		break;
754	case V4L2_VP9_RESET_FRAME_CTX_SPEC:
755		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
756		break;
757	case V4L2_VP9_RESET_FRAME_CTX_ALL:
758		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
759		break;
760	default:
761		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
762		break;
763	}
764	/*
765	 * ref_frame_sign_bias specifies the intended direction
766	 * of the motion vector in time for each reference frame.
767	 * - INTRA_FRAME = 0,
768	 * - LAST_FRAME = 1,
769	 * - GOLDEN_FRAME = 2,
770	 * - ALTREF_FRAME = 3,
	 * ref_frame_sign_bias[INTRA_FRAME] is always 0;
	 * the V4L2 control only carries the other 3 directions.
773	 */
774	uh->ref_frame_sign_bias[0] = 0;
775	for (i = 0; i < 3; i++)
776		uh->ref_frame_sign_bias[i + 1] =
777			!!(hdr->ref_frame_sign_bias & (1 << i));
778	uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
779	uh->interpolation_filter = hdr->interpolation_filter;
780	uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
781	uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
782	uh->frame_context_idx = hdr->frame_context_idx;
783
784	/* tile info */
785	uh->tile_cols_log2 = hdr->tile_cols_log2;
786	uh->tile_rows_log2 = hdr->tile_rows_log2;
787
788	uh->uncompressed_header_size = hdr->uncompressed_header_size;
789	uh->header_size_in_bytes = hdr->compressed_header_size;
790}
791
792static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
793					   struct vdec_vp9_slice_uncompressed_header *uh,
794					   struct v4l2_ctrl_vp9_frame *hdr)
795{
796	int error_resilient_mode;
797	int reset_frame_context;
798	int key_frame;
799	int intra_only;
800	int i;
801
802	key_frame = HDR_FLAG(KEY_FRAME);
803	intra_only = HDR_FLAG(INTRA_ONLY);
804	error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
805	reset_frame_context = uh->reset_frame_context;
806
807	/*
808	 * according to "6.2 Uncompressed header syntax" in
809	 * "VP9 Bitstream & Decoding Process Specification",
810	 * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
811	 */
812	if (key_frame || intra_only || error_resilient_mode) {
813		/*
814		 * @reset_frame_context specifies
815		 * whether the frame context should be
816		 * reset to default values:
817		 * 0 or 1 means do not reset any frame context
818		 * 2 resets just the context specified in the frame header
819		 * 3 resets all contexts
820		 */
821		if (key_frame || error_resilient_mode ||
822		    reset_frame_context == 3) {
823			/* use default table */
824			for (i = 0; i < 4; i++)
825				instance->dirty[i] = 0;
826		} else if (reset_frame_context == 2) {
827			instance->dirty[uh->frame_context_idx] = 0;
828		}
829		uh->frame_context_idx = 0;
830	}
831}
832
833static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
834					     struct v4l2_vp9_loop_filter *lf)
835{
836	int i;
837
838	uh->loop_filter_level = lf->level;
839	uh->loop_filter_sharpness = lf->sharpness;
840	uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
841	for (i = 0; i < 4; i++)
842		uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
843	for (i = 0; i < 2; i++)
844		uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
845}
846
847static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
848					      struct v4l2_vp9_quantization *quant)
849{
850	uh->base_q_idx = quant->base_q_idx;
851	uh->delta_q_y_dc = quant->delta_q_y_dc;
852	uh->delta_q_uv_dc = quant->delta_q_uv_dc;
853	uh->delta_q_uv_ac = quant->delta_q_uv_ac;
854}
855
856static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
857					      struct v4l2_vp9_segmentation *seg)
858{
859	int i;
860	int j;
861
862	uh->segmentation_enabled = SEG_FLAG(ENABLED);
863	uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
864	for (i = 0; i < 7; i++)
865		uh->segmentation_tree_probs[i] = seg->tree_probs[i];
	uh->segmentation_temporal_update = SEG_FLAG(TEMPORAL_UPDATE);
867	for (i = 0; i < 3; i++)
868		uh->segmentation_pred_prob[i] = seg->pred_probs[i];
869	uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
870	uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
871	for (i = 0; i < 8; i++) {
872		uh->feature_enabled[i] = seg->feature_enabled[i];
873		for (j = 0; j < 4; j++)
874			uh->feature_value[i][j] = seg->feature_data[i][j];
875	}
876}
877
878static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
879				     struct v4l2_ctrl_vp9_frame *hdr)
880{
881	unsigned int rows_log2;
882	unsigned int cols_log2;
883	unsigned int rows;
884	unsigned int cols;
885	unsigned int mi_rows;
886	unsigned int mi_cols;
887	struct vdec_vp9_slice_tiles *tiles;
888	int offset;
889	int start;
890	int end;
891	int i;
892
893	rows_log2 = hdr->tile_rows_log2;
894	cols_log2 = hdr->tile_cols_log2;
895	rows = 1 << rows_log2;
896	cols = 1 << cols_log2;
897	tiles = &vsi->frame.tiles;
898	tiles->actual_rows = 0;
899
900	if (rows > 4 || cols > 64)
901		return -EINVAL;
902
903	/* setup mi rows/cols information */
904	mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
905	mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
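	/* MI units are 8x8, e.g. 1920x1088 gives mi_cols = 240, mi_rows = 136 */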
906
907	for (i = 0; i < rows; i++) {
908		start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
909		end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
910		offset = end - start;
911		tiles->mi_rows[i] = (offset + 7) >> 3;
912		if (tiles->mi_rows[i])
913			tiles->actual_rows++;
914	}
915
916	for (i = 0; i < cols; i++) {
917		start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
918		end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
919		offset = end - start;
920		tiles->mi_cols[i] = (offset + 7) >> 3;
921	}
922
923	return 0;
924}
925
926static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
927{
928	memset(&vsi->state, 0, sizeof(vsi->state));
929}
930
931static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
932					 struct v4l2_ctrl_vp9_frame *hdr)
933{
934	pfc->ref_idx[0] = hdr->last_frame_ts;
935	pfc->ref_idx[1] = hdr->golden_frame_ts;
936	pfc->ref_idx[2] = hdr->alt_frame_ts;
937}
938
939static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
940				    struct vdec_vp9_slice_pfc *pfc)
941{
942	struct v4l2_ctrl_vp9_frame *hdr;
943	struct vdec_vp9_slice_uncompressed_header *uh;
944	struct v4l2_ctrl *hdr_ctrl;
945	struct vdec_vp9_slice_vsi *vsi;
946	int ret;
947
948	/* frame header */
949	hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
950	if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
951		return -EINVAL;
952
953	hdr = hdr_ctrl->p_cur.p;
954	vsi = &pfc->vsi;
955	uh = &vsi->frame.uh;
956
957	/* setup vsi information */
958	vdec_vp9_slice_setup_hdr(instance, uh, hdr);
959	vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
960	vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
961	vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
962	vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
963	ret = vdec_vp9_slice_setup_tile(vsi, hdr);
964	if (ret)
965		return ret;
966	vdec_vp9_slice_setup_state(vsi);
967
968	/* core stage needs buffer index to get ref y/c ... */
969	vdec_vp9_slice_setup_ref_idx(pfc, hdr);
970
971	pfc->seq = instance->seq;
972	instance->seq++;
973
974	return 0;
975}
976
977static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
978					   struct vdec_vp9_slice_vsi *vsi,
979					   struct mtk_vcodec_mem *bs,
980					   struct vdec_lat_buf *lat_buf)
981{
982	int i;
983
984	vsi->bs.buf.dma_addr = bs->dma_addr;
985	vsi->bs.buf.size = bs->size;
986	vsi->bs.frame.dma_addr = bs->dma_addr;
987	vsi->bs.frame.size = bs->size;
988
989	for (i = 0; i < 2; i++) {
990		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
991		vsi->mv[i].size = instance->mv[i].size;
992	}
993	for (i = 0; i < 2; i++) {
994		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
995		vsi->seg[i].size = instance->seg[i].size;
996	}
997	vsi->tile.dma_addr = instance->tile.dma_addr;
998	vsi->tile.size = instance->tile.size;
999	vsi->prob.dma_addr = instance->prob.dma_addr;
1000	vsi->prob.size = instance->prob.size;
1001	vsi->counts.dma_addr = instance->counts.dma_addr;
1002	vsi->counts.size = instance->counts.size;
1003
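	/*
	 * ube is the shared LAT-to-core output area owned by the msg_queue;
	 * trans.dma_addr carries the current write pointer, and
	 * trans.dma_addr_end appears to be used to return the end of this
	 * frame's LAT output, which vdec_vp9_slice_update_lat() checks to
	 * detect a full buffer.
	 */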
1004	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1005	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1006	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1007	/* used to store trans end */
1008	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1009	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1010	vsi->err_map.size = lat_buf->wdma_err_addr.size;
1011
1012	vsi->row_info.buf = 0;
1013	vsi->row_info.size = 0;
1014
1015	return 0;
1016}
1017
1018static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
1019					    struct vdec_vp9_slice_vsi *vsi)
1020{
1021	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1022	struct vdec_vp9_slice_uncompressed_header *uh;
1023
1024	uh = &vsi->frame.uh;
1025
1026	mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n",
1027		       instance->dirty[uh->frame_context_idx],
1028		       uh->frame_context_idx);
1029
1030	if (instance->dirty[uh->frame_context_idx])
1031		frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1032	else
1033		frame_ctx = vdec_vp9_slice_default_frame_ctx;
1034	memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));
1035
1036	return 0;
1037}
1038
1039static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
1040					    struct vdec_vp9_slice_vsi *vsi,
1041					    struct mtk_vcodec_mem *buf)
1042{
1043	struct vdec_vp9_slice_uncompressed_header *uh;
1044
1045	/* reset segment buffer */
1046	uh = &vsi->frame.uh;
1047	if (uh->frame_type == 0 ||
1048	    uh->intra_only ||
1049	    uh->error_resilient_mode ||
1050	    uh->frame_width != instance->width ||
1051	    uh->frame_height != instance->height) {
1052		mtk_vdec_debug(instance->ctx, "reset seg\n");
1053		memset(buf->va, 0, buf->size);
1054	}
1055}
1056
/*
 * Parse tiles according to `6.4 Decode tiles syntax`
 * in the "VP9 Bitstream & Decoding Process Specification".
 *
 * A frame contains an uncompressed header, a compressed header and several
 * tiles. This function parses each tile's position and size and stores them
 * in the tile buffer for decoding.
 */
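/*
 * Sketch of the per-tile record written below (derived from the packing code
 * in this function, not from firmware documentation): for each tile in a
 * non-empty tile row, four 32-bit words are emitted: the tile size in bits
 * plus the start bit offset within a 16-byte window, the 16-byte aligned DMA
 * address of the tile data, the bit offset of the data within that aligned
 * address, and the packed (mi_rows - 1, mi_cols - 1) extents.
 */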
1065static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
1066					    struct vdec_vp9_slice_vsi *vsi,
1067					    struct mtk_vcodec_mem *bs)
1068{
1069	struct vdec_vp9_slice_uncompressed_header *uh;
1070	unsigned int rows_log2;
1071	unsigned int cols_log2;
1072	unsigned int rows;
1073	unsigned int cols;
1074	unsigned int mi_row;
1075	unsigned int mi_col;
1076	unsigned int offset;
1077	dma_addr_t pa;
1078	unsigned int size;
1079	struct vdec_vp9_slice_tiles *tiles;
1080	unsigned char *pos;
1081	unsigned char *end;
1082	unsigned char *va;
1083	unsigned int *tb;
1084	int i;
1085	int j;
1086
1087	uh = &vsi->frame.uh;
1088	rows_log2 = uh->tile_rows_log2;
1089	cols_log2 = uh->tile_cols_log2;
1090	rows = 1 << rows_log2;
1091	cols = 1 << cols_log2;
1092
1093	if (rows > 4 || cols > 64) {
1094		mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n", rows, cols);
1095		return -EINVAL;
1096	}
1097
1098	offset = uh->uncompressed_header_size +
1099		uh->header_size_in_bytes;
1100	if (bs->size <= offset) {
1101		mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n", bs->size, offset);
1102		return -EINVAL;
1103	}
1104
1105	tiles = &vsi->frame.tiles;
1106	/* setup tile buffer */
1107
1108	va = (unsigned char *)bs->va;
1109	pos = va + offset;
1110	end = va + bs->size;
1111	/* truncated */
1112	pa = bs->dma_addr + offset;
1113	tb = instance->tile.va;
1114	for (i = 0; i < rows; i++) {
1115		for (j = 0; j < cols; j++) {
1116			if (i == rows - 1 &&
1117			    j == cols - 1) {
1118				size = (unsigned int)(end - pos);
1119			} else {
1120				if (end - pos < 4)
1121					return -EINVAL;
1122
1123				size = (pos[0] << 24) | (pos[1] << 16) |
1124					(pos[2] << 8) | pos[3];
1125				pos += 4;
1126				pa += 4;
1127				offset += 4;
1128				if (end - pos < size)
1129					return -EINVAL;
1130			}
1131			tiles->size[i][j] = size;
1132			if (tiles->mi_rows[i]) {
1133				*tb++ = (size << 3) + ((offset << 3) & 0x7f);
1134				*tb++ = pa & ~0xf;
1135				*tb++ = (pa << 3) & 0x7f;
1136				mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
1137				mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
1138				*tb++ = (mi_row << 6) + mi_col;
1139			}
1140			pos += size;
1141			pa += size;
1142			offset += size;
1143		}
1144	}
1145
1146	return 0;
1147}
1148
1149static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
1150				    struct mtk_vcodec_mem *bs,
1151				    struct vdec_lat_buf *lat_buf,
1152				    struct vdec_vp9_slice_pfc *pfc)
1153{
1154	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1155	int ret;
1156
1157	ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
1158	if (ret)
1159		goto err;
1160
1161	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1162	if (ret)
1163		goto err;
1164
1165	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1166	if (ret)
1167		goto err;
1168
1169	ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1170	if (ret)
1171		goto err;
1172
1173	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1174
1175	/* setup prob/tile buffers for LAT */
1176
1177	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1178	if (ret)
1179		goto err;
1180
1181	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1182	if (ret)
1183		goto err;
1184
1185	return 0;
1186
1187err:
1188	return ret;
1189}
1190
1191static
1192void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
1193					struct vdec_vp9_slice_frame_counts *counts,
1194					struct v4l2_vp9_frame_symbol_counts *counts_helper)
1195{
1196	u32 l = 0, m;
1197
	/*
	 * helper e0 -> mtk e0
	 * helper e1 -> mtk c3
	 * helper c0 -> mtk c0
	 * helper c1 -> mtk c1
	 * helper c2 -> mtk c2
	 */
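	/*
	 * Each mtk band entry stores 4 counters: entries [0..2] feed the
	 * helper's coeff triple (hence the cast to u32 (*)[3]) and entry [3]
	 * feeds the helper's second eob counter, while eob_branch supplies
	 * the first one.
	 */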
1205	for (m = 0; m < 3; m++) {
1206		counts_helper->coeff[i][j][k][l][m] =
1207			(u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
1208		counts_helper->eob[i][j][k][l][m][0] =
1209			&counts->eob_branch[i][j][k].band_0[m];
1210		counts_helper->eob[i][j][k][l][m][1] =
1211			&counts->coef_probs[i][j][k].band_0[m][3];
1212	}
1213
1214	for (l = 1; l < 6; l++) {
1215		for (m = 0; m < 6; m++) {
1216			counts_helper->coeff[i][j][k][l][m] =
1217				(u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
1218			counts_helper->eob[i][j][k][l][m][0] =
1219				&counts->eob_branch[i][j][k].band_1_5[l - 1][m];
1220			counts_helper->eob[i][j][k][l][m][1] =
1221				&counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
1222		}
1223	}
1224}
1225
1226static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
1227					     struct vdec_vp9_slice_frame_counts *counts,
1228					     struct v4l2_vp9_frame_symbol_counts *counts_helper)
1229{
1230	int i, j, k;
1231
1232	counts_helper->partition = &counts->partition;
1233	counts_helper->intra_inter = &counts->intra_inter;
1234	counts_helper->tx32p = &counts->tx_p32x32;
1235	counts_helper->tx16p = &counts->tx_p16x16;
1236	counts_helper->tx8p = &counts->tx_p8x8;
1237	counts_helper->uv_mode = &counts->uv_mode;
1238
1239	counts_helper->comp = &counts->comp_inter;
1240	counts_helper->comp_ref = &counts->comp_ref;
1241	counts_helper->single_ref = &counts->single_ref;
1242	counts_helper->mv_mode = &counts->inter_mode;
1243	counts_helper->mv_joint = &counts->joint;
1244
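	/*
	 * The fields above already match the v4l2 helper layout and are
	 * pointed at directly; the fields below are first copied into
	 * counts_map because the hardware tables carry different padding or
	 * extents, e.g. skip is [3][4] here but [3][2] in the helper.
	 */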
1245	for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
1246		memcpy(counts_map->skip[i], counts->skip[i],
1247		       sizeof(counts_map->skip[0]));
1248	counts_helper->skip = &counts_map->skip;
1249
1250	for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
1251		memcpy(counts_map->y_mode[i], counts->y_mode[i],
1252		       sizeof(counts_map->y_mode[0]));
1253	counts_helper->y_mode = &counts_map->y_mode;
1254
1255	for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
1256		memcpy(counts_map->filter[i], counts->switchable_interp[i],
1257		       sizeof(counts_map->filter[0]));
1258	counts_helper->filter = &counts_map->filter;
1259
1260	for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
1261		memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
1262		       sizeof(counts_map->sign[0]));
1263	counts_helper->sign = &counts_map->sign;
1264
1265	for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
1266		memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
1267		       sizeof(counts_map->classes[0]));
1268	counts_helper->classes = &counts_map->classes;
1269
1270	for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
1271		memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
1272		       sizeof(counts_map->class0[0]));
1273	counts_helper->class0 = &counts_map->class0;
1274
1275	for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
1276		for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
1277			memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
1278			       sizeof(counts_map->bits[0][0]));
1279	counts_helper->bits = &counts_map->bits;
1280
1281	for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
1282		for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
1283			memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
1284			       sizeof(counts_map->class0_fp[0][0]));
1285	counts_helper->class0_fp = &counts_map->class0_fp;
1286
1287	for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
1288		memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
1289		       sizeof(counts_map->fp[0]));
1290	counts_helper->fp = &counts_map->fp;
1291
1292	for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
1293		memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
1294		       sizeof(counts_map->class0_hp[0]));
1295	counts_helper->class0_hp = &counts_map->class0_hp;
1296
1297	for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
1298		memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));
1299
1300	counts_helper->hp = &counts_map->hp;
1301
1302	for (i = 0; i < 4; i++)
1303		for (j = 0; j < 2; j++)
1304			for (k = 0; k < 2; k++)
1305				vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
1306}
1307
1308static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k,
1309				       struct vdec_vp9_slice_frame_ctx *frame_ctx,
1310				       struct v4l2_vp9_frame_context *frame_ctx_helper)
1311{
1312	u32 l, m;
1313
1314	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1315		for (m = 0; m < VP9_BAND_6(l); m++) {
1316			memcpy(frame_ctx_helper->coef[i][j][k][l][m],
1317			       frame_ctx->coef_probs[i][j][k][l].probs[m],
1318			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1319		}
1320	}
1321}
1322
1323static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k,
1324					 struct vdec_vp9_slice_frame_ctx *frame_ctx,
1325					 struct v4l2_vp9_frame_context *frame_ctx_helper)
1326{
1327	u32 l, m;
1328
1329	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1330		for (m = 0; m < VP9_BAND_6(l); m++) {
1331			memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m],
1332			       frame_ctx_helper->coef[i][j][k][l][m],
1333			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1334		}
1335	}
1336}
1337
1338static
1339void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,
1340					struct vdec_vp9_slice_frame_ctx *pre_frame_ctx,
1341					struct vdec_vp9_slice_frame_ctx *frame_ctx,
1342					struct v4l2_vp9_frame_context *frame_ctx_helper)
1343{
1344	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1345	u32 i, j, k;
1346
1347	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1348		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1349			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1350				vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx,
1351							   frame_ctx_helper);
1352
	/*
	 * Use the previous probs when the frame is not intra;
	 * otherwise use the probs updated by the compressed header parse.
	 */
1357	if (!frame_is_intra)
1358		frame_ctx = pre_frame_ctx;
1359
1360	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1361		memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
1362		       sizeof(frame_ctx_helper->tx8[0]));
1363
1364	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1365		memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
1366		       sizeof(frame_ctx_helper->tx16[0]));
1367
1368	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1369		memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
1370		       sizeof(frame_ctx_helper->tx32[0]));
1371
1372	memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));
1373
1374	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1375		memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
1376		       sizeof(frame_ctx_helper->inter_mode[0]));
1377
1378	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1379		memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
1380		       sizeof(frame_ctx_helper->interp_filter[0]));
1381
1382	memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
1383	       sizeof(frame_ctx_helper->is_inter));
1384
1385	memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
1386	       sizeof(frame_ctx_helper->comp_mode));
1387
1388	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1389		memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
1390		       sizeof(frame_ctx_helper->single_ref[0]));
1391
1392	memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
1393	       sizeof(frame_ctx_helper->comp_ref));
1394
1395	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1396		memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
1397		       sizeof(frame_ctx_helper->y_mode[0]));
1398
1399	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1400		memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
1401		       sizeof(frame_ctx_helper->uv_mode[0]));
1402
1403	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1404		memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
1405		       sizeof(frame_ctx_helper->partition[0]));
1406
1407	memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));
1408
1409	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1410		mv->sign[i] = frame_ctx->sign_classes[i].sign;
1411
1412	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1413		memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
1414		       sizeof(mv->classes[i]));
1415
1416	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1417		mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];
1418
1419	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1420		memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));
1421
1422	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1423		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1424			memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
1425			       sizeof(mv->class0_fr[0][0]));
1426
1427	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1428		memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));
1429
1430	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1431		mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;
1432
1433	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1434		mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
1435}
1436
1437static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
1438					       struct vdec_vp9_slice_frame_ctx *frame_ctx)
1439{
1440	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1441	u32 i, j, k;
1442
1443	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1444		memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
1445		       sizeof(frame_ctx_helper->tx8[0]));
1446
1447	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1448		memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
1449		       sizeof(frame_ctx_helper->tx16[0]));
1450
1451	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1452		memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
1453		       sizeof(frame_ctx_helper->tx32[0]));
1454
1455	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1456		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1457			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1458				vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
1459							     frame_ctx_helper);
1460
1461	memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));
1462
1463	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1464		memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
1465		       sizeof(frame_ctx_helper->inter_mode[0]));
1466
1467	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1468		memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
1469		       sizeof(frame_ctx_helper->interp_filter[0]));
1470
1471	memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
1472	       sizeof(frame_ctx_helper->is_inter));
1473
1474	memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
1475	       sizeof(frame_ctx_helper->comp_mode));
1476
1477	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1478		memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
1479		       sizeof(frame_ctx_helper->single_ref[0]));
1480
1481	memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
1482	       sizeof(frame_ctx_helper->comp_ref));
1483
1484	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1485		memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
1486		       sizeof(frame_ctx_helper->y_mode[0]));
1487
1488	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1489		memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
1490		       sizeof(frame_ctx_helper->uv_mode[0]));
1491
1492	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1493		memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
1494		       sizeof(frame_ctx_helper->partition[0]));
1495
1496	memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));
1497
1498	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1499		frame_ctx->sign_classes[i].sign = mv->sign[i];
1500
1501	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1502		memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
1503		       sizeof(mv->classes[i]));
1504
1505	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1506		frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];
1507
1508	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1509		memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));
1510
1511	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1512		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1513			memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
1514			       sizeof(mv->class0_fr[0][0]));
1515
1516	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1517		memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));
1518
1519	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1520		frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];
1521
1522	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1523		frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
1524}
1525
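/*
 * Backward probability adaptation after a frame has been decoded: when the
 * frame refreshes its context and frame_parallel_decoding_mode is not set,
 * map the hardware counts and probs into the v4l2 helper layout, let
 * v4l2_vp9_adapt_coef_probs()/v4l2_vp9_adapt_noncoef_probs() do the
 * adaptation and map the result back into frame_ctx[frame_context_idx];
 * otherwise simply save the probs parsed from the compressed header.
 */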
1526static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
1527				      struct vdec_vp9_slice_vsi *vsi)
1528{
1529	struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
1530	struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
1531	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1532	struct vdec_vp9_slice_frame_counts *counts;
1533	struct v4l2_vp9_frame_symbol_counts *counts_helper;
1534	struct vdec_vp9_slice_uncompressed_header *uh;
1535	bool frame_is_intra;
1536	bool use_128;
1537
1538	uh = &vsi->frame.uh;
1539	pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1540	pre_frame_ctx_helper = &instance->frame_ctx_helper;
1541	frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
1542	counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
1543	counts_helper = &instance->counts_helper;
1544
1545	if (!uh->refresh_frame_context)
1546		return 0;
1547
1548	if (!uh->frame_parallel_decoding_mode) {
1549		vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);
1550
1551		frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
1552		/* check default prob */
1553		if (!instance->dirty[uh->frame_context_idx])
1554			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1555							   vdec_vp9_slice_default_frame_ctx,
1556							   frame_ctx,
1557							   pre_frame_ctx_helper);
1558		else
1559			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1560							   pre_frame_ctx,
1561							   frame_ctx,
1562							   pre_frame_ctx_helper);
1563
1564		use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
1565		v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
1566					  counts_helper,
1567					  use_128,
1568					  frame_is_intra);
1569		if (!frame_is_intra)
1570			v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
1571						     counts_helper,
1572						     V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
1573						     vsi->frame.uh.interpolation_filter,
1574						     vsi->frame.ch.tx_mode,
1575						     vsi->frame.uh.allow_high_precision_mv ?
1576						     V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
1577		vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
1578	} else {
1579		memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
1580	}
1581
1582	instance->dirty[uh->frame_context_idx] = 1;
1583
1584	return 0;
1585}
1586
1587static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance,
1588					struct vdec_vp9_slice_pfc *pfc)
1589{
1590	struct vdec_vp9_slice_vsi *vsi;
1591
1592	vsi = &pfc->vsi;
1593	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1594
1595	mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1596		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1597		       vsi->state.crc[2], vsi->state.crc[3]);
1598	mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
1599		       pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
1600		       vsi->state.crc[6], vsi->state.crc[7]);
1601
1602	vdec_vp9_slice_update_prob(instance, vsi);
1603
1604	instance->width = vsi->frame.uh.frame_width;
1605	instance->height = vsi->frame.uh.frame_height;
1606	instance->frame_type = vsi->frame.uh.frame_type;
1607	instance->show_frame = vsi->frame.uh.show_frame;
1608
1609	return 0;
1610}
1611
1612static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
1613				     struct vdec_lat_buf *lat_buf,
1614				     struct vdec_vp9_slice_pfc *pfc)
1615{
1616	struct vdec_vp9_slice_vsi *vsi;
1617
1618	vsi = &pfc->vsi;
1619	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1620
1621	mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n",
1622		       pfc->seq, vsi->state.crc[0],
1623		       (unsigned long)vsi->trans.dma_addr,
1624		       (unsigned long)vsi->trans.dma_addr_end);
1625
1626	/* buffer full, need to re-decode */
1627	if (vsi->state.full) {
1628		/* buffer not enough */
1629		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
1630			vsi->ube.size)
1631			return -ENOMEM;
1632		return -EAGAIN;
1633	}
1634
1635	vdec_vp9_slice_update_prob(instance, vsi);
1636
1637	instance->width = vsi->frame.uh.frame_width;
1638	instance->height = vsi->frame.uh.frame_height;
1639	instance->frame_type = vsi->frame.uh.frame_type;
1640	instance->show_frame = vsi->frame.uh.show_frame;
1641
1642	return 0;
1643}
1644
1645static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
1646						struct vdec_lat_buf *lat_buf)
1647{
1648	struct vb2_v4l2_buffer *dst;
1649
1650	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1651	if (!dst)
1652		return -EINVAL;
1653
1654	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1655	return 0;
1656}
1657
1658static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
1659					    struct vdec_vp9_slice_pfc *pfc,
1660					    struct vdec_vp9_slice_vsi *vsi,
1661					    struct vdec_fb *fb,
1662					    struct vdec_lat_buf *lat_buf)
1663{
1664	struct vb2_buffer *vb;
1665	struct vb2_queue *vq;
1666	struct vdec_vp9_slice_reference *ref;
1667	int plane;
1668	int size;
1669	int w;
1670	int h;
1671	int i;
1672
1673	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1674	w = vsi->frame.uh.frame_width;
1675	h = vsi->frame.uh.frame_height;
1676	size = ALIGN(w, 64) * ALIGN(h, 64);
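	/*
	 * size is the luma plane size in bytes; when the capture format has
	 * a single plane it is also used as the offset of the chroma data,
	 * e.g. 1920x1088 gives 1920 * 1088 = 2088960 bytes.
	 */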
1677
1678	/* frame buffer */
1679	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1680	if (plane == 1)
1681		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1682	else
1683		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1684
1685	/* reference buffers */
1686	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
1687			     V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1688	if (!vq)
1689		return -EINVAL;
1690
1691	/* get current output buffer */
1692	vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1693	if (!vb)
1694		return -EINVAL;
1695
1696	/* update internal buffer's width/height */
1697	instance->dpb[vb->index].width = w;
1698	instance->dpb[vb->index].height = h;
1699
	/*
	 * get each reference's width/height from the instance dpb
	 * and its address from the vb2 buffer
	 */
1704	for (i = 0; i < 3; i++) {
1705		ref = &vsi->frame.ref[i];
1706		vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1707		if (!vb) {
1708			ref->frame_width = w;
1709			ref->frame_height = h;
1710			memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
1711		} else {
1712			int idx = vb->index;
1713
1714			ref->frame_width = instance->dpb[idx].width;
1715			ref->frame_height = instance->dpb[idx].height;
1716			vsi->ref[i].y.dma_addr =
1717				vb2_dma_contig_plane_dma_addr(vb, 0);
1718			if (plane == 1)
1719				vsi->ref[i].c.dma_addr =
1720					vsi->ref[i].y.dma_addr + size;
1721			else
1722				vsi->ref[i].c.dma_addr =
1723					vb2_dma_contig_plane_dma_addr(vb, 1);
1724		}
1725	}
1726
1727	return 0;
1728}
1729
1730static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance,
1731					       struct vdec_vp9_slice_pfc *pfc,
1732					       struct vdec_vp9_slice_vsi *vsi,
1733					       struct mtk_vcodec_mem *bs,
1734					       struct vdec_fb *fb)
1735{
1736	int i;
1737
1738	vsi->bs.buf.dma_addr = bs->dma_addr;
1739	vsi->bs.buf.size = bs->size;
1740	vsi->bs.frame.dma_addr = bs->dma_addr;
1741	vsi->bs.frame.size = bs->size;
1742
1743	for (i = 0; i < 2; i++) {
1744		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
1745		vsi->mv[i].size = instance->mv[i].size;
1746	}
1747	for (i = 0; i < 2; i++) {
1748		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
1749		vsi->seg[i].size = instance->seg[i].size;
1750	}
1751	vsi->tile.dma_addr = instance->tile.dma_addr;
1752	vsi->tile.size = instance->tile.size;
1753	vsi->prob.dma_addr = instance->prob.dma_addr;
1754	vsi->prob.size = instance->prob.size;
1755	vsi->counts.dma_addr = instance->counts.dma_addr;
1756	vsi->counts.size = instance->counts.size;
1757
1758	vsi->row_info.buf = 0;
1759	vsi->row_info.size = 0;
1760
1761	vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL);
1762}
1763
1764static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
1765				     struct vdec_fb *fb,
1766				     struct vdec_lat_buf *lat_buf,
1767				     struct vdec_vp9_slice_pfc *pfc)
1768{
1769	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1770	int ret;
1771
1772	vdec_vp9_slice_setup_state(vsi);
1773
1774	ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
1775	if (ret)
1776		goto err;
1777
1778	ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1779	if (ret)
1780		goto err;
1781
1782	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
1783
1784	return 0;
1785
1786err:
1787	return ret;
1788}
1789
1790static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance,
1791				       struct mtk_vcodec_mem *bs,
1792				       struct vdec_fb *fb,
1793				       struct vdec_vp9_slice_pfc *pfc)
1794{
1795	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1796	int ret;
1797
1798	ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance);
1799	if (ret)
1800		goto err;
1801
1802	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1803	if (ret)
1804		goto err;
1805
1806	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1807	if (ret)
1808		goto err;
1809
1810	vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb);
1811	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1812
1813	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1814	if (ret)
1815		goto err;
1816
1817	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1818	if (ret)
1819		goto err;
1820
1821	return 0;
1822
1823err:
1824	return ret;
1825}
1826
1827static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
1828				      struct vdec_lat_buf *lat_buf,
1829				      struct vdec_vp9_slice_pfc *pfc)
1830{
1831	struct vdec_vp9_slice_vsi *vsi;
1832
1833	vsi = &pfc->vsi;
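	/* snapshot the decode state reported back by the CORE stage */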
1834	memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
1835
1836	mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1837		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1838		       vsi->state.crc[2], vsi->state.crc[3]);
1839	mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
1840		       pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
1841		       vsi->state.crc[6], vsi->state.crc[7]);
1842
1843	return 0;
1844}
1845
1846static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx)
1847{
1848	struct vdec_vp9_slice_instance *instance;
1849	struct vdec_vp9_slice_init_vsi *vsi;
1850	int ret;
1851
1852	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1853	if (!instance)
1854		return -ENOMEM;
1855
1856	instance->ctx = ctx;
1857	instance->vpu.id = SCP_IPI_VDEC_LAT;
1858	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1859	instance->vpu.ctx = ctx;
1860	instance->vpu.codec_type = ctx->current_codec;
1861
1862	ret = vpu_dec_init(&instance->vpu);
1863	if (ret) {
1864		mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
1865		goto error_vpu_init;
1866	}
1867
1868	/* initialize the VSI pointers and instance flags */
1869
1870	vsi = instance->vpu.vsi;
1871	if (!vsi) {
1872		mtk_vdec_err(ctx, "failed to get VP9 vsi\n");
1873		ret = -EINVAL;
1874		goto error_vsi;
1875	}
1876	instance->init_vsi = vsi;
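	/*
	 * the CORE stage uses a separate VSI; map it from the address
	 * provided by the firmware
	 */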
1877	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
1878						       (u32)vsi->core_vsi);
1879	if (!instance->core_vsi) {
1880		mtk_vdec_err(ctx, "failed to get VP9 core vsi\n");
1881		ret = -EINVAL;
1882		goto error_vsi;
1883	}
1884
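	/* when set, the LAT and CORE decode paths wait for the hardware done interrupt */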
1885	instance->irq = 1;
1886
1887	ret = vdec_vp9_slice_init_default_frame_ctx(instance);
1888	if (ret)
1889		goto error_default_frame_ctx;
1890
1891	ctx->drv_handle = instance;
1892
1893	return 0;
1894
1895error_default_frame_ctx:
1896error_vsi:
1897	vpu_dec_deinit(&instance->vpu);
1898error_vpu_init:
1899	kfree(instance);
1900	return ret;
1901}
1902
1903static void vdec_vp9_slice_deinit(void *h_vdec)
1904{
1905	struct vdec_vp9_slice_instance *instance = h_vdec;
1906
1907	if (!instance)
1908		return;
1909
1910	vpu_dec_deinit(&instance->vpu);
1911	vdec_vp9_slice_free_working_buffer(instance);
1912	vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1913	kfree(instance);
1914}
1915
1916static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
1917				struct vdec_fb *fb, bool *res_chg)
1918{
1919	struct vdec_vp9_slice_instance *instance = h_vdec;
1920
1921	mtk_vdec_debug(instance->ctx, "flush ...\n");
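	/*
	 * on LAT+CORE decoders, wait for the LAT buffer queue to fill back up
	 * (i.e. the CORE stage has consumed all pending buffers) before
	 * resetting the firmware
	 */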
1922	if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE)
1923		vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
1924	return vpu_dec_reset(&instance->vpu);
1925}
1926
1927static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
1928{
1929	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1930	unsigned int data[3];
1931
1932	mtk_vdec_debug(instance->ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1933
1934	data[0] = ctx->picinfo.pic_w;
1935	data[1] = ctx->picinfo.pic_h;
1936	data[2] = ctx->capture_fourcc;
1937	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1938
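	/* buffer dimensions are aligned to 64, the VP9 superblock size */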
1939	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
1940	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
1941	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1942	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1943}
1944
1945static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
1946					unsigned int *dpb_sz)
1947{
1948	/* refer to the VP9 specification: 8 reference frame slots plus the frame being decoded */
1949	*dpb_sz = 9;
1950}
1951
1952static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
1953{
1954	struct vdec_vp9_slice_instance *instance = h_vdec;
1955
1956	switch (type) {
1957	case GET_PARAM_PIC_INFO:
1958		vdec_vp9_slice_get_pic_info(instance);
1959		break;
1960	case GET_PARAM_DPB_SIZE:
1961		vdec_vp9_slice_get_dpb_size(instance, out);
1962		break;
1963	case GET_PARAM_CROP_INFO:
1964		mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information.");
1965		break;
1966	default:
1967		mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
1968		return -EINVAL;
1969	}
1970
1971	return 0;
1972}
1973
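/*
 * Decode one frame on a pure single-core decoder: the whole frame is handled
 * in one firmware call, waiting on the CORE hardware to finish.
 */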
1974static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
1975					struct vdec_fb *fb, bool *res_chg)
1976{
1977	struct vdec_vp9_slice_instance *instance = h_vdec;
1978	struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc;
1979	struct vdec_vp9_slice_vsi *vsi;
1980	struct mtk_vcodec_dec_ctx *ctx;
1981	int ret;
1982
1983	if (!instance || !instance->ctx)
1984		return -EINVAL;
1985	ctx = instance->ctx;
1986
1987	/* a NULL bs means flush the decoder */
1988	if (!bs)
1989		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
1990
1991	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
1992	if (!fb)
1993		return -EBUSY;
1994
1995	vsi = &pfc->vsi;
1996
1997	ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc);
1998	if (ret) {
1999		mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n", ret);
2000		return ret;
2001	}
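	/* publish the prepared VSI to the remote (firmware-side) copy */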
2002	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2003
2004	ret = vpu_dec_start(&instance->vpu, NULL, 0);
2005	if (ret) {
2006		mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
2007		return ret;
2008	}
2009
2010	ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2011					   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2012	/* update remote vsi if decode timeout */
2013	if (ret) {
2014		mtk_vdec_err(ctx, "VP9 decode timeout %d\n", ret);
2015		WRITE_ONCE(instance->vsi->state.timeout, 1);
2016	}
2017
2018	vpu_dec_end(&instance->vpu);
2019
2020	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2021	ret = vdec_vp9_slice_update_single(instance, pfc);
2022	if (ret) {
2023		mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
2024		return ret;
2025	}
2026
2027	ctx->decoded_frame_cnt++;
2028	return 0;
2029}
2030
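/*
 * LAT-stage decode for LAT+CORE decoders: parse the frame on the LAT
 * hardware into a LAT buffer, then queue that buffer for the CORE stage.
 */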
2031static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2032				     struct vdec_fb *fb, bool *res_chg)
2033{
2034	struct vdec_vp9_slice_instance *instance = h_vdec;
2035	struct vdec_lat_buf *lat_buf;
2036	struct vdec_vp9_slice_pfc *pfc;
2037	struct vdec_vp9_slice_vsi *vsi;
2038	struct mtk_vcodec_dec_ctx *ctx;
2039	int ret;
2040
2041	if (!instance || !instance->ctx)
2042		return -EINVAL;
2043	ctx = instance->ctx;
2044
2045	/* init msgQ for the first time */
2046	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
2047				vdec_vp9_slice_core_decode,
2048				sizeof(*pfc)))
2049		return -ENOMEM;
2050
2051	/* a NULL bs means flush the decoder */
2052	if (!bs)
2053		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
2054
2055	lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
2056	if (!lat_buf) {
2057		mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n");
2058		return -EAGAIN;
2059	}
2060	pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data;
2061	if (!pfc) {
2062		ret = -EINVAL;
2063		goto err_free_fb_out;
2064	}
2065	vsi = &pfc->vsi;
2066
2067	ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
2068	if (ret) {
2069		mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n", ret);
2070		goto err_free_fb_out;
2071	}
2072	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2073
2074	ret = vpu_dec_start(&instance->vpu, NULL, 0);
2075	if (ret) {
2076		mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
2077		goto err_free_fb_out;
2078	}
2079
2080	if (instance->irq) {
2081		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2082						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
2083		/* update remote vsi if decode timeout */
2084		if (ret) {
2085			mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n", ret, pfc->seq);
2086			WRITE_ONCE(instance->vsi->state.timeout, 1);
2087		}
2088		vpu_dec_end(&instance->vpu);
2089	}
2090
2091	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2092	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
2093
2094	/* error cases: the LAT trans buffer is full, no UBE space is left, or the decode timed out */
2095	if (ret) {
2096		mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
2097		goto err_free_fb_out;
2098	}
2099
2100	mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n",
2101		       (unsigned long)pfc->vsi.trans.dma_addr,
2102		       (unsigned long)pfc->vsi.trans.dma_addr_end);
2103
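	/*
	 * advance the UBE write pointer past the data produced for this frame
	 * and hand the LAT buffer over to the CORE stage
	 */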
2104	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
2105				       vsi->trans.dma_addr_end +
2106				       ctx->msg_queue.wdma_addr.dma_addr);
2107	vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
2108
2109	return 0;
2110err_free_fb_out:
2111	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
2112	return ret;
2113}
2114
2115static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2116				 struct vdec_fb *fb, bool *res_chg)
2117{
2118	struct vdec_vp9_slice_instance *instance = h_vdec;
2119	int ret;
2120
2121	if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
2122		ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg);
2123	else
2124		ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg);
2125
2126	return ret;
2127}
2128
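/*
 * CORE-stage decode, registered above as the message-queue callback: it
 * consumes one LAT buffer and reconstructs the frame into a capture buffer.
 */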
2129static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
2130{
2131	struct vdec_vp9_slice_instance *instance;
2132	struct vdec_vp9_slice_pfc *pfc;
2133	struct mtk_vcodec_dec_ctx *ctx = NULL;
2134	struct vdec_fb *fb = NULL;
2135	int ret = -EINVAL;
2136
2137	if (!lat_buf)
2138		goto err;
2139
2140	pfc = lat_buf->private_data;
2141	ctx = lat_buf->ctx;
2142	if (!pfc || !ctx)
2143		goto err;
2144
2145	instance = ctx->drv_handle;
2146	if (!instance)
2147		goto err;
2148
2149	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2150	if (!fb) {
2151		ret = -EBUSY;
2152		goto err;
2153	}
2154
2155	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
2156	if (ret) {
2157		mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core failed %d\n", ret);
2158		goto err;
2159	}
2160	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2161
2162	ret = vpu_dec_core(&instance->vpu);
2163	if (ret) {
2164		mtk_vdec_err(ctx, "vpu_dec_core failed %d\n", ret);
2165		goto err;
2166	}
2167
2168	if (instance->irq) {
2169		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2170						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2171		/* update remote vsi if decode timeout */
2172		if (ret) {
2173			mtk_vdec_err(ctx, "VP9 core timeout pic %d\n", pfc->seq);
2174			WRITE_ONCE(instance->core_vsi->state.timeout, 1);
2175		}
2176		vpu_dec_core_end(&instance->vpu);
2177	}
2178
2179	vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
2180	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
2181	if (ret) {
2182		mtk_vdec_err(ctx, "vdec_vp9_slice_update_core failed %d\n", ret);
2183		goto err;
2184	}
2185
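	/*
	 * the firmware reports the trans end address relative to the UBE
	 * buffer start; convert it to an absolute address before moving
	 * the read pointer
	 */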
2186	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
2187	mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n",
2188		       (unsigned long)pfc->vsi.trans.dma_addr_end);
2189	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2190	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2191
2192	return 0;
2193
2194err:
2195	if (ctx && pfc) {
2196		/* always update read pointer */
2197		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2198
2199		if (fb)
2200			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2201	}
2202	return ret;
2203}
2204
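/*
 * Decoder ops exposed to the common decoder framework; vdec_vp9_slice_decode()
 * dispatches to the single-core or LAT decode path based on the hardware
 * architecture.
 */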
2205const struct vdec_common_if vdec_vp9_slice_lat_if = {
2206	.init		= vdec_vp9_slice_init,
2207	.decode		= vdec_vp9_slice_decode,
2208	.get_param	= vdec_vp9_slice_get_param,
2209	.deinit		= vdec_vp9_slice_deinit,
2210};
2211