1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2023, Collabora
4 *
5 * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6 */
7
8#include <media/v4l2-mem2mem.h>
9#include "hantro.h"
10#include "hantro_v4l2.h"
11#include "rockchip_vpu981_regs.h"
12
13#define AV1_DEC_MODE		17
14#define GM_GLOBAL_MODELS_PER_FRAME	7
15#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
16#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
17#define AV1_MAX_TILES		128
18#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
19#define AV1DEC_MAX_PIC_BUFFERS	24
20#define AV1_REF_SCALE_SHIFT	14
21#define AV1_INVALID_IDX		-1
22#define MAX_FRAME_DISTANCE	31
23#define AV1_PRIMARY_REF_NONE	7
24#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
25/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
27 * they are not part of the specification
28 */
29#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
30#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
31#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
32
33#define SUPERRES_SCALE_BITS 3
34#define SCALE_NUMERATOR 8
35#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
36
37#define RS_SUBPEL_BITS 6
38#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
39#define RS_SCALE_SUBPEL_BITS 14
40#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
41#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
42#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
43
44#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
45
46#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
47#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
48#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
49#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
50#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
51#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
52#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
53
54#define DIV_LUT_PREC_BITS 14
55#define DIV_LUT_BITS 8
56#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
57#define WARP_PARAM_REDUCE_BITS 6
58#define WARPEDMODEL_PREC_BITS 16
59
/*
 * Divide @value by 2^@n, rounding to nearest. Arguments are captured in
 * locals so each is evaluated exactly once.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n  = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2: round the magnitude and
 * restore the sign, so negative values round away from zero symmetric
 * to positive ones.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_  = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
74
/*
 * Layout of the film grain DMA buffer allocated in
 * rockchip_vpu981_av1_dec_init(): per-plane grain scaling LUTs followed
 * by the cropped luma and chroma grain sample blocks.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];	/* luma grain scaling lookup table */
	u8 scaling_lut_cb[256];	/* Cb grain scaling lookup table */
	u8 scaling_lut_cr[256];	/* Cr grain scaling lookup table */
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];	/* Cb then Cr samples */
};
82
/*
 * Reciprocal multiplier table used by
 * rockchip_vpu981_av1_dec_resolve_divisor_32() (the AV1 "Div_Lut" table):
 * DIV_LUT_NUM + 1 entries of DIV_LUT_PREC_BITS (Q14) precision, running
 * from 16384 (1.0) down to 8192 (0.5), indexed by the top DIV_LUT_BITS
 * bits of the divisor's mantissa.
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109
110static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111{
112	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115	u64 timestamp;
116	int i, idx = frame->ref_frame_idx[ref];
117
118	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119		return AV1_INVALID_IDX;
120
121	timestamp = frame->reference_frame_ts[idx];
122	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123		if (!av1_dec->frame_refs[i].used)
124			continue;
125		if (av1_dec->frame_refs[i].timestamp == timestamp)
126			return i;
127	}
128
129	return AV1_INVALID_IDX;
130}
131
132static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133{
134	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137	if (idx != AV1_INVALID_IDX)
138		return av1_dec->frame_refs[idx].order_hint;
139
140	return 0;
141}
142
143static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
144					     u64 timestamp)
145{
146	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
147	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
148	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
149	int i;
150
151	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
152		int j;
153
154		if (av1_dec->frame_refs[i].used)
155			continue;
156
157		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
158		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
159		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
160		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
161		av1_dec->frame_refs[i].timestamp = timestamp;
162		av1_dec->frame_refs[i].frame_type = frame->frame_type;
163		av1_dec->frame_refs[i].order_hint = frame->order_hint;
164		if (!av1_dec->frame_refs[i].vb2_ref)
165			av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
166
167		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
168			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
169		av1_dec->frame_refs[i].used = true;
170		av1_dec->current_frame_index = i;
171
172		return i;
173	}
174
175	return AV1_INVALID_IDX;
176}
177
178static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
179{
180	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
181
182	if (idx >= 0)
183		av1_dec->frame_refs[idx].used = false;
184}
185
186static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
187{
188	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
190
191	int ref, idx;
192
193	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
195		bool used = false;
196
197		if (!av1_dec->frame_refs[idx].used)
198			continue;
199
200		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
202				used = true;
203		}
204
205		if (!used)
206			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
207	}
208}
209
210static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
211{
212	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
213}
214
215static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
216{
217	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
218
219	return ALIGN((cr_offset * 3) / 2, 64);
220}
221
/*
 * Free all per-tile-column auxiliary DMA buffers (deblocking data and
 * control, CDEF, super-resolution and loop-restoration columns).
 *
 * Each buffer is freed only when its CPU address is set and the pointer
 * is cleared afterwards, so the function is safe on partially allocated
 * state and idempotent.
 */
static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->db_data_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
				  av1_dec->db_data_col.cpu,
				  av1_dec->db_data_col.dma);
	av1_dec->db_data_col.cpu = NULL;

	if (av1_dec->db_ctrl_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
				  av1_dec->db_ctrl_col.cpu,
				  av1_dec->db_ctrl_col.dma);
	av1_dec->db_ctrl_col.cpu = NULL;

	if (av1_dec->cdef_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
	av1_dec->cdef_col.cpu = NULL;

	if (av1_dec->sr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
	av1_dec->sr_col.cpu = NULL;

	if (av1_dec->lr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
	av1_dec->lr_col.cpu = NULL;
}
254
255static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
256{
257	struct hantro_dev *vpu = ctx->dev;
258	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
259	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
260	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
261	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
262	unsigned int height_in_sb = height / 64;
263	unsigned int stripe_num = ((height + 8) + 63) / 64;
264	size_t size;
265
266	if (av1_dec->db_data_col.size >=
267	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
268		return 0;
269
270	rockchip_vpu981_av1_dec_tiles_free(ctx);
271
272	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
273	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
274						      &av1_dec->db_data_col.dma,
275						      GFP_KERNEL);
276	if (!av1_dec->db_data_col.cpu)
277		goto buffer_allocation_error;
278	av1_dec->db_data_col.size = size;
279
280	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
281	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
282						      &av1_dec->db_ctrl_col.dma,
283						      GFP_KERNEL);
284	if (!av1_dec->db_ctrl_col.cpu)
285		goto buffer_allocation_error;
286	av1_dec->db_ctrl_col.size = size;
287
288	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
289	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
290						   &av1_dec->cdef_col.dma,
291						   GFP_KERNEL);
292	if (!av1_dec->cdef_col.cpu)
293		goto buffer_allocation_error;
294	av1_dec->cdef_col.size = size;
295
296	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
297	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
298						 &av1_dec->sr_col.dma,
299						 GFP_KERNEL);
300	if (!av1_dec->sr_col.cpu)
301		goto buffer_allocation_error;
302	av1_dec->sr_col.size = size;
303
304	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
305	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
306						 &av1_dec->lr_col.dma,
307						 GFP_KERNEL);
308	if (!av1_dec->lr_col.cpu)
309		goto buffer_allocation_error;
310	av1_dec->lr_col.size = size;
311
312	av1_dec->num_tile_cols_allocated = num_tile_cols;
313	return 0;
314
315buffer_allocation_error:
316	rockchip_vpu981_av1_dec_tiles_free(ctx);
317	return -ENOMEM;
318}
319
/*
 * Release every long-lived auxiliary DMA buffer allocated by
 * rockchip_vpu981_av1_dec_init() plus the per-tile-column buffers.
 *
 * Every pointer is checked before freeing and cleared afterwards, so
 * this is safe to call on a partially initialized context.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
361
362int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363{
364	struct hantro_dev *vpu = ctx->dev;
365	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366
367	memset(av1_dec, 0, sizeof(*av1_dec));
368
369	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370						       &av1_dec->global_model.dma,
371						       GFP_KERNEL);
372	if (!av1_dec->global_model.cpu)
373		return -ENOMEM;
374	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375
376	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377						    &av1_dec->tile_info.dma,
378						    GFP_KERNEL);
379	if (!av1_dec->tile_info.cpu)
380		return -ENOMEM;
381	av1_dec->tile_info.size = AV1_MAX_TILES;
382
383	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385						     &av1_dec->film_grain.dma,
386						     GFP_KERNEL);
387	if (!av1_dec->film_grain.cpu)
388		return -ENOMEM;
389	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390
391	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392						   ALIGN(sizeof(struct av1cdfs), 2048),
393						   &av1_dec->prob_tbl.dma,
394						   GFP_KERNEL);
395	if (!av1_dec->prob_tbl.cpu)
396		return -ENOMEM;
397	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398
399	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400						       ALIGN(sizeof(struct av1cdfs), 2048),
401						       &av1_dec->prob_tbl_out.dma,
402						       GFP_KERNEL);
403	if (!av1_dec->prob_tbl_out.cpu)
404		return -ENOMEM;
405	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406	av1_dec->cdfs = &av1_dec->default_cdfs;
407	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408
409	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410
411	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412						   AV1_TILE_SIZE,
413						   &av1_dec->tile_buf.dma,
414						   GFP_KERNEL);
415	if (!av1_dec->tile_buf.cpu)
416		return -ENOMEM;
417	av1_dec->tile_buf.size = AV1_TILE_SIZE;
418
419	return 0;
420}
421
/*
 * Fetch the V4L2 stateless AV1 controls for the upcoming decode run and
 * make sure the per-tile-column buffers match the new tile layout.
 *
 * Sequence, tile group entry and frame controls are mandatory; the film
 * grain control is optional and may be NULL.
 *
 * Returns 0 on success, -EINVAL when a mandatory control is missing, or
 * -ENOMEM when the tile buffers cannot be (re)allocated.
 */
static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
	if (WARN_ON(!ctrls->sequence))
		return -EINVAL;

	ctrls->tile_group_entry =
	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
	if (WARN_ON(!ctrls->tile_group_entry))
		return -EINVAL;

	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
	if (WARN_ON(!ctrls->frame))
		return -EINVAL;

	/* Optional: NULL when the stream carries no film grain parameters. */
	ctrls->film_grain =
	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);

	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
}
445
446static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
447{
448	if (n == 0)
449		return 0;
450	return 31 ^ __builtin_clz(n);
451}
452
/*
 * Resolve divisor @d into a (multiplier, shift) pair so division can be
 * performed as a multiply and right shift, following the AV1 spec
 * "resolve divisor" process: x / d ~= (x * retval) >> *shift.
 *
 * On return *shift holds msb(d) + DIV_LUT_PREC_BITS. Returns the Q14
 * multiplier from div_lut[], or -1 when the computed lookup index
 * exceeds the table.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	/* div_lut[] has DIV_LUT_NUM + 1 entries, so f == DIV_LUT_NUM is valid. */
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
472
/*
 * Derive the shear parameters (alpha, beta, gamma, delta) from a
 * six-parameter affine warp model, mirroring the AV1 reference
 * setup_shear() computation. The results are clamped to s16 range and
 * quantized to multiples of 1 << WARP_PARAM_REDUCE_BITS.
 *
 * @params: the six warp parameters; although passed as u32 they hold
 *	    signed fixed-point values (scale 1 << WARPEDMODEL_PREC_BITS)
 *	    and are re-read as signed int through @mat.
 *
 * When mat[2] <= 0 the model is degenerate and the outputs are left
 * untouched (callers pre-initialize them to 0).
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	const int *mat = params;	/* reinterpret the u32 words as signed */
	short shift;
	short y;
	long long gv, dv;

	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/* y ~= (1 << shift) / mat[2], with mat[2]'s sign restored. */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Reduce precision: round each parameter to a multiple of 1 << 6. */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
}
508
/*
 * Serialize the global motion models of the 7 references (LAST..ALTREF)
 * into the global model DMA buffer and point the hardware at it.
 *
 * Per reference, GLOBAL_MODEL_TOTAL_SIZE (32) bytes are written: six
 * 32-bit warp parameters followed by four 16-bit shear parameters
 * (alpha, beta, gamma, delta).
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		/*
		 * Parameters 2 and 3 are stored swapped — presumably the
		 * hardware expects the 2x2 matrix in transposed order;
		 * confirm against the VPU981 reference.
		 */
		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear parameters only exist up to the affine model. */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
552
/*
 * Return the smallest k such that (1 << k) >= target (ceil(log2());
 * 0 for target <= 1).
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565
/*
 * Fill the tile information buffer consumed by the hardware and program
 * the tile configuration registers.
 *
 * Tiles are emitted column-major (tile columns in the outer loop),
 * matching av1_tile_transpose = 1, and context_update_tile_id is
 * converted to the same transposed numbering. Each tile occupies
 * 16 bytes: width and height in superblock units (little-endian 32-bit
 * each) and the start/end byte offsets of its bitstream data relative
 * to the first tile group entry.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
	    ctrls->tile_group_entry;
	int context_update_y =
	    tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
	    tile_info->context_update_tile_id % tile_info->tile_cols;
	/* Same tile renumbered in the transposed (column-major) order. */
	int context_update_tile_id =
	    context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
			    tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height) */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/*
	 * With more than one tile, program the coded tile-size field width;
	 * for a single tile no size fields are present in the bitstream, so
	 * presumably a default magnitude of 3 is expected by the hardware.
	 */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
634
635static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636					    int a, int b)
637{
638	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640	int bits = ctrls->sequence->order_hint_bits - 1;
641	int diff, m;
642
643	if (!ctrls->sequence->order_hint_bits)
644		return 0;
645
646	diff = a - b;
647	m = 1 << bits;
648	diff = (diff & (m - 1)) - (diff & m);
649
650	return diff;
651}
652
/*
 * Compute ref_frame_sign_bias[] for the current frame: a reference gets
 * bias 1 when its order hint lies after the current frame's (a backward
 * reference), 0 otherwise. All biases are 0 for intra frames or when
 * order hints are disabled.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	/*
	 * Walk the 7 non-intra references (i = 0 is LAST_FRAME). The bias
	 * is stored at i + 1 because slot 0 of ref_frame_sign_bias[] is
	 * INTRA_FRAME. Entries whose reference cannot be resolved keep
	 * their previous value.
	 */
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
			    rockchip_vpu981_av1_dec_get_dist(ctx,
							     rockchip_vpu981_get_order_hint(ctx, i),
							     frame->order_hint);
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
678
/*
 * Program one hardware reference slot (@ref = 0..6, LAST..ALTREF):
 * its dimensions, Q14 scaling factors and luma/chroma/motion-vector
 * buffer addresses taken from frame_refs[@idx].
 *
 * Returns true when the reference is scaled, i.e. either scale factor
 * differs from 1.0 (1 << AV1_REF_SCALE_SHIFT).
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Q14 ratio reference/current, rounded to nearest. */
	int scale_width =
	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	/*
	 * NOTE(review): scale_width is written to the *_ver_scale fields
	 * and scale_height to *_hor_scale — verify against the VPU981
	 * register reference whether this cross-assignment is intended.
	 */
	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	/* Luma, chroma and motion vectors share one capture buffer. */
	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + cr_offset;
	mv_addr = luma_addr + mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
757
/*
 * Write the sign bias @val into the register of hardware reference slot
 * @ref (0 = LAST ... 6 = ALTREF). Out-of-range indices only log a
 * warning.
 */
static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
						  int ref, int val)
{
	struct hantro_dev *vpu = ctx->dev;

	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
		break;
	default:
		pr_warn("AV1 invalid sign bias index\n");
		break;
	}
}
790
791static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
792{
793	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
794	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
795	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
796	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
797	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
798	struct hantro_dev *vpu = ctx->dev;
799	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
800
801	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
802	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
803		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
804
805		if (idx >= 0) {
806			dma_addr_t luma_addr, mv_addr = 0;
807			struct hantro_decoded_buffer *seg;
808			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
809
810			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
811			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
812			mv_addr = luma_addr + mv_offset;
813
814			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
815			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
816		}
817	}
818
819	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
820			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
821	hantro_reg_write(vpu, &av1_segment_upd_e,
822			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
823	hantro_reg_write(vpu, &av1_segment_e,
824			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
825
826	hantro_reg_write(vpu, &av1_error_resilient,
827			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
828
829	if (IS_INTRA(frame->frame_type) ||
830	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
831		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
832	}
833
834	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
835		int s;
836
837		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
838			if (seg->feature_enabled[s] &
839			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
840				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
841				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
842					  0, 255);
843				segsign |=
844					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
845			}
846
847			if (seg->feature_enabled[s] &
848			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
849				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
850					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
851					      -63, 63);
852
853			if (seg->feature_enabled[s] &
854			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
855				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
856				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
857					  -63, 63);
858
859			if (seg->feature_enabled[s] &
860			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
861				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
862				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
863					  -63, 63);
864
865			if (seg->feature_enabled[s] &
866			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
867				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
868				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
869					  -63, 63);
870
871			if (frame->frame_type && seg->feature_enabled[s] &
872			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
873				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
874
875			if (seg->feature_enabled[s] &
876			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
877				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
878
879			if (seg->feature_enabled[s] &
880			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
881				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
882		}
883	}
884
885	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
886		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
887			if (seg->feature_enabled[i]
888			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
889				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
890				last_active_seg = max(i, last_active_seg);
891			}
892		}
893	}
894
895	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
896	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
897
898	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
899
900	/* Write QP, filter level, ref frame and skip for every segment */
901	hantro_reg_write(vpu, &av1_quant_seg0,
902			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
903	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
904			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
905	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
906			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
907	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
908			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
909	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
910			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
911	hantro_reg_write(vpu, &av1_refpic_seg0,
912			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
913	hantro_reg_write(vpu, &av1_skip_seg0,
914			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
915	hantro_reg_write(vpu, &av1_global_mv_seg0,
916			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
917
918	hantro_reg_write(vpu, &av1_quant_seg1,
919			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
920	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
921			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
922	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
923			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
924	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
925			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
926	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
927			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
928	hantro_reg_write(vpu, &av1_refpic_seg1,
929			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
930	hantro_reg_write(vpu, &av1_skip_seg1,
931			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
932	hantro_reg_write(vpu, &av1_global_mv_seg1,
933			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
934
935	hantro_reg_write(vpu, &av1_quant_seg2,
936			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
937	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
938			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
939	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
940			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
941	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
942			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
943	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
944			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
945	hantro_reg_write(vpu, &av1_refpic_seg2,
946			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
947	hantro_reg_write(vpu, &av1_skip_seg2,
948			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
949	hantro_reg_write(vpu, &av1_global_mv_seg2,
950			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
951
952	hantro_reg_write(vpu, &av1_quant_seg3,
953			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
954	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
955			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
956	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
957			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
958	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
959			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
960	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
961			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
962	hantro_reg_write(vpu, &av1_refpic_seg3,
963			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
964	hantro_reg_write(vpu, &av1_skip_seg3,
965			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
966	hantro_reg_write(vpu, &av1_global_mv_seg3,
967			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
968
969	hantro_reg_write(vpu, &av1_quant_seg4,
970			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
971	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
972			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
973	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
974			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
975	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
976			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
977	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
978			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
979	hantro_reg_write(vpu, &av1_refpic_seg4,
980			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
981	hantro_reg_write(vpu, &av1_skip_seg4,
982			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
983	hantro_reg_write(vpu, &av1_global_mv_seg4,
984			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
985
986	hantro_reg_write(vpu, &av1_quant_seg5,
987			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
988	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
989			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
990	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
991			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
992	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
993			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
994	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
995			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
996	hantro_reg_write(vpu, &av1_refpic_seg5,
997			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
998	hantro_reg_write(vpu, &av1_skip_seg5,
999			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1000	hantro_reg_write(vpu, &av1_global_mv_seg5,
1001			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1002
1003	hantro_reg_write(vpu, &av1_quant_seg6,
1004			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1005	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1006			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1007	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1008			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1009	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1010			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1011	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1012			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1013	hantro_reg_write(vpu, &av1_refpic_seg6,
1014			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1015	hantro_reg_write(vpu, &av1_skip_seg6,
1016			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1017	hantro_reg_write(vpu, &av1_global_mv_seg6,
1018			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1019
1020	hantro_reg_write(vpu, &av1_quant_seg7,
1021			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1022	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1023			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1024	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1025			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1026	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1027			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1028	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1029			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1030	hantro_reg_write(vpu, &av1_refpic_seg7,
1031			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1032	hantro_reg_write(vpu, &av1_skip_seg7,
1033			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1034	hantro_reg_write(vpu, &av1_global_mv_seg7,
1035			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1036}
1037
1038static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1039{
1040	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1041	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1042	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1043	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1044	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1045	int i;
1046
1047	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1048		int qindex = quantization->base_q_idx;
1049
1050		if (segmentation->feature_enabled[i] &
1051		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1052			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1053		}
1054		qindex = clamp(qindex, 0, 255);
1055
1056		if (qindex ||
1057		    quantization->delta_q_y_dc ||
1058		    quantization->delta_q_u_dc ||
1059		    quantization->delta_q_u_ac ||
1060		    quantization->delta_q_v_dc ||
1061		    quantization->delta_q_v_ac)
1062			return false;
1063	}
1064	return true;
1065}
1066
1067static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1068{
1069	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1070	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1071	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1072	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1073	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1074	struct hantro_dev *vpu = ctx->dev;
1075
1076	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1077	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1078	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1079
1080	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1081	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1082	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1083	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1084
1085	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1086	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1087	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1088		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1089				 loop_filter->ref_deltas[0]);
1090		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1091				 loop_filter->ref_deltas[1]);
1092		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1093				 loop_filter->ref_deltas[2]);
1094		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1095				 loop_filter->ref_deltas[3]);
1096		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1097				 loop_filter->ref_deltas[4]);
1098		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1099				 loop_filter->ref_deltas[5]);
1100		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1101				 loop_filter->ref_deltas[6]);
1102		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1103				 loop_filter->ref_deltas[7]);
1104		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1105				 loop_filter->mode_deltas[0]);
1106		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1107				 loop_filter->mode_deltas[1]);
1108	} else {
1109		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1110		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1111		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1112		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1113		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1114		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1115		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1116		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1117		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1118		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1119	}
1120
1121	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1122	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1123}
1124
/*
 * Fold the CDF (probability) tables the hardware wrote out for the frame
 * just decoded back into the driver's per-reference CDF storage, for every
 * reference slot the frame refreshes.
 *
 * Nothing is stored when the frame disabled frame-end CDF updates.
 */
static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	bool frame_is_intra = IS_INTRA(frame->frame_type);
	/* CDF table the hardware produced at AV1_PROP_TABLE_OUT. */
	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
	int i;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
		return;

	/* Find the first refreshed slot; store_cdfs() below handles the rest. */
	for (i = 0; i < NUM_REF_FRAMES; i++) {
		if (frame->refresh_frame_flags & BIT(i)) {
			struct mvcdfs stored_mv_cdf;

			rockchip_av1_get_cdfs(ctx, i);
			/*
			 * For intra frames the MV CDFs produced by the
			 * hardware describe intra block copy vectors, so
			 * keep the previous inter MV CDFs and save the new
			 * ones in the dedicated "ndvc" copy instead.
			 */
			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
			*av1_dec->cdfs = *out_cdfs;
			if (frame_is_intra) {
				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
			}
			rockchip_av1_store_cdfs(ctx,
						frame->refresh_frame_flags);
			break;
		}
	}
}
1154
/*
 * Post-decode hook called once the hardware finished a frame: propagate
 * the updated CDF tables into the refreshed reference slots.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1159
/*
 * Select the CDF (probability) tables the hardware will use to decode the
 * current frame and expose them through the AV1_PROP_TABLE buffer, plus the
 * AV1_PROP_TABLE_OUT buffer the hardware writes its updated tables to.
 *
 * Per the AV1 spec, default CDFs (re-seeded from base_q_idx) are used when
 * the frame is intra, error resilient, or has no primary reference;
 * otherwise the CDFs stored for the primary reference frame are inherited.
 */
static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_quantization *quantization = &frame->quantization;
	struct hantro_dev *vpu = ctx->dev;
	bool error_resilient_mode =
	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
	bool frame_is_intra = IS_INTRA(frame->frame_type);

	if (error_resilient_mode || frame_is_intra ||
	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
		av1_dec->cdfs = &av1_dec->default_cdfs;
		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
		/* Coefficient CDF defaults depend on the base quantizer. */
		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
						 av1_dec->cdfs);
	} else {
		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
	}
	/* Pre-associate the chosen CDFs with the slots this frame refreshes. */
	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);

	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));

	if (frame_is_intra) {
		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
		/* Overwrite MV context area with intrabc MV context */
		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
		       sizeof(struct mvcdfs));
	}

	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
}
1194
1195static void
1196rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1197					      u8 num_points, u8 *scaling_lut)
1198{
1199	int i, point;
1200
1201	if (num_points == 0) {
1202		memset(scaling_lut, 0, 256);
1203		return;
1204	}
1205
1206	for (point = 0; point < num_points - 1; point++) {
1207		int x;
1208		s32 delta_y = scaling[point + 1] - scaling[point];
1209		s32 delta_x = values[point + 1] - values[point];
1210		s64 delta =
1211		    delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1212					 delta_x) : 0;
1213
1214		for (x = 0; x < delta_x; x++) {
1215			scaling_lut[values[point] + x] =
1216			    scaling[point] +
1217			    (s32)((x * delta + 32768) >> 16);
1218		}
1219	}
1220
1221	for (i = values[num_points - 1]; i < 256; i++)
1222		scaling_lut[i] = scaling[num_points - 1];
1223}
1224
/*
 * Program the film grain synthesis stage.
 *
 * When film grain is disabled for this frame, every grain register is
 * cleared and no tables are generated. Otherwise the grain pattern is
 * pre-computed on the CPU — scaling LUTs plus cropped 64x64 luma and
 * interleaved 32x32 Cb/Cr grain blocks — into the av1_dec->film_grain
 * auxiliary buffer that the hardware reads at AV1_FILM_GRAIN.
 */
static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
	struct hantro_dev *vpu = ctx->dev;
	bool scaling_from_luma =
		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
	/* Scratch buffers for the AV1 grain synthesis helpers. */
	s32 (*ar_coeffs_y)[24];
	s32 (*ar_coeffs_cb)[25];
	s32 (*ar_coeffs_cr)[25];
	s32 (*luma_grain_block)[73][82];
	s32 (*cb_grain_block)[38][44];
	s32 (*cr_grain_block)[38][44];
	s32 ar_coeff_lag, ar_coeff_shift;
	s32 grain_scale_shift, bitdepth;
	s32 grain_center, grain_min, grain_max;
	int i, j;

	hantro_reg_write(vpu, &av1_apply_grain, 0);

	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
		/* Grain disabled: zero the whole register group and bail out. */
		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
		hantro_reg_write(vpu, &av1_scaling_shift, 0);
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
		hantro_reg_write(vpu, &av1_overlap_flag, 0);
		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
		hantro_reg_write(vpu, &av1_random_seed, 0);
		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
		return;
	}

	/* Too large for the stack; the grain blocks alone are tens of KiB. */
	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);

	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
		pr_warn("Fail allocating memory for film grain parameters\n");
		goto alloc_fail;
	}

	hantro_reg_write(vpu, &av1_apply_grain, 1);

	hantro_reg_write(vpu, &av1_num_y_points_b,
			 film_grain->num_y_points > 0);
	hantro_reg_write(vpu, &av1_num_cb_points_b,
			 film_grain->num_cb_points > 0);
	hantro_reg_write(vpu, &av1_num_cr_points_b,
			 film_grain->num_cr_points > 0);
	hantro_reg_write(vpu, &av1_scaling_shift,
			 film_grain->grain_scaling_minus_8 + 8);

	/*
	 * The chroma multipliers/offsets are biased in the bitstream
	 * (mult - 128, offset - 256); the hardware takes the signed values.
	 * They are unused when chroma grain is derived from luma scaling.
	 */
	if (!scaling_from_luma) {
		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
	} else {
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
	}

	hantro_reg_write(vpu, &av1_overlap_flag,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);

	/* Build the scaling LUTs the hardware reads from the aux buffer. */
	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
						      film_grain->point_y_scaling,
						      film_grain->num_y_points,
						      fgmem->scaling_lut_y);

	if (film_grain->flags &
	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
	} else {
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
	}

	/* Un-bias the AR coefficients; only 24 exist for luma (no DC term). */
	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
		if (i < 24)
			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
	}

	ar_coeff_lag = film_grain->ar_coeff_lag;
	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
	grain_scale_shift = film_grain->grain_scale_shift;
	bitdepth = ctx->bit_depth;
	/* Grain sample range is centered around zero for the bit depth. */
	grain_center = 128 << (bitdepth - 8);
	grain_min = 0 - grain_center;
	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;

	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
					       film_grain->num_y_points, grain_scale_shift,
					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
					       grain_min, grain_max, film_grain->grain_seed);

	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
						 cr_grain_block, bitdepth,
						 film_grain->num_y_points,
						 film_grain->num_cb_points,
						 film_grain->num_cr_points,
						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
						 ar_coeffs_cr, ar_coeff_shift, grain_min,
						 grain_max,
						 scaling_from_luma,
						 film_grain->grain_seed);

	/* Crop the 73x82 luma template (9-sample border) to 64x64. */
	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++)
			fgmem->cropped_luma_grain_block[i * 64 + j] =
				(*luma_grain_block)[i + 9][j + 9];
	}

	/* Crop chroma to 32x32 (6-sample border) and interleave Cb/Cr. */
	for (i = 0; i < 32; i++) {
		for (j = 0; j < 32; j++) {
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
				(*cb_grain_block)[i + 6][j + 6];
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
				(*cr_grain_block)[i + 6][j + 6];
		}
	}

	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);

alloc_fail:
	/* kfree(NULL) is a no-op, so partial allocation failures are fine. */
	kfree(ar_coeffs_y);
	kfree(ar_coeffs_cb);
	kfree(ar_coeffs_cr);
	kfree(luma_grain_block);
	kfree(cb_grain_block);
	kfree(cr_grain_block);
}
1389
1390static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1391{
1392	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1393	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1394	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1395	const struct v4l2_av1_cdef *cdef = &frame->cdef;
1396	struct hantro_dev *vpu = ctx->dev;
1397	u32 luma_pri_strength = 0;
1398	u16 luma_sec_strength = 0;
1399	u32 chroma_pri_strength = 0;
1400	u16 chroma_sec_strength = 0;
1401	int i;
1402
1403	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1404	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1405
1406	for (i = 0; i < BIT(cdef->bits); i++) {
1407		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1408		if (cdef->y_sec_strength[i] == 4)
1409			luma_sec_strength |= 3 << (i * 2);
1410		else
1411			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1412
1413		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1414		if (cdef->uv_sec_strength[i] == 4)
1415			chroma_sec_strength |= 3 << (i * 2);
1416		else
1417			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1418	}
1419
1420	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1421			 luma_pri_strength);
1422	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1423			 luma_sec_strength);
1424	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1425			 chroma_pri_strength);
1426	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1427			 chroma_sec_strength);
1428
1429	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1430}
1431
1432static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1433{
1434	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1435	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1436	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1437	const struct v4l2_av1_loop_restoration *loop_restoration =
1438	    &frame->loop_restoration;
1439	struct hantro_dev *vpu = ctx->dev;
1440	u16 lr_type = 0, lr_unit_size = 0;
1441	u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1442	int i;
1443
1444	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1445		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1446		restoration_unit_size[1] =
1447		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448		restoration_unit_size[2] =
1449		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1450	}
1451
1452	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1453		lr_type |=
1454		    loop_restoration->frame_restoration_type[i] << (i * 2);
1455		lr_unit_size |= restoration_unit_size[i] << (i * 2);
1456	}
1457
1458	hantro_reg_write(vpu, &av1_lr_type, lr_type);
1459	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1460	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1461}
1462
/*
 * Program the super-resolution upscaling parameters.
 *
 * When superres is active and actually downscales the frame, compute the
 * 14-bit fixed-point horizontal step and initial subpel offsets for luma
 * and chroma (4:2:0 chroma is half the luma width), mirroring the AV1
 * spec's upscaling derivation. Otherwise the step registers keep their
 * 1:1 defaults and av1_superres_is_scaled stays 0.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	/* 1:1 defaults: one source pixel per destination pixel. */
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denom <= 8 (SCALE_NUMERATOR) means no actual downscaling. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Downscaled width, rounded to nearest, clamped to a 16px minimum. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* Chroma is horizontally subsampled by 2 (rounded up). */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Source advance per destination pixel, 14-bit fixed point. */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Rounding error accumulated across the full row, used to center. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Initial subpel x offsets, masked to the 14-bit subpel range. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse steps (destination advance per source pixel). */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	/*
	 * NOTE(review): when USE_SUPERRES is not set, the raw
	 * superres_denom is written without the minus-9 bias — presumably
	 * a don't-care for the hardware in that case; confirm against the
	 * VPU981 register documentation.
	 */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1561
1562static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1563{
1564	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1565	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1566	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1567	struct hantro_dev *vpu = ctx->dev;
1568	int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1569	int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1570	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1571			    - (frame->frame_width_minus_1 + 1);
1572	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1573			     - (frame->frame_height_minus_1 + 1);
1574
1575	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1576	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1577	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1578	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1579
1580	rockchip_vpu981_av1_dec_set_superres_params(ctx);
1581}
1582
1583static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1584{
1585	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1586	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1587	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1588	struct hantro_dev *vpu = ctx->dev;
1589	bool use_ref_frame_mvs =
1590	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1591	int cur_frame_offset = frame->order_hint;
1592	int alt_frame_offset = 0;
1593	int gld_frame_offset = 0;
1594	int bwd_frame_offset = 0;
1595	int alt2_frame_offset = 0;
1596	int refs_selected[3] = { 0, 0, 0 };
1597	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1598	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1599	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1601	int mf_types[3] = { 0, 0, 0 };
1602	int ref_stamp = 2;
1603	int ref_ind = 0;
1604	int rf, idx;
1605
1606	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1607	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1608	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1609	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1610
1611	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1612	if (idx >= 0) {
1613		int alt_frame_offset_in_lst =
1614			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1615		bool is_lst_overlay =
1616		    (alt_frame_offset_in_lst == gld_frame_offset);
1617
1618		if (!is_lst_overlay) {
1619			int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1620			int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1621			bool lst_intra_only =
1622			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1623
1624			if (lst_mi_cols == cur_mi_cols &&
1625			    lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1626				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1627				refs_selected[ref_ind++] = LST_BUF_IDX;
1628			}
1629		}
1630		ref_stamp--;
1631	}
1632
1633	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1634	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1635		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1636		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1637		bool bwd_intra_only =
1638		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1639
1640		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1641		    !bwd_intra_only) {
1642			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1643			refs_selected[ref_ind++] = BWD_BUF_IDX;
1644			ref_stamp--;
1645		}
1646	}
1647
1648	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1649	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1650		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1651		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1652		bool alt2_intra_only =
1653		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1654
1655		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1656		    !alt2_intra_only) {
1657			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1658			refs_selected[ref_ind++] = ALT2_BUF_IDX;
1659			ref_stamp--;
1660		}
1661	}
1662
1663	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1664	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1665	    ref_stamp >= 0) {
1666		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1667		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1668		bool alt_intra_only =
1669		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1670
1671		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1672		    !alt_intra_only) {
1673			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1674			refs_selected[ref_ind++] = ALT_BUF_IDX;
1675			ref_stamp--;
1676		}
1677	}
1678
1679	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1680	if (idx >= 0 && ref_stamp >= 0) {
1681		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1682		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1683		bool lst2_intra_only =
1684		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1685
1686		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1687		    !lst2_intra_only) {
1688			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1689			refs_selected[ref_ind++] = LST2_BUF_IDX;
1690			ref_stamp--;
1691		}
1692	}
1693
1694	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1695		idx = rockchip_vpu981_get_frame_index(ctx, rf);
1696		if (idx >= 0) {
1697			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1698
1699			cur_offset[rf] =
1700			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1701			cur_roffset[rf] =
1702			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1703		} else {
1704			cur_offset[rf] = 0;
1705			cur_roffset[rf] = 0;
1706		}
1707	}
1708
1709	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1710	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1711	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1712	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1713
1714	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1715	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1716	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1717	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1718	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1719	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1720	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1721
1722	if (use_ref_frame_mvs && ref_ind > 0 &&
1723	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1724	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1725		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1726		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1727		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1728		int val;
1729
1730		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1731
1732		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1733		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1734
1735		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1736		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1737
1738		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1739		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1740
1741		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1742		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1743
1744		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1745		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1746
1747		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1748		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1749
1750		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1751		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1752	}
1753
1754	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1755	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1756	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1757	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1758	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1759	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1760	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1761
1762	if (use_ref_frame_mvs && ref_ind > 1 &&
1763	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1764	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1765		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1766		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1767		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1768		int val;
1769
1770		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1771
1772		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1773		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1774
1775		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1776		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1777
1778		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1779		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1780
1781		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1782		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1783
1784		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1785		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1786
1787		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1788		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1789
1790		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1791		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1792	}
1793
1794	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1795	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1796	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1797	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1798	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1799	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1800	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1801
1802	if (use_ref_frame_mvs && ref_ind > 2 &&
1803	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1804	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1805		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1806		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1807		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1808		int val;
1809
1810		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1811
1812		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1813		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1814
1815		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1816		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1817
1818		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1819		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1820
1821		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1822		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1823
1824		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1825		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1826
1827		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1828		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1829
1830		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1831		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1832	}
1833
1834	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1835	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1836	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1837	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1838	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1839	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1840	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1841
1842	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1843	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1844	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1845	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1846	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1847	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1848	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1849
1850	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1851	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1852	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1853}
1854
1855static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1856{
1857	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1858	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1859	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1860	int frame_type = frame->frame_type;
1861	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1862	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1863	struct hantro_dev *vpu = ctx->dev;
1864	int i, ref_frames = 0;
1865	bool scale_enable = false;
1866
1867	if (IS_INTRA(frame_type) && !allow_intrabc)
1868		return;
1869
1870	if (!allow_intrabc) {
1871		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1872			int idx = rockchip_vpu981_get_frame_index(ctx, i);
1873
1874			if (idx >= 0)
1875				ref_count[idx]++;
1876		}
1877
1878		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1879			if (ref_count[i])
1880				ref_frames++;
1881		}
1882	} else {
1883		ref_frames = 1;
1884	}
1885	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1886
1887	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1888
1889	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1890		u32 ref = i - 1;
1891		int idx = 0;
1892		int width, height;
1893
1894		if (allow_intrabc) {
1895			idx = av1_dec->current_frame_index;
1896			width = frame->frame_width_minus_1 + 1;
1897			height = frame->frame_height_minus_1 + 1;
1898		} else {
1899			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1900				idx = rockchip_vpu981_get_frame_index(ctx, ref);
1901			width = av1_dec->frame_refs[idx].width;
1902			height = av1_dec->frame_refs[idx].height;
1903		}
1904
1905		scale_enable |=
1906		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1907						    height);
1908
1909		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1910						      av1_dec->ref_frame_sign_bias[i]);
1911	}
1912	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1913
1914	hantro_reg_write(vpu, &av1_ref0_gm_mode,
1915			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1916	hantro_reg_write(vpu, &av1_ref1_gm_mode,
1917			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1918	hantro_reg_write(vpu, &av1_ref2_gm_mode,
1919			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1920	hantro_reg_write(vpu, &av1_ref3_gm_mode,
1921			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1922	hantro_reg_write(vpu, &av1_ref4_gm_mode,
1923			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1924	hantro_reg_write(vpu, &av1_ref5_gm_mode,
1925			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1926	hantro_reg_write(vpu, &av1_ref6_gm_mode,
1927			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1928
1929	rockchip_vpu981_av1_dec_set_other_frames(ctx);
1930}
1931
/*
 * Program the per-frame AV1 header parameters into the shadow registers:
 * frame and sequence feature flags, quantization, prediction modes,
 * skip-mode references, and the tile synchronization buffer addresses.
 */
static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	/* Frame-level feature flags from the V4L2 AV1 frame control. */
	hantro_reg_write(vpu, &av1_skip_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
	hantro_reg_write(vpu, &av1_tempor_mvp_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
	hantro_reg_write(vpu, &av1_delta_lf_res_log,
			 ctrls->frame->loop_filter.delta_lf_res);
	hantro_reg_write(vpu, &av1_delta_lf_multi,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
	hantro_reg_write(vpu, &av1_delta_lf_present,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
	hantro_reg_write(vpu, &av1_disable_cdf_update,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
	hantro_reg_write(vpu, &av1_allow_warp,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
	hantro_reg_write(vpu, &av1_show_frame,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
	hantro_reg_write(vpu, &av1_switchable_motion_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
	/* Sequence-level tool enables from the V4L2 AV1 sequence control. */
	hantro_reg_write(vpu, &av1_enable_cdef,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
	hantro_reg_write(vpu, &av1_allow_masked_compound,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
	hantro_reg_write(vpu, &av1_allow_interintra,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
	hantro_reg_write(vpu, &av1_allow_filter_intra,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
	hantro_reg_write(vpu, &av1_enable_jnt_comp,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
	hantro_reg_write(vpu, &av1_enable_dual_filter,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
	hantro_reg_write(vpu, &av1_allow_intrabc,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));

	/*
	 * force_integer_mv is only meaningful when screen content tools are
	 * allowed; otherwise the register is cleared. ("interger" is the
	 * spelling used by the hardware register definition.)
	 */
	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
	else
		hantro_reg_write(vpu, &av1_force_interger_mv,
				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));

	/* Monochrome mode is never enabled here. */
	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
	hantro_reg_write(vpu, &av1_delta_q_present,
			 !!(ctrls->frame->quantization.flags
			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));

	/* frame_type 0 (KEY_FRAME) is signaled to the hardware as IDR. */
	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
	hantro_reg_write(vpu, &av1_high_prec_mv_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
	/* 2 = reference select, 0 = single reference (hardware encoding). */
	hantro_reg_write(vpu, &av1_comp_pred_mode,
			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
	/*
	 * Hardware encoding of tx_mode: 3 when tx_mode == 1, 4 otherwise —
	 * NOTE(review): values come from the vendor register layout; confirm
	 * against the hardware reference.
	 */
	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
	/* log2 of the superblock size: 7 for 128x128, 6 for 64x64. */
	hantro_reg_write(vpu, &av1_max_cb_size,
			 (ctrls->sequence->flags
			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
	hantro_reg_write(vpu, &av1_min_cb_size, 3);

	/* Unused compound-prediction and per-segment filter-level fields. */
	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);

	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
	/* 0xff presumably disables the quantizer matrix — confirm with hw docs. */
	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
	} else {
		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
	}

	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);

	/*
	 * Skip-mode reference indices: a zero control value is replaced by 1
	 * — NOTE(review): presumably 0 means "unset" and LAST_FRAME (1) is
	 * the hardware default; confirm against the AV1 skip-mode semantics.
	 */
	hantro_reg_write(vpu, &av1_skip_ref0,
			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
	hantro_reg_write(vpu, &av1_skip_ref1,
			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);

	/* Tile buffer doubles as the multicore sync area for current/left. */
	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
}
2047
2048static void
2049rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2050					 struct vb2_v4l2_buffer *vb2_src)
2051{
2052	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2053	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2054	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2055	    ctrls->tile_group_entry;
2056	struct hantro_dev *vpu = ctx->dev;
2057	dma_addr_t src_dma;
2058	u32 src_len, src_buf_len;
2059	int start_bit, offset;
2060
2061	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2062	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2063	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2064
2065	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2066	offset = group_entry[0].tile_offset & ~0xf;
2067
2068	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2069	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2070	hantro_reg_write(vpu, &av1_stream_len, src_len);
2071	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2072	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2073}
2074
2075static void
2076rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2077{
2078	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2079	struct hantro_dev *vpu = ctx->dev;
2080	struct hantro_decoded_buffer *dst;
2081	struct vb2_v4l2_buffer *vb2_dst;
2082	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2083	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2084	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2085
2086	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2087	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2088	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2089	chroma_addr = luma_addr + cr_offset;
2090	mv_addr = luma_addr + mv_offset;
2091
2092	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2093	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2094	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2095}
2096
/*
 * Run one AV1 decode job: validate controls, update the reference-frame
 * bookkeeping, program all shadow registers (parameters, references,
 * filters, buffers), flush them and kick the hardware.
 *
 * Returns 0 on success or a negative errno; on failure the run is
 * completed with VB2_BUF_STATE_ERROR.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Refresh the reference list and register the current source frame. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Program every register group for this frame. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Core decode mode and output configuration. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning values used for this core. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Hardware watchdog timeouts, overridden to their maximum. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Start the hardware only after all configuration is in place. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2167
/*
 * Enable and configure the post-processor so the decoded frame is also
 * written out in the user-selected capture format (NV12/P010/...), to the
 * CAPTURE buffer, alongside the decoder's native reference output.
 */
static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	int width = ctx->dst_fmt.width;
	int height = ctx->dst_fmt.height;
	struct vb2_v4l2_buffer *vb2_dst;
	size_t chroma_offset;
	dma_addr_t dst_dma;

	vb2_dst = hantro_get_dst_buf(ctx);

	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
	/* Chroma plane follows the luma plane (stride * height). */
	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
	    ctx->dst_fmt.height;

	/* enable post processor */
	hantro_reg_write(vpu, &av1_pp_out_e, 1);
	hantro_reg_write(vpu, &av1_pp_in_format, 0);
	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);

	/*
	 * Input dimensions are programmed as half the output dimensions,
	 * with horizontal/vertical duplication enabled above —
	 * NOTE(review): presumably the PP input unit is 2 pixels; confirm
	 * against the hardware reference.
	 */
	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
	hantro_reg_write(vpu, &av1_pp_out_height, height);
	hantro_reg_write(vpu, &av1_pp_out_width, width);
	hantro_reg_write(vpu, &av1_pp_out_y_stride,
			 ctx->dst_fmt.plane_fmt[0].bytesperline);
	hantro_reg_write(vpu, &av1_pp_out_c_stride,
			 ctx->dst_fmt.plane_fmt[0].bytesperline);
	/* Hardware output-format code: 1 = P010, 3 = NV12, 0 otherwise. */
	switch (ctx->dst_fmt.pixelformat) {
	case V4L2_PIX_FMT_P010:
		hantro_reg_write(vpu, &av1_pp_out_format, 1);
		break;
	case V4L2_PIX_FMT_NV12:
		hantro_reg_write(vpu, &av1_pp_out_format, 3);
		break;
	default:
		hantro_reg_write(vpu, &av1_pp_out_format, 0);
	}

	/* Unused post-processor features are explicitly disabled. */
	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
	hantro_reg_write(vpu, &av1_pp_up_level, 0);
	hantro_reg_write(vpu, &av1_pp_down_level, 0);
	hantro_reg_write(vpu, &av1_pp_exist, 0);

	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
}
2220
/*
 * Disable the post-processor output path; the decoder then produces only
 * its native reference-frame output.
 */
static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;

	/* disable post processor */
	hantro_reg_write(vpu, &av1_pp_out_e, 0);
}
2228
/* Post-processor hooks wired into the common Hantro post-processing code. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2233