// SPDX-License-Identifier: GPL-2.0
/*
 * Hantro VPU codec driver
 *
 * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
 *	Hertz Wong <hertz.wong@rock-chips.com>
 *	Herman Chen <herman.chen@rock-chips.com>
 *
 * Copyright (C) 2014 Google, Inc.
 *	Tomasz Figa <tfiga@chromium.org>
 */
12148456Spjd
13155175Spjd#include <linux/types.h>
14148456Spjd#include <linux/sort.h>
15148456Spjd
16148456Spjd#include <media/v4l2-mem2mem.h>
17148456Spjd
18148456Spjd#include "hantro_hw.h"
19148456Spjd#include "hantro_v4l2.h"
20148456Spjd
/* Byte offset of 32-bit register number @nr. */
#define VDPU_SWREG(nr)			((nr) * 4)

/*
 * Register value builders shared by the field definitions below:
 *   VDPU_FLAG(v, bit)     - BIT(bit) when @v is non-zero, otherwise 0.
 *   VDPU_FIELD(v, hi, lo) - @v shifted up to bit @lo and masked to bits
 *                           @hi..@lo, so excess high bits of @v are dropped.
 */
#define VDPU_FLAG(v, bit)		((v) ? BIT(bit) : 0)
#define VDPU_FIELD(v, hi, lo)		(((v) << (lo)) & GENMASK(hi, lo))

/* Buffer address registers. */
#define VDPU_REG_DEC_OUT_BASE		VDPU_SWREG(63)
#define VDPU_REG_RLC_VLC_BASE		VDPU_SWREG(64)
#define VDPU_REG_QTABLE_BASE		VDPU_SWREG(61)
#define VDPU_REG_DIR_MV_BASE		VDPU_SWREG(62)
#define VDPU_REG_REFER_BASE(i)		(VDPU_SWREG(84 + (i)))

/* Decode enable, OR-ed into VDPU_SWREG(57) to start the hardware. */
#define VDPU_REG_DEC_E(v)		VDPU_FLAG(v, 0)

/* Fields written to VDPU_SWREG(50). */
#define VDPU_REG_DEC_ADV_PRE_DIS(v)	VDPU_FLAG(v, 11)
#define VDPU_REG_DEC_SCMD_DIS(v)	VDPU_FLAG(v, 10)
#define VDPU_REG_FILTERING_DIS(v)	VDPU_FLAG(v, 8)
#define VDPU_REG_PIC_FIXED_QUANT(v)	VDPU_FLAG(v, 7)
#define VDPU_REG_DEC_LATENCY(v)		VDPU_FIELD(v, 6, 1)

/* Fields written to VDPU_SWREG(51). */
#define VDPU_REG_INIT_QP(v)		VDPU_FIELD(v, 30, 25)
#define VDPU_REG_STREAM_LEN(v)		VDPU_FIELD(v, 23, 0)

/* Fields written to VDPU_SWREG(52). */
#define VDPU_REG_APF_THRESHOLD(v)	VDPU_FIELD(v, 30, 17)
#define VDPU_REG_STARTMB_X(v)		VDPU_FIELD(v, 16, 8)
#define VDPU_REG_STARTMB_Y(v)		VDPU_FIELD(v, 7, 0)

/* Field written to VDPU_SWREG(53). */
#define VDPU_REG_DEC_MODE(v)		VDPU_FIELD(v, 3, 0)

/* Fields written to VDPU_SWREG(54). */
#define VDPU_REG_DEC_STRENDIAN_E(v)	VDPU_FLAG(v, 5)
#define VDPU_REG_DEC_STRSWAP32_E(v)	VDPU_FLAG(v, 4)
#define VDPU_REG_DEC_OUTSWAP32_E(v)	VDPU_FLAG(v, 3)
#define VDPU_REG_DEC_INSWAP32_E(v)	VDPU_FLAG(v, 2)
#define VDPU_REG_DEC_OUT_ENDIAN(v)	VDPU_FLAG(v, 1)
#define VDPU_REG_DEC_IN_ENDIAN(v)	VDPU_FLAG(v, 0)

/* Fields written to VDPU_SWREG(56). */
#define VDPU_REG_DEC_DATA_DISC_E(v)	VDPU_FLAG(v, 22)
#define VDPU_REG_DEC_MAX_BURST(v)	VDPU_FIELD(v, 20, 16)
#define VDPU_REG_DEC_AXI_WR_ID(v)	VDPU_FIELD(v, 15, 8)
#define VDPU_REG_DEC_AXI_RD_ID(v)	VDPU_FIELD(v, 7, 0)

/* Fields written to VDPU_SWREG(57), next to VDPU_REG_DEC_E(). */
#define VDPU_REG_START_CODE_E(v)	VDPU_FLAG(v, 22)
#define VDPU_REG_CH_8PIX_ILEAV_E(v)	VDPU_FLAG(v, 21)
#define VDPU_REG_RLC_MODE_E(v)		VDPU_FLAG(v, 20)
#define VDPU_REG_PIC_INTERLACE_E(v)	VDPU_FLAG(v, 17)
#define VDPU_REG_PIC_FIELDMODE_E(v)	VDPU_FLAG(v, 16)
#define VDPU_REG_PIC_TOPFIELD_E(v)	VDPU_FLAG(v, 13)
#define VDPU_REG_WRITE_MVS_E(v)		VDPU_FLAG(v, 10)
#define VDPU_REG_SEQ_MBAFF_E(v)		VDPU_FLAG(v, 7)
#define VDPU_REG_PICORD_COUNT_E(v)	VDPU_FLAG(v, 6)
#define VDPU_REG_DEC_TIMEOUT_E(v)	VDPU_FLAG(v, 5)
#define VDPU_REG_DEC_CLK_GATE_E(v)	VDPU_FLAG(v, 4)

/* Fields written to VDPU_SWREG(59). */
#define VDPU_REG_PRED_BC_TAP_0_0(v)	VDPU_FIELD(v, 31, 22)
#define VDPU_REG_PRED_BC_TAP_0_1(v)	VDPU_FIELD(v, 21, 12)
#define VDPU_REG_PRED_BC_TAP_0_2(v)	VDPU_FIELD(v, 11, 2)

/* Field written to VDPU_SWREG(65). */
#define VDPU_REG_REFBU_E(v)		VDPU_FLAG(v, 31)

/* Initial P list entries 4-9, written to VDPU_SWREG(74). */
#define VDPU_REG_PINIT_RLIST_F9(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_PINIT_RLIST_F8(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_PINIT_RLIST_F7(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_PINIT_RLIST_F6(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_PINIT_RLIST_F5(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_PINIT_RLIST_F4(v)	VDPU_FIELD(v, 4, 0)

/* Initial P list entries 10-15, written to VDPU_SWREG(75). */
#define VDPU_REG_PINIT_RLIST_F15(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_PINIT_RLIST_F14(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_PINIT_RLIST_F13(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_PINIT_RLIST_F12(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_PINIT_RLIST_F11(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_PINIT_RLIST_F10(v)	VDPU_FIELD(v, 4, 0)

/*
 * Reference numbers, two per register in VDPU_SWREG(76) .. VDPU_SWREG(83):
 * odd index in bits 31..16, even index in bits 15..0.
 */
#define VDPU_REG_REFER1_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER0_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER3_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER2_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER5_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER4_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER7_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER6_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER9_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER8_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER11_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER10_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER13_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER12_NBR(v)		VDPU_FIELD(v, 15, 0)

#define VDPU_REG_REFER15_NBR(v)		VDPU_FIELD(v, 31, 16)
#define VDPU_REG_REFER14_NBR(v)		VDPU_FIELD(v, 15, 0)

/* Initial B0 list entries 0-5, written to VDPU_SWREG(100). */
#define VDPU_REG_BINIT_RLIST_F5(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_BINIT_RLIST_F4(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_BINIT_RLIST_F3(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_F2(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_F1(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_F0(v)	VDPU_FIELD(v, 4, 0)

/* Initial B0 list entries 6-11, written to VDPU_SWREG(101). */
#define VDPU_REG_BINIT_RLIST_F11(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_BINIT_RLIST_F10(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_BINIT_RLIST_F9(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_F8(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_F7(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_F6(v)	VDPU_FIELD(v, 4, 0)

/* Initial B0 list entries 12-15, written to VDPU_SWREG(102). */
#define VDPU_REG_BINIT_RLIST_F15(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_F14(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_F13(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_F12(v)	VDPU_FIELD(v, 4, 0)

/* Initial B1 list entries 0-5, written to VDPU_SWREG(103). */
#define VDPU_REG_BINIT_RLIST_B5(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_BINIT_RLIST_B4(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_BINIT_RLIST_B3(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_B2(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_B1(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_B0(v)	VDPU_FIELD(v, 4, 0)

/* Initial B1 list entries 6-11, written to VDPU_SWREG(104). */
#define VDPU_REG_BINIT_RLIST_B11(v)	VDPU_FIELD(v, 29, 25)
#define VDPU_REG_BINIT_RLIST_B10(v)	VDPU_FIELD(v, 24, 20)
#define VDPU_REG_BINIT_RLIST_B9(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_B8(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_B7(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_B6(v)	VDPU_FIELD(v, 4, 0)

/* Initial B1 list entries 12-15, written to VDPU_SWREG(105). */
#define VDPU_REG_BINIT_RLIST_B15(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_BINIT_RLIST_B14(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_BINIT_RLIST_B13(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_BINIT_RLIST_B12(v)	VDPU_FIELD(v, 4, 0)

/* Initial P list entries 0-3, written to VDPU_SWREG(106). */
#define VDPU_REG_PINIT_RLIST_F3(v)	VDPU_FIELD(v, 19, 15)
#define VDPU_REG_PINIT_RLIST_F2(v)	VDPU_FIELD(v, 14, 10)
#define VDPU_REG_PINIT_RLIST_F1(v)	VDPU_FIELD(v, 9, 5)
#define VDPU_REG_PINIT_RLIST_F0(v)	VDPU_FIELD(v, 4, 0)

/* Full-width value written to VDPU_SWREG(107). */
#define VDPU_REG_REFER_LTERM_E(v)	VDPU_FIELD(v, 31, 0)

/* Full-width value written to VDPU_SWREG(108). */
#define VDPU_REG_REFER_VALID_E(v)	VDPU_FIELD(v, 31, 0)

/* Field written to VDPU_SWREG(109). */
#define VDPU_REG_STRM_START_BIT(v)	VDPU_FIELD(v, 5, 0)

/* Fields written to VDPU_SWREG(110). */
#define VDPU_REG_CH_QP_OFFSET2(v)	VDPU_FIELD(v, 26, 22)
#define VDPU_REG_CH_QP_OFFSET(v)	VDPU_FIELD(v, 21, 17)
#define VDPU_REG_PIC_MB_HEIGHT_P(v)	VDPU_FIELD(v, 16, 9)
#define VDPU_REG_PIC_MB_WIDTH(v)	VDPU_FIELD(v, 8, 0)

/* Fields written to VDPU_SWREG(111). */
#define VDPU_REG_WEIGHT_BIPR_IDC(v)	VDPU_FIELD(v, 17, 16)
#define VDPU_REG_REF_FRAMES(v)		VDPU_FIELD(v, 4, 0)

/* Fields written to VDPU_SWREG(112). */
#define VDPU_REG_FILT_CTRL_PRES(v)	VDPU_FLAG(v, 31)
#define VDPU_REG_RDPIC_CNT_PRES(v)	VDPU_FLAG(v, 30)
#define VDPU_REG_FRAMENUM_LEN(v)	VDPU_FIELD(v, 20, 16)
#define VDPU_REG_FRAMENUM(v)		VDPU_FIELD(v, 15, 0)

/* Fields written to VDPU_SWREG(113). */
#define VDPU_REG_REFPIC_MK_LEN(v)	VDPU_FIELD(v, 26, 16)
#define VDPU_REG_IDR_PIC_ID(v)		VDPU_FIELD(v, 15, 0)

/* Fields written to VDPU_SWREG(114). */
#define VDPU_REG_PPS_ID(v)		VDPU_FIELD(v, 31, 24)
#define VDPU_REG_REFIDX1_ACTIVE(v)	VDPU_FIELD(v, 23, 19)
#define VDPU_REG_REFIDX0_ACTIVE(v)	VDPU_FIELD(v, 18, 14)
#define VDPU_REG_POC_LENGTH(v)		VDPU_FIELD(v, 7, 0)

/* Fields written to VDPU_SWREG(115). */
#define VDPU_REG_IDR_PIC_E(v)		VDPU_FLAG(v, 8)
#define VDPU_REG_DIR_8X8_INFER_E(v)	VDPU_FLAG(v, 7)
#define VDPU_REG_BLACKWHITE_E(v)	VDPU_FLAG(v, 6)
#define VDPU_REG_CABAC_E(v)		VDPU_FLAG(v, 5)
#define VDPU_REG_WEIGHT_PRED_E(v)	VDPU_FLAG(v, 4)
#define VDPU_REG_CONST_INTRA_E(v)	VDPU_FLAG(v, 3)
#define VDPU_REG_8X8TRANS_FLAG_E(v)	VDPU_FLAG(v, 2)
#define VDPU_REG_TYPE1_QUANT_E(v)	VDPU_FLAG(v, 1)
#define VDPU_REG_FIELDPIC_FLAG_E(v)	VDPU_FLAG(v, 0)
192148456Spjd
193148456Spjdstatic void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
194148456Spjd{
195148456Spjd	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
196148456Spjd	const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
197148456Spjd	const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
198148456Spjd	const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
199148456Spjd	struct hantro_dev *vpu = ctx->dev;
200148456Spjd	u32 reg;
201148456Spjd
202148456Spjd	reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
203148456Spjd	      VDPU_REG_DEC_SCMD_DIS(0) |
204148456Spjd	      VDPU_REG_FILTERING_DIS(0) |
205148456Spjd	      VDPU_REG_PIC_FIXED_QUANT(0) |
206148456Spjd	      VDPU_REG_DEC_LATENCY(0);
207148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50));
208148456Spjd
209148456Spjd	reg = VDPU_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) |
210148456Spjd	      VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
211148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51));
212148456Spjd
213148456Spjd	reg = VDPU_REG_APF_THRESHOLD(8) |
214148456Spjd	      VDPU_REG_STARTMB_X(0) |
215148456Spjd	      VDPU_REG_STARTMB_Y(0);
216148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52));
217148456Spjd
218148456Spjd	reg = VDPU_REG_DEC_MODE(0);
219148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53));
220148456Spjd
221148456Spjd	reg = VDPU_REG_DEC_STRENDIAN_E(1) |
222148456Spjd	      VDPU_REG_DEC_STRSWAP32_E(1) |
223148456Spjd	      VDPU_REG_DEC_OUTSWAP32_E(1) |
224148456Spjd	      VDPU_REG_DEC_INSWAP32_E(1) |
225148456Spjd	      VDPU_REG_DEC_OUT_ENDIAN(1) |
226148456Spjd	      VDPU_REG_DEC_IN_ENDIAN(0);
227148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54));
228148456Spjd
229148456Spjd	reg = VDPU_REG_DEC_DATA_DISC_E(0) |
230148456Spjd	      VDPU_REG_DEC_MAX_BURST(16) |
231148456Spjd	      VDPU_REG_DEC_AXI_WR_ID(0) |
232148456Spjd	      VDPU_REG_DEC_AXI_RD_ID(0xff);
233148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56));
234148456Spjd
235148456Spjd	reg = VDPU_REG_START_CODE_E(1) |
236148456Spjd	      VDPU_REG_CH_8PIX_ILEAV_E(0) |
237148456Spjd	      VDPU_REG_RLC_MODE_E(0) |
238148456Spjd	      VDPU_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) &&
239148456Spjd				       (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD ||
240148456Spjd					dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) |
241148456Spjd	      VDPU_REG_PIC_FIELDMODE_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) |
242148456Spjd	      VDPU_REG_PIC_TOPFIELD_E(!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) |
243148456Spjd	      VDPU_REG_WRITE_MVS_E((sps->profile_idc > 66) && dec_param->nal_ref_idc) |
244148456Spjd	      VDPU_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) |
245148456Spjd	      VDPU_REG_PICORD_COUNT_E(sps->profile_idc > 66) |
246153190Spjd	      VDPU_REG_DEC_TIMEOUT_E(1) |
247148456Spjd	      VDPU_REG_DEC_CLK_GATE_E(1);
248148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57));
249148456Spjd
250148456Spjd	reg = VDPU_REG_PRED_BC_TAP_0_0(1) |
251148456Spjd	      VDPU_REG_PRED_BC_TAP_0_1((u32)-5) |
252148456Spjd	      VDPU_REG_PRED_BC_TAP_0_2(20);
253148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(59));
254148456Spjd
255162353Spjd	reg = VDPU_REG_REFBU_E(0);
256162353Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(65));
257148456Spjd
258148456Spjd	reg = VDPU_REG_STRM_START_BIT(0);
259148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(109));
260148456Spjd
261148456Spjd	reg = VDPU_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) |
262148456Spjd	      VDPU_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) |
263148456Spjd	      VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) |
264148456Spjd	      VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width));
265148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(110));
266148456Spjd
267148456Spjd	reg = VDPU_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) |
268148456Spjd	      VDPU_REG_REF_FRAMES(sps->max_num_ref_frames);
269148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(111));
270148456Spjd
271148456Spjd	reg = VDPU_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) |
272148456Spjd	      VDPU_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) |
273148456Spjd	      VDPU_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) |
274148456Spjd	      VDPU_REG_FRAMENUM(dec_param->frame_num);
275148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(112));
276148456Spjd
277148456Spjd	reg = VDPU_REG_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) |
278148456Spjd	      VDPU_REG_IDR_PIC_ID(dec_param->idr_pic_id);
279148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(113));
280148456Spjd
281148456Spjd	reg = VDPU_REG_PPS_ID(pps->pic_parameter_set_id) |
282148456Spjd	      VDPU_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) |
283148456Spjd	      VDPU_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) |
284148456Spjd	      VDPU_REG_POC_LENGTH(dec_param->pic_order_cnt_bit_size);
285148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(114));
286148456Spjd
287148456Spjd	reg = VDPU_REG_IDR_PIC_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) |
288148456Spjd	      VDPU_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) |
289148456Spjd	      VDPU_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) |
290148456Spjd	      VDPU_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) |
291148456Spjd	      VDPU_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) |
292148456Spjd	      VDPU_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) |
293148456Spjd	      VDPU_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) |
294148456Spjd	      VDPU_REG_TYPE1_QUANT_E(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) |
295148456Spjd	      VDPU_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY));
296148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(115));
297148456Spjd}
298148456Spjd
299148456Spjdstatic void set_ref(struct hantro_ctx *ctx)
300148456Spjd{
301148456Spjd	const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist;
302148456Spjd	struct hantro_dev *vpu = ctx->dev;
303148456Spjd	u32 reg;
304148456Spjd	int i;
305148456Spjd
306148456Spjd	b0_reflist = ctx->h264_dec.reflists.b0;
307148456Spjd	b1_reflist = ctx->h264_dec.reflists.b1;
308148456Spjd	p_reflist = ctx->h264_dec.reflists.p;
309148456Spjd
310148456Spjd	reg = VDPU_REG_PINIT_RLIST_F9(p_reflist[9].index) |
311148456Spjd	      VDPU_REG_PINIT_RLIST_F8(p_reflist[8].index) |
312148456Spjd	      VDPU_REG_PINIT_RLIST_F7(p_reflist[7].index) |
313148456Spjd	      VDPU_REG_PINIT_RLIST_F6(p_reflist[6].index) |
314148456Spjd	      VDPU_REG_PINIT_RLIST_F5(p_reflist[5].index) |
315153190Spjd	      VDPU_REG_PINIT_RLIST_F4(p_reflist[4].index);
316148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(74));
317153190Spjd
318153190Spjd	reg = VDPU_REG_PINIT_RLIST_F15(p_reflist[15].index) |
319148456Spjd	      VDPU_REG_PINIT_RLIST_F14(p_reflist[14].index) |
320148456Spjd	      VDPU_REG_PINIT_RLIST_F13(p_reflist[13].index) |
321148456Spjd	      VDPU_REG_PINIT_RLIST_F12(p_reflist[12].index) |
322153190Spjd	      VDPU_REG_PINIT_RLIST_F11(p_reflist[11].index) |
323155183Spjd	      VDPU_REG_PINIT_RLIST_F10(p_reflist[10].index);
324155183Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(75));
325155183Spjd
326155183Spjd	reg = VDPU_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) |
327148456Spjd	      VDPU_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0));
328148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(76));
329148456Spjd
330148456Spjd	reg = VDPU_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) |
331148456Spjd	      VDPU_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2));
332148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(77));
333148456Spjd
334148456Spjd	reg = VDPU_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) |
335148456Spjd	      VDPU_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4));
336148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(78));
337148456Spjd
338148456Spjd	reg = VDPU_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) |
339148456Spjd	      VDPU_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6));
340148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(79));
341148456Spjd
342148456Spjd	reg = VDPU_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) |
343148456Spjd	      VDPU_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8));
344148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(80));
345148456Spjd
346148456Spjd	reg = VDPU_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) |
347148456Spjd	      VDPU_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10));
348148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(81));
349148456Spjd
350148456Spjd	reg = VDPU_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) |
351148456Spjd	      VDPU_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12));
352148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(82));
353148456Spjd
354153190Spjd	reg = VDPU_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) |
355148456Spjd	      VDPU_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14));
356148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(83));
357149047Spjd
358149047Spjd	reg = VDPU_REG_BINIT_RLIST_F5(b0_reflist[5].index) |
359149047Spjd	      VDPU_REG_BINIT_RLIST_F4(b0_reflist[4].index) |
360149047Spjd	      VDPU_REG_BINIT_RLIST_F3(b0_reflist[3].index) |
361148456Spjd	      VDPU_REG_BINIT_RLIST_F2(b0_reflist[2].index) |
362148456Spjd	      VDPU_REG_BINIT_RLIST_F1(b0_reflist[1].index) |
363148456Spjd	      VDPU_REG_BINIT_RLIST_F0(b0_reflist[0].index);
364148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(100));
365148456Spjd
366148456Spjd	reg = VDPU_REG_BINIT_RLIST_F11(b0_reflist[11].index) |
367148456Spjd	      VDPU_REG_BINIT_RLIST_F10(b0_reflist[10].index) |
368148456Spjd	      VDPU_REG_BINIT_RLIST_F9(b0_reflist[9].index) |
369148456Spjd	      VDPU_REG_BINIT_RLIST_F8(b0_reflist[8].index) |
370148456Spjd	      VDPU_REG_BINIT_RLIST_F7(b0_reflist[7].index) |
371148456Spjd	      VDPU_REG_BINIT_RLIST_F6(b0_reflist[6].index);
372148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(101));
373148456Spjd
374148456Spjd	reg = VDPU_REG_BINIT_RLIST_F15(b0_reflist[15].index) |
375148456Spjd	      VDPU_REG_BINIT_RLIST_F14(b0_reflist[14].index) |
376148456Spjd	      VDPU_REG_BINIT_RLIST_F13(b0_reflist[13].index) |
377148456Spjd	      VDPU_REG_BINIT_RLIST_F12(b0_reflist[12].index);
378148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(102));
379148456Spjd
380148456Spjd	reg = VDPU_REG_BINIT_RLIST_B5(b1_reflist[5].index) |
381148456Spjd	      VDPU_REG_BINIT_RLIST_B4(b1_reflist[4].index) |
382148456Spjd	      VDPU_REG_BINIT_RLIST_B3(b1_reflist[3].index) |
383148456Spjd	      VDPU_REG_BINIT_RLIST_B2(b1_reflist[2].index) |
384148456Spjd	      VDPU_REG_BINIT_RLIST_B1(b1_reflist[1].index) |
385148456Spjd	      VDPU_REG_BINIT_RLIST_B0(b1_reflist[0].index);
386148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(103));
387148456Spjd
388148456Spjd	reg = VDPU_REG_BINIT_RLIST_B11(b1_reflist[11].index) |
389148456Spjd	      VDPU_REG_BINIT_RLIST_B10(b1_reflist[10].index) |
390148456Spjd	      VDPU_REG_BINIT_RLIST_B9(b1_reflist[9].index) |
391148456Spjd	      VDPU_REG_BINIT_RLIST_B8(b1_reflist[8].index) |
392148456Spjd	      VDPU_REG_BINIT_RLIST_B7(b1_reflist[7].index) |
393148456Spjd	      VDPU_REG_BINIT_RLIST_B6(b1_reflist[6].index);
394148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(104));
395148456Spjd
396148456Spjd	reg = VDPU_REG_BINIT_RLIST_B15(b1_reflist[15].index) |
397148456Spjd	      VDPU_REG_BINIT_RLIST_B14(b1_reflist[14].index) |
398148456Spjd	      VDPU_REG_BINIT_RLIST_B13(b1_reflist[13].index) |
399148456Spjd	      VDPU_REG_BINIT_RLIST_B12(b1_reflist[12].index);
400148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(105));
401148456Spjd
402148456Spjd	reg = VDPU_REG_PINIT_RLIST_F3(p_reflist[3].index) |
403148456Spjd	      VDPU_REG_PINIT_RLIST_F2(p_reflist[2].index) |
404148456Spjd	      VDPU_REG_PINIT_RLIST_F1(p_reflist[1].index) |
405148456Spjd	      VDPU_REG_PINIT_RLIST_F0(p_reflist[0].index);
406148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(106));
407148456Spjd
408148456Spjd	reg = VDPU_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm);
409161052Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(107));
410148456Spjd
411148456Spjd	reg = VDPU_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid);
412148456Spjd	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(108));
413148456Spjd
414148456Spjd	/* Set up addresses of DPB buffers. */
415148456Spjd	for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) {
416148456Spjd		dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i);
417148456Spjd
418148456Spjd		vdpu_write_relaxed(vpu, dma_addr, VDPU_REG_REFER_BASE(i));
419148456Spjd	}
420148456Spjd}
421148456Spjd
422148456Spjdstatic void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
423148456Spjd{
424148456Spjd	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
425148456Spjd	struct vb2_v4l2_buffer *dst_buf;
426148456Spjd	struct hantro_dev *vpu = ctx->dev;
427148456Spjd	dma_addr_t src_dma, dst_dma;
428148456Spjd	size_t offset = 0;
429148456Spjd
430148456Spjd	/* Source (stream) buffer. */
431148456Spjd	src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
432148456Spjd	vdpu_write_relaxed(vpu, src_dma, VDPU_REG_RLC_VLC_BASE);
433148456Spjd
434148456Spjd	/* Destination (decoded frame) buffer. */
435148456Spjd	dst_buf = hantro_get_dst_buf(ctx);
436148456Spjd	dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf);
437148456Spjd	/* Adjust dma addr to start at second line for bottom field */
438148456Spjd	if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
439148456Spjd		offset = ALIGN(ctx->src_fmt.width, MB_DIM);
440148456Spjd	vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DEC_OUT_BASE);
441148456Spjd
442148456Spjd	/* Higher profiles require DMV buffer appended to reference frames. */
443148456Spjd	if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
444148456Spjd		unsigned int bytes_per_mb = 384;
445148456Spjd
446148456Spjd		/* DMV buffer for monochrome start directly after Y-plane */
447148456Spjd		if (ctrls->sps->profile_idc >= 100 &&
448148456Spjd		    ctrls->sps->chroma_format_idc == 0)
449148456Spjd			bytes_per_mb = 256;
450148456Spjd		offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
451148456Spjd			 MB_HEIGHT(ctx->src_fmt.height);
452148456Spjd
453148456Spjd		/*
454148456Spjd		 * DMV buffer is split in two for field encoded frames,
455148456Spjd		 * adjust offset for bottom field
456148456Spjd		 */
457148456Spjd		if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
458148456Spjd			offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
459148456Spjd				  MB_HEIGHT(ctx->src_fmt.height);
460148456Spjd		vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DIR_MV_BASE);
461148456Spjd	}
462148456Spjd
463148456Spjd	/* Auxiliary buffer prepared in hantro_h264_dec_init(). */
464148456Spjd	vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE);
465148456Spjd}
466148456Spjd
467148456Spjdint rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx)
468148456Spjd{
469148456Spjd	struct hantro_dev *vpu = ctx->dev;
470148456Spjd	struct vb2_v4l2_buffer *src_buf;
471148456Spjd	u32 reg;
472148456Spjd	int ret;
473148456Spjd
474148456Spjd	/* Prepare the H264 decoder context. */
475148456Spjd	ret = hantro_h264_dec_prepare_run(ctx);
476148456Spjd	if (ret)
477148456Spjd		return ret;
478148456Spjd
479148456Spjd	src_buf = hantro_get_src_buf(ctx);
480148456Spjd	set_params(ctx, src_buf);
481148456Spjd	set_ref(ctx);
482148456Spjd	set_buffers(ctx, src_buf);
483148456Spjd
484148456Spjd	hantro_end_prepare_run(ctx);
485148456Spjd
486148456Spjd	/* Start decoding! */
487148456Spjd	reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1);
488148456Spjd	vdpu_write(vpu, reg, VDPU_SWREG(57));
489148456Spjd
490148456Spjd	return 0;
491148456Spjd}
492148456Spjd