1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Cedrus VPU driver
4 *
5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6 * Copyright (c) 2018 Bootlin
7 */
8
9#include <linux/delay.h>
10#include <linux/types.h>
11
12#include <media/videobuf2-dma-contig.h>
13
14#include "cedrus.h"
15#include "cedrus_hw.h"
16#include "cedrus_regs.h"
17
/*
 * Locations of the tables this file uploads into the H.264 engine's SRAM.
 *
 * cedrus_h264_write_sram() shifts these values left by two before writing
 * them to VE_AVC_SRAM_PORT_OFFSET.
 */
enum cedrus_h264_sram_off {
	/* Explicit prediction weights and offsets */
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE	= 0x000,
	/* Array of struct cedrus_h264_sram_ref_pic descriptors */
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST	= 0x100,
	/* Reference picture lists, one byte per reference index */
	CEDRUS_SRAM_H264_REF_LIST_0		= 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1		= 0x199,
	/* Scaling matrices */
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0	= 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1	= 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4	= 0x220,
};
27
28struct cedrus_h264_sram_ref_pic {
29	__le32	top_field_order_cnt;
30	__le32	bottom_field_order_cnt;
31	__le32	frame_info;
32	__le32	luma_ptr;
33	__le32	chroma_ptr;
34	__le32	mv_col_top_ptr;
35	__le32	mv_col_bot_ptr;
36	__le32	reserved;
37} __packed;
38
/* Number of entries in the frame buffer list uploaded to the engine. */
#define CEDRUS_H264_FRAME_NUM		18

/* Size of the neighbor info buffer programmed into VE_H264_EXTRA_BUFFER2. */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(32 * SZ_1K)
/* Lower bound applied to the computed picture info buffer size. */
#define CEDRUS_MIN_PIC_INFO_BUF_SIZE	(130 * SZ_1K)
43
44static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45				   enum cedrus_h264_sram_off off,
46				   const void *data, size_t len)
47{
48	const u32 *buffer = data;
49	size_t count = DIV_ROUND_UP(len, 4);
50
51	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
52
53	while (count--)
54		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
55}
56
57static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf,
58					      unsigned int field)
59{
60	dma_addr_t addr = buf->codec.h264.mv_col_buf_dma;
61
62	/* Adjust for the field */
63	addr += field * buf->codec.h264.mv_col_buf_size / 2;
64
65	return addr;
66}
67
68static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
69				struct cedrus_buffer *buf,
70				unsigned int top_field_order_cnt,
71				unsigned int bottom_field_order_cnt,
72				struct cedrus_h264_sram_ref_pic *pic)
73{
74	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
75
76	pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
77	pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
78	pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
79
80	pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
81	pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
82	pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0));
83	pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1));
84}
85
86static int cedrus_write_frame_list(struct cedrus_ctx *ctx,
87				   struct cedrus_run *run)
88{
89	struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
90	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
91	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
92	struct vb2_queue *cap_q;
93	struct cedrus_buffer *output_buf;
94	struct cedrus_dev *dev = ctx->dev;
95	unsigned long used_dpbs = 0;
96	unsigned int position;
97	int output = -1;
98	unsigned int i;
99
100	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
101
102	memset(pic_list, 0, sizeof(pic_list));
103
104	for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
105		const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
106		struct cedrus_buffer *cedrus_buf;
107		struct vb2_buffer *buf;
108
109		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
110			continue;
111
112		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
113		if (!buf)
114			continue;
115
116		cedrus_buf = vb2_to_cedrus_buffer(buf);
117		position = cedrus_buf->codec.h264.position;
118		used_dpbs |= BIT(position);
119
120		if (run->dst->vb2_buf.timestamp == dpb->reference_ts) {
121			output = position;
122			continue;
123		}
124
125		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
126			continue;
127
128		cedrus_fill_ref_pic(ctx, cedrus_buf,
129				    dpb->top_field_order_cnt,
130				    dpb->bottom_field_order_cnt,
131				    &pic_list[position]);
132	}
133
134	if (output >= 0)
135		position = output;
136	else
137		position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
138
139	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
140	output_buf->codec.h264.position = position;
141
142	if (!output_buf->codec.h264.mv_col_buf_size) {
143		const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
144		unsigned int field_size;
145
146		field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
147			DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
148		if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE))
149			field_size = field_size * 2;
150		if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
151			field_size = field_size * 2;
152
153		output_buf->codec.h264.mv_col_buf_size = field_size * 2;
154		/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
155		output_buf->codec.h264.mv_col_buf =
156			dma_alloc_attrs(dev->dev,
157					output_buf->codec.h264.mv_col_buf_size,
158					&output_buf->codec.h264.mv_col_buf_dma,
159					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
160
161		if (!output_buf->codec.h264.mv_col_buf) {
162			output_buf->codec.h264.mv_col_buf_size = 0;
163			return -ENOMEM;
164		}
165	}
166
167	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
168		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
169	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
170		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
171	else
172		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
173
174	cedrus_fill_ref_pic(ctx, output_buf,
175			    decode->top_field_order_cnt,
176			    decode->bottom_field_order_cnt,
177			    &pic_list[position]);
178
179	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
180			       pic_list, sizeof(pic_list));
181
182	cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
183
184	return 0;
185}
186
187#define CEDRUS_MAX_REF_IDX	32
188
189static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
190				   struct cedrus_run *run,
191				   const struct v4l2_h264_reference *ref_list,
192				   u8 num_ref, enum cedrus_h264_sram_off sram)
193{
194	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
195	struct vb2_queue *cap_q;
196	struct cedrus_dev *dev = ctx->dev;
197	u8 sram_array[CEDRUS_MAX_REF_IDX];
198	unsigned int i;
199	size_t size;
200
201	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
202
203	memset(sram_array, 0, sizeof(sram_array));
204
205	for (i = 0; i < num_ref; i++) {
206		const struct v4l2_h264_dpb_entry *dpb;
207		const struct cedrus_buffer *cedrus_buf;
208		unsigned int position;
209		struct vb2_buffer *buf;
210		u8 dpb_idx;
211
212		dpb_idx = ref_list[i].index;
213		dpb = &decode->dpb[dpb_idx];
214
215		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
216			continue;
217
218		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
219		if (!buf)
220			continue;
221
222		cedrus_buf = vb2_to_cedrus_buffer(buf);
223		position = cedrus_buf->codec.h264.position;
224
225		sram_array[i] |= position << 1;
226		if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF)
227			sram_array[i] |= BIT(0);
228	}
229
230	size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
231	cedrus_h264_write_sram(dev, sram, &sram_array, size);
232}
233
234static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
235				   struct cedrus_run *run)
236{
237	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
238
239	_cedrus_write_ref_list(ctx, run,
240			       slice->ref_pic_list0,
241			       slice->num_ref_idx_l0_active_minus1 + 1,
242			       CEDRUS_SRAM_H264_REF_LIST_0);
243}
244
245static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
246				   struct cedrus_run *run)
247{
248	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
249
250	_cedrus_write_ref_list(ctx, run,
251			       slice->ref_pic_list1,
252			       slice->num_ref_idx_l1_active_minus1 + 1,
253			       CEDRUS_SRAM_H264_REF_LIST_1);
254}
255
256static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
257				       struct cedrus_run *run)
258{
259	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
260		run->h264.scaling_matrix;
261	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
262	struct cedrus_dev *dev = ctx->dev;
263
264	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
265		return;
266
267	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
268			       scaling->scaling_list_8x8[0],
269			       sizeof(scaling->scaling_list_8x8[0]));
270
271	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
272			       scaling->scaling_list_8x8[1],
273			       sizeof(scaling->scaling_list_8x8[1]));
274
275	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
276			       scaling->scaling_list_4x4,
277			       sizeof(scaling->scaling_list_4x4));
278}
279
280static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
281					   struct cedrus_run *run)
282{
283	const struct v4l2_ctrl_h264_pred_weights *pred_weight =
284		run->h264.pred_weights;
285	struct cedrus_dev *dev = ctx->dev;
286	int i, j, k;
287
288	cedrus_write(dev, VE_H264_SHS_WP,
289		     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
290		     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
291
292	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
293		     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
294
295	for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
296		const struct v4l2_h264_weight_factors *factors =
297			&pred_weight->weight_factors[i];
298
299		for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
300			u32 val;
301
302			val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
303				(factors->luma_weight[j] & 0x1ff);
304			cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
305		}
306
307		for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
308			for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
309				u32 val;
310
311				val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
312					(factors->chroma_weight[j][k] & 0x1ff);
313				cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
314			}
315		}
316	}
317}
318
319/*
320 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
321 * rare cases frame is not decoded correctly. However, setting offset to 0 and
322 * skipping appropriate amount of bits with flush bits trigger always works.
323 */
324static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
325{
326	int count = 0;
327
328	while (count < num) {
329		int tmp = min(num - count, 32);
330
331		cedrus_write(dev, VE_H264_TRIGGER_TYPE,
332			     VE_H264_TRIGGER_TYPE_FLUSH_BITS |
333			     VE_H264_TRIGGER_TYPE_N_BITS(tmp));
334		while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
335			udelay(1);
336
337		count += tmp;
338	}
339}
340
341static void cedrus_set_params(struct cedrus_ctx *ctx,
342			      struct cedrus_run *run)
343{
344	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
345	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
346	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
347	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
348	struct vb2_buffer *src_buf = &run->src->vb2_buf;
349	struct cedrus_dev *dev = ctx->dev;
350	dma_addr_t src_buf_addr;
351	size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
352	unsigned int pic_width_in_mbs;
353	bool mbaff_pic;
354	u32 reg;
355
356	cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
357	cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
358
359	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
360	cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
361	cedrus_write(dev, VE_H264_VLD_ADDR,
362		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
363		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
364		     VE_H264_VLD_ADDR_LAST);
365
366	if (ctx->src_fmt.width > 2048) {
367		cedrus_write(dev, VE_BUF_CTRL,
368			     VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
369			     VE_BUF_CTRL_DBLK_MIXED_RAM);
370		cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
371			     ctx->codec.h264.deblk_buf_dma);
372		cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
373			     ctx->codec.h264.intra_pred_buf_dma);
374	} else {
375		cedrus_write(dev, VE_BUF_CTRL,
376			     VE_BUF_CTRL_INTRAPRED_INT_SRAM |
377			     VE_BUF_CTRL_DBLK_INT_SRAM);
378	}
379
380	/*
381	 * FIXME: Since the bitstream parsing is done in software, and
382	 * in userspace, this shouldn't be needed anymore. But it
383	 * turns out that removing it breaks the decoding process,
384	 * without any clear indication why.
385	 */
386	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
387		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);
388
389	cedrus_skip_bits(dev, slice->header_bit_size);
390
391	if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
392		cedrus_write_pred_weight_table(ctx, run);
393
394	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
395	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
396	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
397		cedrus_write_ref_list0(ctx, run);
398
399	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
400		cedrus_write_ref_list1(ctx, run);
401
402	// picture parameters
403	reg = 0;
404	/*
405	 * FIXME: the kernel headers are allowing the default value to
406	 * be passed, but the libva doesn't give us that.
407	 */
408	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
409	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
410	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
411	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
412		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
413	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
414		reg |= VE_H264_PPS_WEIGHTED_PRED;
415	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
416		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
417	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
418		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
419	cedrus_write(dev, VE_H264_PPS, reg);
420
421	// sequence parameters
422	reg = 0;
423	reg |= (sps->chroma_format_idc & 0x7) << 19;
424	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
425	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
426	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
427		reg |= VE_H264_SPS_MBS_ONLY;
428	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
429		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
430	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
431		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
432	cedrus_write(dev, VE_H264_SPS, reg);
433
434	mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) &&
435		    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
436	pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
437
438	// slice parameters
439	reg = 0;
440	reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
441	reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
442		 (mbaff_pic + 1)) & 0xff) << 16;
443	reg |= decode->nal_ref_idc ? BIT(12) : 0;
444	reg |= (slice->slice_type & 0xf) << 8;
445	reg |= slice->cabac_init_idc & 0x3;
446	if (ctx->fh.m2m_ctx->new_frame)
447		reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
448	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
449		reg |= VE_H264_SHS_FIELD_PIC;
450	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
451		reg |= VE_H264_SHS_BOTTOM_FIELD;
452	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
453		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
454	cedrus_write(dev, VE_H264_SHS, reg);
455
456	reg = 0;
457	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
458	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
459	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
460	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
461	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
462	reg |= slice->slice_beta_offset_div2 & 0xf;
463	cedrus_write(dev, VE_H264_SHS2, reg);
464
465	reg = 0;
466	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
467	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
468	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
469	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
470		reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
471	cedrus_write(dev, VE_H264_SHS_QP, reg);
472
473	// clear status flags
474	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));
475
476	// enable int
477	cedrus_write(dev, VE_H264_CTRL,
478		     VE_H264_CTRL_SLICE_DECODE_INT |
479		     VE_H264_CTRL_DECODE_ERR_INT |
480		     VE_H264_CTRL_VLD_DATA_REQ_INT);
481}
482
483static enum cedrus_irq_status
484cedrus_h264_irq_status(struct cedrus_ctx *ctx)
485{
486	struct cedrus_dev *dev = ctx->dev;
487	u32 reg = cedrus_read(dev, VE_H264_STATUS);
488
489	if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
490		   VE_H264_STATUS_VLD_DATA_REQ_INT))
491		return CEDRUS_IRQ_ERROR;
492
493	if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
494		return CEDRUS_IRQ_OK;
495
496	return CEDRUS_IRQ_NONE;
497}
498
499static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
500{
501	struct cedrus_dev *dev = ctx->dev;
502
503	cedrus_write(dev, VE_H264_STATUS,
504		     VE_H264_STATUS_INT_MASK);
505}
506
507static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
508{
509	struct cedrus_dev *dev = ctx->dev;
510	u32 reg = cedrus_read(dev, VE_H264_CTRL);
511
512	cedrus_write(dev, VE_H264_CTRL,
513		     reg & ~VE_H264_CTRL_INT_MASK);
514}
515
516static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
517{
518	struct cedrus_dev *dev = ctx->dev;
519	int ret;
520
521	cedrus_engine_enable(ctx);
522
523	cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
524	cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
525		     ctx->codec.h264.pic_info_buf_dma);
526	cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
527		     ctx->codec.h264.neighbor_info_buf_dma);
528
529	cedrus_write_scaling_lists(ctx, run);
530	ret = cedrus_write_frame_list(ctx, run);
531	if (ret)
532		return ret;
533
534	cedrus_set_params(ctx, run);
535
536	return 0;
537}
538
539static int cedrus_h264_start(struct cedrus_ctx *ctx)
540{
541	struct cedrus_dev *dev = ctx->dev;
542	unsigned int pic_info_size;
543	int ret;
544
545	/*
546	 * NOTE: All buffers allocated here are only used by HW, so we
547	 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them.
548	 */
549
550	/* Formula for picture buffer size is taken from CedarX source. */
551
552	if (ctx->src_fmt.width > 2048)
553		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
554	else
555		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
556
557	/*
558	 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
559	 * there is no need to multiply by 2.
560	 */
561	pic_info_size += ctx->src_fmt.height * 2 * 64;
562
563	if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
564		pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
565
566	ctx->codec.h264.pic_info_buf_size = pic_info_size;
567	ctx->codec.h264.pic_info_buf =
568		dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
569				&ctx->codec.h264.pic_info_buf_dma,
570				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
571	if (!ctx->codec.h264.pic_info_buf)
572		return -ENOMEM;
573
574	/*
575	 * That buffer is supposed to be 16kiB in size, and be aligned
576	 * on 16kiB as well. However, dma_alloc_attrs provides the
577	 * guarantee that we'll have a DMA address aligned on the
578	 * smallest page order that is greater to the requested size,
579	 * so we don't have to overallocate.
580	 */
581	ctx->codec.h264.neighbor_info_buf =
582		dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
583				&ctx->codec.h264.neighbor_info_buf_dma,
584				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
585	if (!ctx->codec.h264.neighbor_info_buf) {
586		ret = -ENOMEM;
587		goto err_pic_buf;
588	}
589
590	if (ctx->src_fmt.width > 2048) {
591		/*
592		 * Formulas for deblock and intra prediction buffer sizes
593		 * are taken from CedarX source.
594		 */
595
596		ctx->codec.h264.deblk_buf_size =
597			ALIGN(ctx->src_fmt.width, 32) * 12;
598		ctx->codec.h264.deblk_buf =
599			dma_alloc_attrs(dev->dev,
600					ctx->codec.h264.deblk_buf_size,
601					&ctx->codec.h264.deblk_buf_dma,
602					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
603		if (!ctx->codec.h264.deblk_buf) {
604			ret = -ENOMEM;
605			goto err_neighbor_buf;
606		}
607
608		/*
609		 * NOTE: Multiplying by two deviates from CedarX logic, but it
610		 * is for some unknown reason needed for H264 4K decoding on H6.
611		 */
612		ctx->codec.h264.intra_pred_buf_size =
613			ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
614		ctx->codec.h264.intra_pred_buf =
615			dma_alloc_attrs(dev->dev,
616					ctx->codec.h264.intra_pred_buf_size,
617					&ctx->codec.h264.intra_pred_buf_dma,
618					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
619		if (!ctx->codec.h264.intra_pred_buf) {
620			ret = -ENOMEM;
621			goto err_deblk_buf;
622		}
623	}
624
625	return 0;
626
627err_deblk_buf:
628	dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
629		       ctx->codec.h264.deblk_buf,
630		       ctx->codec.h264.deblk_buf_dma,
631		       DMA_ATTR_NO_KERNEL_MAPPING);
632
633err_neighbor_buf:
634	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
635		       ctx->codec.h264.neighbor_info_buf,
636		       ctx->codec.h264.neighbor_info_buf_dma,
637		       DMA_ATTR_NO_KERNEL_MAPPING);
638
639err_pic_buf:
640	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
641		       ctx->codec.h264.pic_info_buf,
642		       ctx->codec.h264.pic_info_buf_dma,
643		       DMA_ATTR_NO_KERNEL_MAPPING);
644	return ret;
645}
646
647static void cedrus_h264_stop(struct cedrus_ctx *ctx)
648{
649	struct cedrus_dev *dev = ctx->dev;
650	struct cedrus_buffer *buf;
651	struct vb2_queue *vq;
652	unsigned int i;
653
654	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
655
656	for (i = 0; i < vb2_get_num_buffers(vq); i++) {
657		struct vb2_buffer *vb = vb2_get_buffer(vq, i);
658
659		if (!vb)
660			continue;
661
662		buf = vb2_to_cedrus_buffer(vb);
663
664		if (buf->codec.h264.mv_col_buf_size > 0) {
665			dma_free_attrs(dev->dev,
666				       buf->codec.h264.mv_col_buf_size,
667				       buf->codec.h264.mv_col_buf,
668				       buf->codec.h264.mv_col_buf_dma,
669				       DMA_ATTR_NO_KERNEL_MAPPING);
670
671			buf->codec.h264.mv_col_buf_size = 0;
672		}
673	}
674
675	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
676		       ctx->codec.h264.neighbor_info_buf,
677		       ctx->codec.h264.neighbor_info_buf_dma,
678		       DMA_ATTR_NO_KERNEL_MAPPING);
679	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
680		       ctx->codec.h264.pic_info_buf,
681		       ctx->codec.h264.pic_info_buf_dma,
682		       DMA_ATTR_NO_KERNEL_MAPPING);
683	if (ctx->codec.h264.deblk_buf_size)
684		dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
685			       ctx->codec.h264.deblk_buf,
686			       ctx->codec.h264.deblk_buf_dma,
687			       DMA_ATTR_NO_KERNEL_MAPPING);
688	if (ctx->codec.h264.intra_pred_buf_size)
689		dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size,
690			       ctx->codec.h264.intra_pred_buf,
691			       ctx->codec.h264.intra_pred_buf_dma,
692			       DMA_ATTR_NO_KERNEL_MAPPING);
693}
694
695static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
696{
697	struct cedrus_dev *dev = ctx->dev;
698
699	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
700		     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
701}
702
703struct cedrus_dec_ops cedrus_dec_ops_h264 = {
704	.irq_clear	= cedrus_h264_irq_clear,
705	.irq_disable	= cedrus_h264_irq_disable,
706	.irq_status	= cedrus_h264_irq_status,
707	.setup		= cedrus_h264_setup,
708	.start		= cedrus_h264_start,
709	.stop		= cedrus_h264_stop,
710	.trigger	= cedrus_h264_trigger,
711};
712