1// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2/*
3 * Wave5 series multi-standard codec IP - decoder interface
4 *
5 * Copyright (C) 2021-2023 CHIPS&MEDIA INC
6 */
7
8#include "wave5-helper.h"
9
10#define VPU_DEC_DEV_NAME "C&M Wave5 VPU decoder"
11#define VPU_DEC_DRV_NAME "wave5-dec"
12
13#define DEFAULT_SRC_SIZE(width, height) ({			\
14	(width) * (height) / 8 * 3;					\
15})
16
17static const struct vpu_format dec_fmt_list[FMT_TYPES][MAX_FMTS] = {
18	[VPU_FMT_TYPE_CODEC] = {
19		{
20			.v4l2_pix_fmt = V4L2_PIX_FMT_HEVC,
21			.max_width = 8192,
22			.min_width = 8,
23			.max_height = 4320,
24			.min_height = 8,
25		},
26		{
27			.v4l2_pix_fmt = V4L2_PIX_FMT_H264,
28			.max_width = 8192,
29			.min_width = 32,
30			.max_height = 4320,
31			.min_height = 32,
32		},
33	},
34	[VPU_FMT_TYPE_RAW] = {
35		{
36			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420,
37			.max_width = 8192,
38			.min_width = 8,
39			.max_height = 4320,
40			.min_height = 8,
41		},
42		{
43			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12,
44			.max_width = 8192,
45			.min_width = 8,
46			.max_height = 4320,
47			.min_height = 8,
48		},
49		{
50			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21,
51			.max_width = 8192,
52			.min_width = 8,
53			.max_height = 4320,
54			.min_height = 8,
55		},
56		{
57			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422P,
58			.max_width = 8192,
59			.min_width = 8,
60			.max_height = 4320,
61			.min_height = 8,
62		},
63		{
64			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16,
65			.max_width = 8192,
66			.min_width = 8,
67			.max_height = 4320,
68			.min_height = 8,
69		},
70		{
71			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61,
72			.max_width = 8192,
73			.min_width = 8,
74			.max_height = 4320,
75			.min_height = 8,
76		},
77		{
78			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV420M,
79			.max_width = 8192,
80			.min_width = 8,
81			.max_height = 4320,
82			.min_height = 8,
83		},
84		{
85			.v4l2_pix_fmt = V4L2_PIX_FMT_NV12M,
86			.max_width = 8192,
87			.min_width = 8,
88			.max_height = 4320,
89			.min_height = 8,
90		},
91		{
92			.v4l2_pix_fmt = V4L2_PIX_FMT_NV21M,
93			.max_width = 8192,
94			.min_width = 8,
95			.max_height = 4320,
96			.min_height = 8,
97		},
98		{
99			.v4l2_pix_fmt = V4L2_PIX_FMT_YUV422M,
100			.max_width = 8192,
101			.min_width = 8,
102			.max_height = 4320,
103			.min_height = 8,
104		},
105		{
106			.v4l2_pix_fmt = V4L2_PIX_FMT_NV16M,
107			.max_width = 8192,
108			.min_width = 8,
109			.max_height = 4320,
110			.min_height = 8,
111		},
112		{
113			.v4l2_pix_fmt = V4L2_PIX_FMT_NV61M,
114			.max_width = 8192,
115			.min_width = 8,
116			.max_height = 4320,
117			.min_height = 8,
118		},
119	}
120};
121
122/*
123 * Make sure that the state switch is allowed and add logging for debugging
124 * purposes
125 */
126static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state)
127{
128	switch (state) {
129	case VPU_INST_STATE_NONE:
130		break;
131	case VPU_INST_STATE_OPEN:
132		if (inst->state != VPU_INST_STATE_NONE)
133			goto invalid_state_switch;
134		goto valid_state_switch;
135	case VPU_INST_STATE_INIT_SEQ:
136		if (inst->state != VPU_INST_STATE_OPEN && inst->state != VPU_INST_STATE_STOP)
137			goto invalid_state_switch;
138		goto valid_state_switch;
139	case VPU_INST_STATE_PIC_RUN:
140		if (inst->state != VPU_INST_STATE_INIT_SEQ)
141			goto invalid_state_switch;
142		goto valid_state_switch;
143	case VPU_INST_STATE_STOP:
144		goto valid_state_switch;
145	}
146invalid_state_switch:
147	WARN(1, "Invalid state switch from %s to %s.\n",
148	     state_to_str(inst->state), state_to_str(state));
149	return -EINVAL;
150valid_state_switch:
151	dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
152		state_to_str(inst->state), state_to_str(state));
153	inst->state = state;
154	return 0;
155}
156
157static int wave5_vpu_dec_set_eos_on_firmware(struct vpu_instance *inst)
158{
159	int ret;
160
161	ret = wave5_vpu_dec_update_bitstream_buffer(inst, 0);
162	if (ret) {
163		/*
164		 * To set the EOS flag, a command is sent to the firmware.
165		 * That command may never return (timeout) or may report an error.
166		 */
167		dev_err(inst->dev->dev,
168			"Setting EOS for the bitstream, fail: %d\n", ret);
169		return ret;
170	}
171	return 0;
172}
173
174static bool wave5_last_src_buffer_consumed(struct v4l2_m2m_ctx *m2m_ctx)
175{
176	struct vpu_src_buffer *vpu_buf;
177
178	if (!m2m_ctx->last_src_buf)
179		return false;
180
181	vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
182	return vpu_buf->consumed;
183}
184
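/*
 * Compute how many bitstream bytes the firmware consumed since the previous
 * read pointer (taking ring-buffer wrap-around into account) and return every
 * fully consumed OUTPUT buffer to userspace. Once the last buffer of a drain
 * request has been consumed, signal EOS to the firmware.
 */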
185static void wave5_handle_src_buffer(struct vpu_instance *inst, dma_addr_t rd_ptr)
186{
187	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
188	struct v4l2_m2m_buffer *buf, *n;
189	size_t consumed_bytes = 0;
190
191	if (rd_ptr >= inst->last_rd_ptr) {
192		consumed_bytes = rd_ptr - inst->last_rd_ptr;
193	} else {
194		size_t rd_offs = rd_ptr - inst->bitstream_vbuf.daddr;
195		size_t last_rd_offs = inst->last_rd_ptr - inst->bitstream_vbuf.daddr;
196
197		consumed_bytes = rd_offs + (inst->bitstream_vbuf.size - last_rd_offs);
198	}
199
200	inst->last_rd_ptr = rd_ptr;
201	consumed_bytes += inst->remaining_consumed_bytes;
202
203	dev_dbg(inst->dev->dev, "%s: %zu bytes of bitstream consumed", __func__,
204		consumed_bytes);
205
206	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
207		struct vb2_v4l2_buffer *src_buf = &buf->vb;
208		size_t src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
209
210		if (src_size > consumed_bytes)
211			break;
212
213		dev_dbg(inst->dev->dev, "%s: removing src buffer %i",
214			__func__, src_buf->vb2_buf.index);
215		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
216		inst->timestamp = src_buf->vb2_buf.timestamp;
217		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
218		consumed_bytes -= src_size;
219
220		/* Handle the case where the last bitstream buffer has been picked */
221		if (src_buf == m2m_ctx->last_src_buf) {
222			int ret;
223
224			m2m_ctx->last_src_buf = NULL;
225			ret = wave5_vpu_dec_set_eos_on_firmware(inst);
226			if (ret)
227				dev_warn(inst->dev->dev,
228					 "Setting EOS for the bitstream, fail: %d\n", ret);
229			break;
230		}
231	}
232
233	inst->remaining_consumed_bytes = consumed_bytes;
234}
235
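/*
 * Derive the per-plane bytesperline and sizeimage for the given raw pixel
 * format, aligning the width to 32 pixels and the height to 16 lines. For
 * formats that are not in the raw list (i.e. the compressed OUTPUT formats),
 * fall back to DEFAULT_SRC_SIZE() for the buffer size.
 */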
236static void wave5_update_pix_fmt(struct v4l2_pix_format_mplane *pix_mp, unsigned int width,
237				 unsigned int height)
238{
239	switch (pix_mp->pixelformat) {
240	case V4L2_PIX_FMT_YUV420:
241	case V4L2_PIX_FMT_NV12:
242	case V4L2_PIX_FMT_NV21:
243		pix_mp->width = round_up(width, 32);
244		pix_mp->height = round_up(height, 16);
245		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
246		pix_mp->plane_fmt[0].sizeimage = width * height * 3 / 2;
247		break;
248	case V4L2_PIX_FMT_YUV422P:
249	case V4L2_PIX_FMT_NV16:
250	case V4L2_PIX_FMT_NV61:
251		pix_mp->width = round_up(width, 32);
252		pix_mp->height = round_up(height, 16);
253		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
254		pix_mp->plane_fmt[0].sizeimage = width * height * 2;
255		break;
256	case V4L2_PIX_FMT_YUV420M:
257		pix_mp->width = round_up(width, 32);
258		pix_mp->height = round_up(height, 16);
259		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
260		pix_mp->plane_fmt[0].sizeimage = width * height;
261		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
262		pix_mp->plane_fmt[1].sizeimage = width * height / 4;
263		pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
264		pix_mp->plane_fmt[2].sizeimage = width * height / 4;
265		break;
266	case V4L2_PIX_FMT_NV12M:
267	case V4L2_PIX_FMT_NV21M:
268		pix_mp->width = round_up(width, 32);
269		pix_mp->height = round_up(height, 16);
270		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
271		pix_mp->plane_fmt[0].sizeimage = width * height;
272		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
273		pix_mp->plane_fmt[1].sizeimage = width * height / 2;
274		break;
275	case V4L2_PIX_FMT_YUV422M:
276		pix_mp->width = round_up(width, 32);
277		pix_mp->height = round_up(height, 16);
278		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
279		pix_mp->plane_fmt[0].sizeimage = width * height;
280		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32) / 2;
281		pix_mp->plane_fmt[1].sizeimage = width * height / 2;
282		pix_mp->plane_fmt[2].bytesperline = round_up(width, 32) / 2;
283		pix_mp->plane_fmt[2].sizeimage = width * height / 2;
284		break;
285	case V4L2_PIX_FMT_NV16M:
286	case V4L2_PIX_FMT_NV61M:
287		pix_mp->width = round_up(width, 32);
288		pix_mp->height = round_up(height, 16);
289		pix_mp->plane_fmt[0].bytesperline = round_up(width, 32);
290		pix_mp->plane_fmt[0].sizeimage = width * height;
291		pix_mp->plane_fmt[1].bytesperline = round_up(width, 32);
292		pix_mp->plane_fmt[1].sizeimage = width * height;
293		break;
294	default:
295		pix_mp->width = width;
296		pix_mp->height = height;
297		pix_mp->plane_fmt[0].bytesperline = 0;
298		pix_mp->plane_fmt[0].sizeimage = max(DEFAULT_SRC_SIZE(width, height),
299						     pix_mp->plane_fmt[0].sizeimage);
300		break;
301	}
302}
303
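/*
 * Ask the firmware to decode one frame. On failure, return the current OUTPUT
 * buffer with an error, switch to the STOP state and finish the M2M job so
 * the queue does not stall.
 */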
304static int start_decode(struct vpu_instance *inst, u32 *fail_res)
305{
306	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
307	int ret = 0;
308
309	ret = wave5_vpu_dec_start_one_frame(inst, fail_res);
310	if (ret) {
311		struct vb2_v4l2_buffer *src_buf;
312
313		src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
314		if (src_buf)
315			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
316		switch_state(inst, VPU_INST_STATE_STOP);
317
318		dev_dbg(inst->dev->dev, "%s: pic run failed / finish job", __func__);
319		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
320	}
321
322	return ret;
323}
324
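/*
 * Return an empty CAPTURE buffer flagged as LAST to signal the end of stream.
 * If no CAPTURE buffer is available, arm the m2m context so that the next
 * queued buffer is returned as the last one.
 */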
325static void flag_last_buffer_done(struct vpu_instance *inst)
326{
327	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
328	struct vb2_v4l2_buffer *vb;
329	int i;
330
331	lockdep_assert_held(&inst->state_spinlock);
332
333	vb = v4l2_m2m_dst_buf_remove(m2m_ctx);
334	if (!vb) {
335		m2m_ctx->is_draining = true;
336		m2m_ctx->next_buf_last = true;
337		return;
338	}
339
340	for (i = 0; i < vb->vb2_buf.num_planes; i++)
341		vb2_set_plane_payload(&vb->vb2_buf, i, 0);
342	vb->field = V4L2_FIELD_NONE;
343
344	v4l2_m2m_last_buffer_done(m2m_ctx, vb);
345}
346
347static void send_eos_event(struct vpu_instance *inst)
348{
349	static const struct v4l2_event vpu_event_eos = {
350		.type = V4L2_EVENT_EOS
351	};
352
353	lockdep_assert_held(&inst->state_spinlock);
354
355	v4l2_event_queue_fh(&inst->v4l2_fh, &vpu_event_eos);
356	inst->eos = false;
357}
358
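/*
 * Propagate the new sequence information reported by the firmware: update the
 * minimum number of CAPTURE buffers, the composition rectangle and the pixel
 * formats, then queue a V4L2_EVENT_SOURCE_CHANGE event for userspace.
 */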
359static int handle_dynamic_resolution_change(struct vpu_instance *inst)
360{
361	struct v4l2_fh *fh = &inst->v4l2_fh;
362	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
363
364	static const struct v4l2_event vpu_event_src_ch = {
365		.type = V4L2_EVENT_SOURCE_CHANGE,
366		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
367	};
368	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
369	struct dec_initial_info *initial_info = &inst->codec_info->dec_info.initial_info;
370
371	lockdep_assert_held(&inst->state_spinlock);
372
373	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad", __func__, &initial_info->rd_ptr);
374
375	dev_dbg(inst->dev->dev, "%s: width: %u height: %u profile: %u | minbuffer: %u\n",
376		__func__, initial_info->pic_width, initial_info->pic_height,
377		initial_info->profile, initial_info->min_frame_buffer_count);
378
379	inst->needs_reallocation = true;
380	inst->fbc_buf_count = initial_info->min_frame_buffer_count + 1;
381	if (inst->fbc_buf_count != v4l2_m2m_num_dst_bufs_ready(m2m_ctx)) {
382		struct v4l2_ctrl *ctrl;
383
384		ctrl = v4l2_ctrl_find(&inst->v4l2_ctrl_hdl,
385				      V4L2_CID_MIN_BUFFERS_FOR_CAPTURE);
386		if (ctrl)
387			v4l2_ctrl_s_ctrl(ctrl, inst->fbc_buf_count);
388	}
389
390	if (p_dec_info->initial_info_obtained) {
391		inst->conf_win.left = initial_info->pic_crop_rect.left;
392		inst->conf_win.top = initial_info->pic_crop_rect.top;
393		inst->conf_win.width = initial_info->pic_width -
394			initial_info->pic_crop_rect.left - initial_info->pic_crop_rect.right;
395		inst->conf_win.height = initial_info->pic_height -
396			initial_info->pic_crop_rect.top - initial_info->pic_crop_rect.bottom;
397
398		wave5_update_pix_fmt(&inst->src_fmt, initial_info->pic_width,
399				     initial_info->pic_height);
400		wave5_update_pix_fmt(&inst->dst_fmt, initial_info->pic_width,
401				     initial_info->pic_height);
402	}
403
404	v4l2_event_queue_fh(fh, &vpu_event_src_ch);
405
406	return 0;
407}
408
409static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst)
410{
411	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
412	struct dec_output_info dec_info;
413	int ret;
414	struct vb2_v4l2_buffer *dec_buf = NULL;
415	struct vb2_v4l2_buffer *disp_buf = NULL;
416	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
417	struct queue_status_info q_status;
418
419	dev_dbg(inst->dev->dev, "%s: Fetch output info from firmware.", __func__);
420
421	ret = wave5_vpu_dec_get_output_info(inst, &dec_info);
422	if (ret) {
423		dev_warn(inst->dev->dev, "%s: could not get output info.", __func__);
424		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
425		return;
426	}
427
428	dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &dec_info.rd_ptr,
429		&dec_info.wr_ptr);
430	wave5_handle_src_buffer(inst, dec_info.rd_ptr);
431
432	dev_dbg(inst->dev->dev, "%s: dec_info dec_idx %i disp_idx %i", __func__,
433		dec_info.index_frame_decoded, dec_info.index_frame_display);
434
435	if (!vb2_is_streaming(dst_vq)) {
436		dev_dbg(inst->dev->dev, "%s: capture is not streaming.", __func__);
437		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
438		return;
439	}
440
441	/* Remove decoded buffer from the ready queue now that it has been
442	 * decoded.
443	 */
444	if (dec_info.index_frame_decoded >= 0) {
445		struct vb2_buffer *vb = vb2_get_buffer(dst_vq,
446						       dec_info.index_frame_decoded);
447		if (vb) {
448			dec_buf = to_vb2_v4l2_buffer(vb);
449			dec_buf->vb2_buf.timestamp = inst->timestamp;
450		} else {
451			dev_warn(inst->dev->dev, "%s: invalid decoded frame index %i",
452				 __func__, dec_info.index_frame_decoded);
453		}
454	}
455
456	if (dec_info.index_frame_display >= 0) {
457		disp_buf = v4l2_m2m_dst_buf_remove_by_idx(m2m_ctx, dec_info.index_frame_display);
458		if (!disp_buf)
459			dev_warn(inst->dev->dev, "%s: invalid display frame index %i",
460				 __func__, dec_info.index_frame_display);
461	}
462
463	/* If there is anything to display, do that now */
464	if (disp_buf) {
465		struct vpu_dst_buffer *dst_vpu_buf = wave5_to_vpu_dst_buf(disp_buf);
466
467		if (inst->dst_fmt.num_planes == 1) {
468			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
469					      inst->dst_fmt.plane_fmt[0].sizeimage);
470		} else if (inst->dst_fmt.num_planes == 2) {
471			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
472					      inst->dst_fmt.plane_fmt[0].sizeimage);
473			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
474					      inst->dst_fmt.plane_fmt[1].sizeimage);
475		} else if (inst->dst_fmt.num_planes == 3) {
476			vb2_set_plane_payload(&disp_buf->vb2_buf, 0,
477					      inst->dst_fmt.plane_fmt[0].sizeimage);
478			vb2_set_plane_payload(&disp_buf->vb2_buf, 1,
479					      inst->dst_fmt.plane_fmt[1].sizeimage);
480			vb2_set_plane_payload(&disp_buf->vb2_buf, 2,
481					      inst->dst_fmt.plane_fmt[2].sizeimage);
482		}
483
484		/* TODO implement interlace support */
485		disp_buf->field = V4L2_FIELD_NONE;
486		dst_vpu_buf->display = true;
487		v4l2_m2m_buf_done(disp_buf, VB2_BUF_STATE_DONE);
488
489		dev_dbg(inst->dev->dev, "%s: frame_cycle %8u (payload %lu)\n",
490			__func__, dec_info.frame_cycle,
491			vb2_get_plane_payload(&disp_buf->vb2_buf, 0));
492	}
493
494	if (dec_info.index_frame_display == DISPLAY_IDX_FLAG_SEQ_END ||
495	    dec_info.sequence_changed) {
496		unsigned long flags;
497
498		spin_lock_irqsave(&inst->state_spinlock, flags);
499		if (!v4l2_m2m_has_stopped(m2m_ctx)) {
500			switch_state(inst, VPU_INST_STATE_STOP);
501
502			if (dec_info.sequence_changed)
503				handle_dynamic_resolution_change(inst);
504			else
505				send_eos_event(inst);
506
507			flag_last_buffer_done(inst);
508		}
509		spin_unlock_irqrestore(&inst->state_spinlock, flags);
510	}
511
512	/*
513	 * During a resolution change and while draining, the firmware may flush
514	 * the reorder queue regardless of having a matching decoding operation
515	 * pending. Only terminate the job if there are no more IRQs coming.
516	 */
517	wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
518	if (q_status.report_queue_count == 0 &&
519	    (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) {
520		dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__);
521		v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
522	}
523}
524
525static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct v4l2_capability *cap)
526{
527	strscpy(cap->driver, VPU_DEC_DRV_NAME, sizeof(cap->driver));
528	strscpy(cap->card, VPU_DEC_DRV_NAME, sizeof(cap->card));
529
530	return 0;
531}
532
533static int wave5_vpu_dec_enum_framesizes(struct file *f, void *fh, struct v4l2_frmsizeenum *fsize)
534{
535	const struct vpu_format *vpu_fmt;
536
537	if (fsize->index)
538		return -EINVAL;
539
540	vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
541	if (!vpu_fmt) {
542		vpu_fmt = wave5_find_vpu_fmt(fsize->pixel_format, dec_fmt_list[VPU_FMT_TYPE_RAW]);
543		if (!vpu_fmt)
544			return -EINVAL;
545	}
546
547	fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
548	fsize->stepwise.min_width = vpu_fmt->min_width;
549	fsize->stepwise.max_width = vpu_fmt->max_width;
550	fsize->stepwise.step_width = 1;
551	fsize->stepwise.min_height = vpu_fmt->min_height;
552	fsize->stepwise.max_height = vpu_fmt->max_height;
553	fsize->stepwise.step_height = 1;
554
555	return 0;
556}
557
558static int wave5_vpu_dec_enum_fmt_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f)
559{
560	const struct vpu_format *vpu_fmt;
561
562	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_RAW]);
563	if (!vpu_fmt)
564		return -EINVAL;
565
566	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
567	f->flags = 0;
568
569	return 0;
570}
571
572static int wave5_vpu_dec_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
573{
574	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
575	struct dec_info *p_dec_info = &inst->codec_info->dec_info;
576	const struct vpu_format *vpu_fmt;
577	int width, height;
578
579	dev_dbg(inst->dev->dev,
580		"%s: fourcc: %u width: %u height: %u nm planes: %u colorspace: %u field: %u\n",
581		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
582		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
583
584	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_RAW]);
585	if (!vpu_fmt) {
586		width = inst->dst_fmt.width;
587		height = inst->dst_fmt.height;
588		f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
589		f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
590	} else {
591		const struct v4l2_format_info *info = v4l2_format_info(vpu_fmt->v4l2_pix_fmt);
592
593		width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
594		height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
595		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
596		f->fmt.pix_mp.num_planes = info->mem_planes;
597	}
598
599	if (p_dec_info->initial_info_obtained) {
600		width = inst->dst_fmt.width;
601		height = inst->dst_fmt.height;
602	}
603
604	wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
605	f->fmt.pix_mp.flags = 0;
606	f->fmt.pix_mp.field = V4L2_FIELD_NONE;
607	f->fmt.pix_mp.colorspace = inst->colorspace;
608	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
609	f->fmt.pix_mp.quantization = inst->quantization;
610	f->fmt.pix_mp.xfer_func = inst->xfer_func;
611
612	return 0;
613}
614
615static int wave5_vpu_dec_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
616{
617	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
618	int i, ret;
619
620	dev_dbg(inst->dev->dev,
621		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
622		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
623		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
624
625	ret = wave5_vpu_dec_try_fmt_cap(file, fh, f);
626	if (ret)
627		return ret;
628
629	inst->dst_fmt.width = f->fmt.pix_mp.width;
630	inst->dst_fmt.height = f->fmt.pix_mp.height;
631	inst->dst_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
632	inst->dst_fmt.field = f->fmt.pix_mp.field;
633	inst->dst_fmt.flags = f->fmt.pix_mp.flags;
634	inst->dst_fmt.num_planes = f->fmt.pix_mp.num_planes;
635	for (i = 0; i < inst->dst_fmt.num_planes; i++) {
636		inst->dst_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
637		inst->dst_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
638	}
639
640	if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12 ||
641	    inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12M) {
642		inst->cbcr_interleave = true;
643		inst->nv21 = false;
644		inst->output_format = FORMAT_420;
645	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21 ||
646		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV21M) {
647		inst->cbcr_interleave = true;
648		inst->nv21 = true;
649		inst->output_format = FORMAT_420;
650	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16 ||
651		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV16M) {
652		inst->cbcr_interleave = true;
653		inst->nv21 = false;
654		inst->output_format = FORMAT_422;
655	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61 ||
656		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_NV61M) {
657		inst->cbcr_interleave = true;
658		inst->nv21 = true;
659		inst->output_format = FORMAT_422;
660	} else if (inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422P ||
661		   inst->dst_fmt.pixelformat == V4L2_PIX_FMT_YUV422M) {
662		inst->cbcr_interleave = false;
663		inst->nv21 = false;
664		inst->output_format = FORMAT_422;
665	} else {
666		inst->cbcr_interleave = false;
667		inst->nv21 = false;
668		inst->output_format = FORMAT_420;
669	}
670
671	return 0;
672}
673
674static int wave5_vpu_dec_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f)
675{
676	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
677	int i;
678
679	f->fmt.pix_mp.width = inst->dst_fmt.width;
680	f->fmt.pix_mp.height = inst->dst_fmt.height;
681	f->fmt.pix_mp.pixelformat = inst->dst_fmt.pixelformat;
682	f->fmt.pix_mp.field = inst->dst_fmt.field;
683	f->fmt.pix_mp.flags = inst->dst_fmt.flags;
684	f->fmt.pix_mp.num_planes = inst->dst_fmt.num_planes;
685	for (i = 0; i < f->fmt.pix_mp.num_planes; i++) {
686		f->fmt.pix_mp.plane_fmt[i].bytesperline = inst->dst_fmt.plane_fmt[i].bytesperline;
687		f->fmt.pix_mp.plane_fmt[i].sizeimage = inst->dst_fmt.plane_fmt[i].sizeimage;
688	}
689
690	f->fmt.pix_mp.colorspace = inst->colorspace;
691	f->fmt.pix_mp.ycbcr_enc = inst->ycbcr_enc;
692	f->fmt.pix_mp.quantization = inst->quantization;
693	f->fmt.pix_mp.xfer_func = inst->xfer_func;
694
695	return 0;
696}
697
698static int wave5_vpu_dec_enum_fmt_out(struct file *file, void *fh, struct v4l2_fmtdesc *f)
699{
700	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
701	const struct vpu_format *vpu_fmt;
702
703	dev_dbg(inst->dev->dev, "%s: index: %u\n", __func__, f->index);
704
705	vpu_fmt = wave5_find_vpu_fmt_by_idx(f->index, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
706	if (!vpu_fmt)
707		return -EINVAL;
708
709	f->pixelformat = vpu_fmt->v4l2_pix_fmt;
710	f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION | V4L2_FMT_FLAG_COMPRESSED;
711
712	return 0;
713}
714
715static int wave5_vpu_dec_try_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
716{
717	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
718	const struct vpu_format *vpu_fmt;
719
720	dev_dbg(inst->dev->dev,
721		"%s: fourcc: %u width: %u height: %u num_planes: %u colorspace: %u field: %u\n",
722		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
723		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.colorspace, f->fmt.pix_mp.field);
724
725	vpu_fmt = wave5_find_vpu_fmt(f->fmt.pix_mp.pixelformat, dec_fmt_list[VPU_FMT_TYPE_CODEC]);
726	if (!vpu_fmt) {
727		f->fmt.pix_mp.pixelformat = inst->src_fmt.pixelformat;
728		f->fmt.pix_mp.num_planes = inst->src_fmt.num_planes;
729		wave5_update_pix_fmt(&f->fmt.pix_mp, inst->src_fmt.width, inst->src_fmt.height);
730	} else {
731		int width = clamp(f->fmt.pix_mp.width, vpu_fmt->min_width, vpu_fmt->max_width);
732		int height = clamp(f->fmt.pix_mp.height, vpu_fmt->min_height, vpu_fmt->max_height);
733
734		f->fmt.pix_mp.pixelformat = vpu_fmt->v4l2_pix_fmt;
735		f->fmt.pix_mp.num_planes = 1;
736		wave5_update_pix_fmt(&f->fmt.pix_mp, width, height);
737	}
738
739	f->fmt.pix_mp.flags = 0;
740	f->fmt.pix_mp.field = V4L2_FIELD_NONE;
741
742	return 0;
743}
744
745static int wave5_vpu_dec_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f)
746{
747	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
748	int i, ret;
749
750	dev_dbg(inst->dev->dev,
751		"%s: fourcc: %u width: %u height: %u num_planes: %u field: %u\n",
752		__func__, f->fmt.pix_mp.pixelformat, f->fmt.pix_mp.width, f->fmt.pix_mp.height,
753		f->fmt.pix_mp.num_planes, f->fmt.pix_mp.field);
754
755	ret = wave5_vpu_dec_try_fmt_out(file, fh, f);
756	if (ret)
757		return ret;
758
759	inst->std = wave5_to_vpu_std(f->fmt.pix_mp.pixelformat, inst->type);
760	if (inst->std == STD_UNKNOWN) {
761		dev_warn(inst->dev->dev, "unsupported pixelformat: %.4s\n",
762			 (char *)&f->fmt.pix_mp.pixelformat);
763		return -EINVAL;
764	}
765
766	inst->src_fmt.width = f->fmt.pix_mp.width;
767	inst->src_fmt.height = f->fmt.pix_mp.height;
768	inst->src_fmt.pixelformat = f->fmt.pix_mp.pixelformat;
769	inst->src_fmt.field = f->fmt.pix_mp.field;
770	inst->src_fmt.flags = f->fmt.pix_mp.flags;
771	inst->src_fmt.num_planes = f->fmt.pix_mp.num_planes;
772	for (i = 0; i < inst->src_fmt.num_planes; i++) {
773		inst->src_fmt.plane_fmt[i].bytesperline = f->fmt.pix_mp.plane_fmt[i].bytesperline;
774		inst->src_fmt.plane_fmt[i].sizeimage = f->fmt.pix_mp.plane_fmt[i].sizeimage;
775	}
776
777	inst->colorspace = f->fmt.pix_mp.colorspace;
778	inst->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
779	inst->quantization = f->fmt.pix_mp.quantization;
780	inst->xfer_func = f->fmt.pix_mp.xfer_func;
781
782	wave5_update_pix_fmt(&inst->dst_fmt, f->fmt.pix_mp.width, f->fmt.pix_mp.height);
783
784	return 0;
785}
786
787static int wave5_vpu_dec_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
788{
789	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
790
791	dev_dbg(inst->dev->dev, "%s: type: %u | target: %u\n", __func__, s->type, s->target);
792
793	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
794		return -EINVAL;
795	switch (s->target) {
796	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
797	case V4L2_SEL_TGT_COMPOSE_PADDED:
798		s->r.left = 0;
799		s->r.top = 0;
800		s->r.width = inst->dst_fmt.width;
801		s->r.height = inst->dst_fmt.height;
802		break;
803	case V4L2_SEL_TGT_COMPOSE:
804	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
805		s->r.left = 0;
806		s->r.top = 0;
807		if (inst->state > VPU_INST_STATE_OPEN) {
808			s->r = inst->conf_win;
809		} else {
810			s->r.width = inst->src_fmt.width;
811			s->r.height = inst->src_fmt.height;
812		}
813		break;
814	default:
815		return -EINVAL;
816	}
817
818	return 0;
819}
820
821static int wave5_vpu_dec_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
822{
823	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
824
825	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
826		return -EINVAL;
827
828	if (s->target != V4L2_SEL_TGT_COMPOSE)
829		return -EINVAL;
830
831	dev_dbg(inst->dev->dev, "V4L2_SEL_TGT_COMPOSE w: %u h: %u\n",
832		s->r.width, s->r.height);
833
834	s->r.left = 0;
835	s->r.top = 0;
836	s->r.width = inst->dst_fmt.width;
837	s->r.height = inst->dst_fmt.height;
838
839	return 0;
840}
841
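/*
 * Handle V4L2_DEC_CMD_STOP: start draining by remembering the last queued
 * OUTPUT buffer or, if nothing is pending, flag EOS right away. Returns
 * -EBUSY while a previous drain is still in progress.
 */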
842static int wave5_vpu_dec_stop(struct vpu_instance *inst)
843{
844	int ret = 0;
845	unsigned long flags;
846	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
847
848	spin_lock_irqsave(&inst->state_spinlock, flags);
849
850	if (m2m_ctx->is_draining) {
851		ret = -EBUSY;
852		goto unlock_and_return;
853	}
854
855	if (inst->state != VPU_INST_STATE_NONE) {
856		/*
857		 * Temporarily release the state_spinlock so that subsequent
858		 * calls do not block on a mutex while inside this spinlock.
859		 */
860		spin_unlock_irqrestore(&inst->state_spinlock, flags);
861		ret = wave5_vpu_dec_set_eos_on_firmware(inst);
862		if (ret)
863			return ret;
864
865		spin_lock_irqsave(&inst->state_spinlock, flags);
866		/*
867		 * TODO eliminate this check by using a separate check for
868		 * draining triggered by a resolution change.
869		 */
870		if (m2m_ctx->is_draining) {
871			ret = -EBUSY;
872			goto unlock_and_return;
873		}
874	}
875
876	/*
877	 * Used to remember the EOS state after the streamoff/on transition on
878	 * the capture queue.
879	 */
880	inst->eos = true;
881
882	if (m2m_ctx->has_stopped)
883		goto unlock_and_return;
884
885	m2m_ctx->last_src_buf = v4l2_m2m_last_src_buf(m2m_ctx);
886	m2m_ctx->is_draining = true;
887
888	/*
889	 * If the last source buffer has not been written to the ring buffer
890	 * yet, sending EOS is deferred to device_run. Otherwise, the EOS
891	 * signal has to be sent to the firmware now, so that any pending
892	 * PIC_RUN ends without a new bitstream buffer.
893	 */
894	if (m2m_ctx->last_src_buf)
895		goto unlock_and_return;
896
897	if (inst->state == VPU_INST_STATE_NONE) {
898		send_eos_event(inst);
899		flag_last_buffer_done(inst);
900	}
901
902unlock_and_return:
903	spin_unlock_irqrestore(&inst->state_spinlock, flags);
904	return ret;
905}
906
907static int wave5_vpu_dec_start(struct vpu_instance *inst)
908{
909	int ret = 0;
910	unsigned long flags;
911	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
912	struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
913
914	spin_lock_irqsave(&inst->state_spinlock, flags);
915
916	if (m2m_ctx->is_draining) {
917		ret = -EBUSY;
918		goto unlock_and_return;
919	}
920
921	if (m2m_ctx->has_stopped)
922		m2m_ctx->has_stopped = false;
923
924	vb2_clear_last_buffer_dequeued(dst_vq);
925	inst->eos = false;
926
927unlock_and_return:
928	spin_unlock_irqrestore(&inst->state_spinlock, flags);
929	return ret;
930}
931
932static int wave5_vpu_dec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc)
933{
934	struct vpu_instance *inst = wave5_to_vpu_inst(fh);
935	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
936	int ret;
937
938	dev_dbg(inst->dev->dev, "decoder command: %u\n", dc->cmd);
939
940	ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, dc);
941	if (ret)
942		return ret;
943
944	switch (dc->cmd) {
945	case V4L2_DEC_CMD_STOP:
946		ret = wave5_vpu_dec_stop(inst);
947		/* Just in case we don't have anything to decode anymore */
948		v4l2_m2m_try_schedule(m2m_ctx);
949		break;
950	case V4L2_DEC_CMD_START:
951		ret = wave5_vpu_dec_start(inst);
952		break;
953	default:
954		ret = -EINVAL;
955	}
956
957	return ret;
958}
959
960static const struct v4l2_ioctl_ops wave5_vpu_dec_ioctl_ops = {
961	.vidioc_querycap = wave5_vpu_dec_querycap,
962	.vidioc_enum_framesizes = wave5_vpu_dec_enum_framesizes,
963
964	.vidioc_enum_fmt_vid_cap	= wave5_vpu_dec_enum_fmt_cap,
965	.vidioc_s_fmt_vid_cap_mplane = wave5_vpu_dec_s_fmt_cap,
966	.vidioc_g_fmt_vid_cap_mplane = wave5_vpu_dec_g_fmt_cap,
967	.vidioc_try_fmt_vid_cap_mplane = wave5_vpu_dec_try_fmt_cap,
968
969	.vidioc_enum_fmt_vid_out	= wave5_vpu_dec_enum_fmt_out,
970	.vidioc_s_fmt_vid_out_mplane = wave5_vpu_dec_s_fmt_out,
971	.vidioc_g_fmt_vid_out_mplane = wave5_vpu_g_fmt_out,
972	.vidioc_try_fmt_vid_out_mplane = wave5_vpu_dec_try_fmt_out,
973
974	.vidioc_g_selection = wave5_vpu_dec_g_selection,
975	.vidioc_s_selection = wave5_vpu_dec_s_selection,
976
977	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
978	/*
979	 * Firmware does not support CREATE_BUFS for CAPTURE queue. Since
980	 * there is no immediate use-case for supporting CREATE_BUFS on
981	 * just the OUTPUT queue, disable CREATE_BUFS altogether.
982	 */
983	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
984	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
985	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
986	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
987	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
988	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
989	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
990
991	.vidioc_try_decoder_cmd = v4l2_m2m_ioctl_try_decoder_cmd,
992	.vidioc_decoder_cmd = wave5_vpu_dec_decoder_cmd,
993
994	.vidioc_subscribe_event = wave5_vpu_subscribe_event,
995	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
996};
997
998static int wave5_vpu_dec_queue_setup(struct vb2_queue *q, unsigned int *num_buffers,
999				     unsigned int *num_planes, unsigned int sizes[],
1000				     struct device *alloc_devs[])
1001{
1002	struct vpu_instance *inst = vb2_get_drv_priv(q);
1003	struct v4l2_pix_format_mplane inst_format =
1004		(q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) ? inst->src_fmt : inst->dst_fmt;
1005
1006	dev_dbg(inst->dev->dev, "%s: num_buffers: %u | num_planes: %u | type: %u\n", __func__,
1007		*num_buffers, *num_planes, q->type);
1008
1009	*num_planes = inst_format.num_planes;
1010
1011	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1012		sizes[0] = inst_format.plane_fmt[0].sizeimage;
1013		dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1014	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1015		if (*num_buffers < inst->fbc_buf_count)
1016			*num_buffers = inst->fbc_buf_count;
1017
1018		if (*num_planes == 1) {
1019			if (inst->output_format == FORMAT_422)
1020				sizes[0] = inst_format.width * inst_format.height * 2;
1021			else
1022				sizes[0] = inst_format.width * inst_format.height * 3 / 2;
1023			dev_dbg(inst->dev->dev, "%s: size[0]: %u\n", __func__, sizes[0]);
1024		} else if (*num_planes == 2) {
1025			sizes[0] = inst_format.width * inst_format.height;
1026			if (inst->output_format == FORMAT_422)
1027				sizes[1] = inst_format.width * inst_format.height;
1028			else
1029				sizes[1] = inst_format.width * inst_format.height / 2;
1030			dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u\n",
1031				__func__, sizes[0], sizes[1]);
1032		} else if (*num_planes == 3) {
1033			sizes[0] = inst_format.width * inst_format.height;
1034			if (inst->output_format == FORMAT_422) {
1035				sizes[1] = inst_format.width * inst_format.height / 2;
1036				sizes[2] = inst_format.width * inst_format.height / 2;
1037			} else {
1038				sizes[1] = inst_format.width * inst_format.height / 4;
1039				sizes[2] = inst_format.width * inst_format.height / 4;
1040			}
1041			dev_dbg(inst->dev->dev, "%s: size[0]: %u | size[1]: %u | size[2]: %u\n",
1042				__func__, sizes[0], sizes[1], sizes[2]);
1043		}
1044	}
1045
1046	return 0;
1047}
1048
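/*
 * Allocate the compressed (FBC) reference framebuffers and describe the
 * application-provided CAPTURE buffers as linear framebuffers, then register
 * both sets with the firmware. Display flags are used so that the firmware
 * only writes into buffers that userspace has actually queued.
 */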
1049static int wave5_prepare_fb(struct vpu_instance *inst)
1050{
1051	int linear_num;
1052	int non_linear_num;
1053	int fb_stride = 0, fb_height = 0;
1054	int luma_size, chroma_size;
1055	int ret, i;
1056	struct v4l2_m2m_buffer *buf, *n;
1057	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1058
1059	linear_num = v4l2_m2m_num_dst_bufs_ready(m2m_ctx);
1060	non_linear_num = inst->fbc_buf_count;
1061
1062	for (i = 0; i < non_linear_num; i++) {
1063		struct frame_buffer *frame = &inst->frame_buf[i];
1064		struct vpu_buf *vframe = &inst->frame_vbuf[i];
1065
1066		fb_stride = inst->dst_fmt.width;
1067		fb_height = ALIGN(inst->dst_fmt.height, 32);
1068		luma_size = fb_stride * fb_height;
1069
1070		chroma_size = ALIGN(fb_stride / 2, 16) * fb_height;
1071
1072		if (vframe->size == (luma_size + chroma_size))
1073			continue;
1074
1075		if (vframe->size)
1076			wave5_vpu_dec_reset_framebuffer(inst, i);
1077
1078		vframe->size = luma_size + chroma_size;
1079		ret = wave5_vdi_allocate_dma_memory(inst->dev, vframe);
1080		if (ret) {
1081			dev_dbg(inst->dev->dev,
1082				"%s: Allocating FBC buf of size %zu, fail: %d\n",
1083				__func__, vframe->size, ret);
1084			return ret;
1085		}
1086
1087		frame->buf_y = vframe->daddr;
1088		frame->buf_cb = vframe->daddr + luma_size;
1089		frame->buf_cr = (dma_addr_t)-1;
1090		frame->size = vframe->size;
1091		frame->width = inst->src_fmt.width;
1092		frame->stride = fb_stride;
1093		frame->map_type = COMPRESSED_FRAME_MAP;
1094		frame->update_fb_info = true;
1095	}
1096	/* In case the count was reduced, clean up leftover framebuffer memory */
1097	for (i = non_linear_num; i < MAX_REG_FRAME; i++) {
1098		ret = wave5_vpu_dec_reset_framebuffer(inst, i);
1099		if (ret)
1100			break;
1101	}
1102
1103	for (i = 0; i < linear_num; i++) {
1104		struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1105		struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1106		struct vb2_buffer *vb = vb2_get_buffer(dst_vq, i);
1107		struct frame_buffer *frame = &inst->frame_buf[non_linear_num + i];
1108		dma_addr_t buf_addr_y = 0, buf_addr_cb = 0, buf_addr_cr = 0;
1109		u32 buf_size = 0;
1110		u32 fb_stride = inst->dst_fmt.width;
1111		u32 luma_size = fb_stride * inst->dst_fmt.height;
1112		u32 chroma_size;
1113
1114		if (inst->output_format == FORMAT_422)
1115			chroma_size = fb_stride * inst->dst_fmt.height / 2;
1116		else
1117			chroma_size = fb_stride * inst->dst_fmt.height / 4;
1118
1119		if (inst->dst_fmt.num_planes == 1) {
1120			buf_size = vb2_plane_size(vb, 0);
1121			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1122			buf_addr_cb = buf_addr_y + luma_size;
1123			buf_addr_cr = buf_addr_cb + chroma_size;
1124		} else if (inst->dst_fmt.num_planes == 2) {
1125			buf_size = vb2_plane_size(vb, 0) +
1126				vb2_plane_size(vb, 1);
1127			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1128			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1129			buf_addr_cr = buf_addr_cb + chroma_size;
1130		} else if (inst->dst_fmt.num_planes == 3) {
1131			buf_size = vb2_plane_size(vb, 0) +
1132				vb2_plane_size(vb, 1) +
1133				vb2_plane_size(vb, 2);
1134			buf_addr_y = vb2_dma_contig_plane_dma_addr(vb, 0);
1135			buf_addr_cb = vb2_dma_contig_plane_dma_addr(vb, 1);
1136			buf_addr_cr = vb2_dma_contig_plane_dma_addr(vb, 2);
1137		}
1138
1139		frame->buf_y = buf_addr_y;
1140		frame->buf_cb = buf_addr_cb;
1141		frame->buf_cr = buf_addr_cr;
1142		frame->size = buf_size;
1143		frame->width = inst->src_fmt.width;
1144		frame->stride = fb_stride;
1145		frame->map_type = LINEAR_FRAME_MAP;
1146		frame->update_fb_info = true;
1147	}
1148
1149	ret = wave5_vpu_dec_register_frame_buffer_ex(inst, non_linear_num, linear_num,
1150						     fb_stride, inst->dst_fmt.height);
1151	if (ret) {
1152		dev_dbg(inst->dev->dev, "%s: vpu_dec_register_frame_buffer_ex fail: %d",
1153			__func__, ret);
1154		return ret;
1155	}
1156
1157	/*
1158	 * Mark all frame buffers as out of display, to avoid using them before
1159	 * the application has queued them.
1160	 */
1161	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1162		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1163		if (ret) {
1164			dev_dbg(inst->dev->dev,
1165				"%s: Setting display flag of buf index: %u, fail: %d\n",
1166				__func__, i, ret);
1167		}
1168	}
1169
1170	v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, buf, n) {
1171		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1172
1173		ret = wave5_vpu_dec_clr_disp_flag(inst, vbuf->vb2_buf.index);
1174		if (ret)
1175			dev_dbg(inst->dev->dev,
1176				"%s: Clearing display flag of buf index: %u, fail: %d\n",
1177				__func__, vbuf->vb2_buf.index, ret);
1178	}
1179
1180	return 0;
1181}
1182
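/*
 * Copy one bitstream chunk into the ring buffer at the firmware write
 * pointer, splitting the copy in two when it wraps around the end of the
 * buffer.
 */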
1183static int write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t buffer_size,
1184			       struct vpu_buf *ring_buffer, dma_addr_t wr_ptr)
1185{
1186	size_t size;
1187	size_t offset = wr_ptr - ring_buffer->daddr;
1188	int ret;
1189
1190	if (wr_ptr + buffer_size > ring_buffer->daddr + ring_buffer->size) {
1191		size = ring_buffer->daddr + ring_buffer->size - wr_ptr;
1192		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer, size);
1193		if (ret < 0)
1194			return ret;
1195
1196		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, 0, (u8 *)buffer + size,
1197					     buffer_size - size);
1198		if (ret < 0)
1199			return ret;
1200	} else {
1201		ret = wave5_vdi_write_memory(inst->dev, ring_buffer, offset, (u8 *)buffer,
1202					     buffer_size);
1203		if (ret < 0)
1204			return ret;
1205	}
1206
1207	return 0;
1208}
1209
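/*
 * Copy every not-yet-consumed OUTPUT buffer into the bitstream ring buffer.
 * Stop early when the ring buffer has no room left or when the last buffer of
 * a drain request has been written.
 */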
1210static int fill_ringbuffer(struct vpu_instance *inst)
1211{
1212	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1213	struct v4l2_m2m_buffer *buf, *n;
1214	int ret;
1215
1216	if (m2m_ctx->last_src_buf)  {
1217		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
1218
1219		if (vpu_buf->consumed) {
1220			dev_dbg(inst->dev->dev, "last src buffer already written\n");
1221			return 0;
1222		}
1223	}
1224
1225	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
1226		struct vb2_v4l2_buffer *vbuf = &buf->vb;
1227		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1228		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1229		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
1230		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0);
1231		dma_addr_t rd_ptr = 0;
1232		dma_addr_t wr_ptr = 0;
1233		size_t remain_size = 0;
1234
1235		if (vpu_buf->consumed) {
1236			dev_dbg(inst->dev->dev, "already copied src buf (%u) to the ring buffer\n",
1237				vbuf->vb2_buf.index);
1238			continue;
1239		}
1240
1241		if (!src_buf) {
1242			dev_dbg(inst->dev->dev,
1243				"%s: Acquiring kernel pointer to src buf (%u), fail\n",
1244				__func__, vbuf->vb2_buf.index);
1245			break;
1246		}
1247
1248		ret = wave5_vpu_dec_get_bitstream_buffer(inst, &rd_ptr, &wr_ptr, &remain_size);
1249		if (ret) {
1250			/* Unable to acquire the mutex */
1251			dev_err(inst->dev->dev, "Getting the bitstream buffer, fail: %d\n",
1252				ret);
1253			return ret;
1254		}
1255
1256		dev_dbg(inst->dev->dev, "%s: rd_ptr %pad wr_ptr %pad", __func__, &rd_ptr, &wr_ptr);
1257
1258		if (remain_size < src_size) {
1259			dev_dbg(inst->dev->dev,
1260				"%s: remaining size: %zu < source size: %zu for src buf (%u)\n",
1261				__func__, remain_size, src_size, vbuf->vb2_buf.index);
1262			break;
1263		}
1264
1265		ret = write_to_ringbuffer(inst, src_buf, src_size, ring_buffer, wr_ptr);
1266		if (ret) {
1267			dev_err(inst->dev->dev, "Write src buf (%u) to ring buffer, fail: %d\n",
1268				vbuf->vb2_buf.index, ret);
1269			return ret;
1270		}
1271
1272		ret = wave5_vpu_dec_update_bitstream_buffer(inst, src_size);
1273		if (ret) {
1274			dev_dbg(inst->dev->dev,
1275				"update_bitstream_buffer fail: %d for src buf (%u)\n",
1276				ret, vbuf->vb2_buf.index);
1277			break;
1278		}
1279
1280		vpu_buf->consumed = true;
1281
1282		/* Don't write buffers past the last one while draining. */
1283		if (v4l2_m2m_is_last_draining_src_buf(m2m_ctx, vbuf)) {
1284			dev_dbg(inst->dev->dev, "last src buffer written to the ring buffer\n");
1285			break;
1286		}
1287	}
1288
1289	return 0;
1290}
1291
1292static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb)
1293{
1294	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1295	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1296	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1297	struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
1298
1299	vpu_buf->consumed = false;
1300	vbuf->sequence = inst->queued_src_buf_num++;
1301
1302	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1303}
1304
1305static void wave5_vpu_dec_buf_queue_dst(struct vb2_buffer *vb)
1306{
1307	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1308	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1309	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1310
1311	vbuf->sequence = inst->queued_dst_buf_num++;
1312
1313	if (inst->state == VPU_INST_STATE_PIC_RUN) {
1314		struct vpu_dst_buffer *vpu_buf = wave5_to_vpu_dst_buf(vbuf);
1315		int ret;
1316
1317		/*
1318		 * The buffer is already registered, just clear the display flag
1319		 * to let the firmware know it can be used.
1320		 */
1321		vpu_buf->display = false;
1322		ret = wave5_vpu_dec_clr_disp_flag(inst, vb->index);
1323		if (ret) {
1324			dev_dbg(inst->dev->dev,
1325				"%s: Clearing the display flag of buffer index: %u, fail: %d\n",
1326				__func__, vb->index, ret);
1327		}
1328	}
1329
1330	if (vb2_is_streaming(vb->vb2_queue) && v4l2_m2m_dst_buf_is_last(m2m_ctx)) {
1331		unsigned int i;
1332
1333		for (i = 0; i < vb->num_planes; i++)
1334			vb2_set_plane_payload(vb, i, 0);
1335
1336		vbuf->field = V4L2_FIELD_NONE;
1337
1338		send_eos_event(inst);
1339		v4l2_m2m_last_buffer_done(m2m_ctx, vbuf);
1340	} else {
1341		v4l2_m2m_buf_queue(m2m_ctx, vbuf);
1342	}
1343}
1344
1345static void wave5_vpu_dec_buf_queue(struct vb2_buffer *vb)
1346{
1347	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1348	struct vpu_instance *inst = vb2_get_drv_priv(vb->vb2_queue);
1349
1350	dev_dbg(inst->dev->dev, "%s: type: %4u index: %4u size: ([0]=%4lu, [1]=%4lu, [2]=%4lu)\n",
1351		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf, 0),
1352		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf->vb2_buf, 2));
1353
1354	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1355		wave5_vpu_dec_buf_queue_src(vb);
1356	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
1357		wave5_vpu_dec_buf_queue_dst(vb);
1358}
1359
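/*
 * The bitstream ring buffer is sized to hold four OUTPUT buffers of the
 * configured size, each rounded up to a 1 KiB multiple.
 */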
1360static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance *inst)
1361{
1362	int ret;
1363	struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
1364
1365	ring_buffer->size = ALIGN(inst->src_fmt.plane_fmt[0].sizeimage, 1024) * 4;
1366	ret = wave5_vdi_allocate_dma_memory(inst->dev, ring_buffer);
1367	if (ret) {
1368		dev_dbg(inst->dev->dev, "%s: allocate ring buffer of size %zu fail: %d\n",
1369			__func__, ring_buffer->size, ret);
1370		return ret;
1371	}
1372
1373	inst->last_rd_ptr = ring_buffer->daddr;
1374
1375	return 0;
1376}
1377
1378static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count)
1379{
1380	struct vpu_instance *inst = vb2_get_drv_priv(q);
1381	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1382	int ret = 0;
1383
1384	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1385
1386	v4l2_m2m_update_start_streaming_state(m2m_ctx, q);
1387
1388	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && inst->state == VPU_INST_STATE_NONE) {
1389		struct dec_open_param open_param;
1390
1391		memset(&open_param, 0, sizeof(struct dec_open_param));
1392
1393		ret = wave5_vpu_dec_allocate_ring_buffer(inst);
1394		if (ret)
1395			goto return_buffers;
1396
1397		open_param.bitstream_buffer = inst->bitstream_vbuf.daddr;
1398		open_param.bitstream_buffer_size = inst->bitstream_vbuf.size;
1399
1400		ret = wave5_vpu_dec_open(inst, &open_param);
1401		if (ret) {
1402			dev_dbg(inst->dev->dev, "%s: decoder opening, fail: %d\n",
1403				__func__, ret);
1404			goto free_bitstream_vbuf;
1405		}
1406
1407		ret = switch_state(inst, VPU_INST_STATE_OPEN);
1408		if (ret)
1409			goto free_bitstream_vbuf;
1410	} else if (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
1411		struct dec_initial_info *initial_info =
1412			&inst->codec_info->dec_info.initial_info;
1413
1414		if (inst->state == VPU_INST_STATE_STOP)
1415			ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1416		if (ret)
1417			goto return_buffers;
1418
1419		if (inst->state == VPU_INST_STATE_INIT_SEQ) {
1420			if (initial_info->luma_bitdepth != 8) {
1421				dev_info(inst->dev->dev, "%s: no support for %d bit depth",
1422					 __func__, initial_info->luma_bitdepth);
1423				ret = -EINVAL;
1424				goto return_buffers;
1425			}
1426		}
1427	}
1428
1429	return ret;
1430
1431free_bitstream_vbuf:
1432	wave5_vdi_free_dma_memory(inst->dev, &inst->bitstream_vbuf);
1433return_buffers:
1434	wave5_return_bufs(q, VB2_BUF_STATE_QUEUED);
1435	return ret;
1436}
1437
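/*
 * OUTPUT queue streamoff: return all pending source buffers with an error,
 * flush the instance on the firmware and reset the ring-buffer pointers so
 * that decoding can restart from a clean bitstream position.
 */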
1438static int streamoff_output(struct vb2_queue *q)
1439{
1440	struct vpu_instance *inst = vb2_get_drv_priv(q);
1441	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1442	struct vb2_v4l2_buffer *buf;
1443	int ret;
1444	dma_addr_t new_rd_ptr;
1445
1446	while ((buf = v4l2_m2m_src_buf_remove(m2m_ctx))) {
1447		dev_dbg(inst->dev->dev, "%s: (Multiplanar) buf type %4u | index %4u\n",
1448			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1449		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1450	}
1451
1452	ret = wave5_vpu_flush_instance(inst);
1453	if (ret)
1454		return ret;
1455
1456	/* Reset the ring buffer information */
1457	new_rd_ptr = wave5_vpu_dec_get_rd_ptr(inst);
1458	inst->last_rd_ptr = new_rd_ptr;
1459	inst->codec_info->dec_info.stream_rd_ptr = new_rd_ptr;
1460	inst->codec_info->dec_info.stream_wr_ptr = new_rd_ptr;
1461
1462	if (v4l2_m2m_has_stopped(m2m_ctx))
1463		send_eos_event(inst);
1464
1465	/* streamoff on output cancels any draining operation */
1466	inst->eos = false;
1467
1468	return 0;
1469}
1470
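/*
 * CAPTURE queue streamoff: mark every registered framebuffer as displayed so
 * that the firmware stops writing into it, then return all pending
 * destination buffers with an error. A pending resolution change also resets
 * the framebuffer information.
 */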
1471static int streamoff_capture(struct vb2_queue *q)
1472{
1473	struct vpu_instance *inst = vb2_get_drv_priv(q);
1474	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1475	struct vb2_v4l2_buffer *buf;
1476	unsigned int i;
1477	int ret = 0;
1478
1479	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
1480		ret = wave5_vpu_dec_set_disp_flag(inst, i);
1481		if (ret)
1482			dev_dbg(inst->dev->dev,
1483				"%s: Setting display flag of buf index: %u, fail: %d\n",
1484				__func__, i, ret);
1485	}
1486
1487	while ((buf = v4l2_m2m_dst_buf_remove(m2m_ctx))) {
1488		u32 plane;
1489
1490		dev_dbg(inst->dev->dev, "%s: buf type %4u | index %4u\n",
1491			__func__, buf->vb2_buf.type, buf->vb2_buf.index);
1492
1493		for (plane = 0; plane < inst->dst_fmt.num_planes; plane++)
1494			vb2_set_plane_payload(&buf->vb2_buf, plane, 0);
1495
1496		v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
1497	}
1498
1499	if (inst->needs_reallocation) {
1500		wave5_vpu_dec_give_command(inst, DEC_RESET_FRAMEBUF_INFO, NULL);
1501		inst->needs_reallocation = false;
1502	}
1503
1504	if (v4l2_m2m_has_stopped(m2m_ctx)) {
1505		ret = switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1506		if (ret)
1507			return ret;
1508	}
1509
1510	return 0;
1511}
1512
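/*
 * Before tearing a queue down, drain the firmware report queue so that no
 * decode result is left pending, then hand off to the queue-specific
 * streamoff helper.
 */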
1513static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
1514{
1515	struct vpu_instance *inst = vb2_get_drv_priv(q);
1516	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1517	bool check_cmd = true;
1518
1519	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
1520
1521	while (check_cmd) {
1522		struct queue_status_info q_status;
1523		struct dec_output_info dec_output_info;
1524
1525		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1526
1527		if (q_status.report_queue_count == 0)
1528			break;
1529
1530		if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1531			break;
1532
1533		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
1534			dev_dbg(inst->dev->dev, "Getting decoding results from fw, fail\n");
1535	}
1536
1537	v4l2_m2m_update_stop_streaming_state(m2m_ctx, q);
1538
1539	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
1540		streamoff_output(q);
1541	else
1542		streamoff_capture(q);
1543}
1544
1545static const struct vb2_ops wave5_vpu_dec_vb2_ops = {
1546	.queue_setup = wave5_vpu_dec_queue_setup,
1547	.wait_prepare = vb2_ops_wait_prepare,
1548	.wait_finish = vb2_ops_wait_finish,
1549	.buf_queue = wave5_vpu_dec_buf_queue,
1550	.start_streaming = wave5_vpu_dec_start_streaming,
1551	.stop_streaming = wave5_vpu_dec_stop_streaming,
1552};
1553
1554static void wave5_set_default_format(struct v4l2_pix_format_mplane *src_fmt,
1555				     struct v4l2_pix_format_mplane *dst_fmt)
1556{
1557	unsigned int dst_pix_fmt = dec_fmt_list[VPU_FMT_TYPE_RAW][0].v4l2_pix_fmt;
1558	const struct v4l2_format_info *dst_fmt_info = v4l2_format_info(dst_pix_fmt);
1559
1560	src_fmt->pixelformat = dec_fmt_list[VPU_FMT_TYPE_CODEC][0].v4l2_pix_fmt;
1561	src_fmt->field = V4L2_FIELD_NONE;
1562	src_fmt->flags = 0;
1563	src_fmt->num_planes = 1;
1564	wave5_update_pix_fmt(src_fmt, 720, 480);
1565
1566	dst_fmt->pixelformat = dst_pix_fmt;
1567	dst_fmt->field = V4L2_FIELD_NONE;
1568	dst_fmt->flags = 0;
1569	dst_fmt->num_planes = dst_fmt_info->mem_planes;
1570	wave5_update_pix_fmt(dst_fmt, 736, 480);
1571}
1572
1573static int wave5_vpu_dec_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
1574{
1575	return wave5_vpu_queue_init(priv, src_vq, dst_vq, &wave5_vpu_dec_vb2_ops);
1576}
1577
1578static const struct vpu_instance_ops wave5_vpu_dec_inst_ops = {
1579	.finish_process = wave5_vpu_dec_finish_decode,
1580};
1581
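/*
 * Ask the firmware to parse the stream headers (SEQ_INIT) and, once the
 * initial sequence information is available, report it to userspace as a
 * source change.
 */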
1582static int initialize_sequence(struct vpu_instance *inst)
1583{
1584	struct dec_initial_info initial_info;
1585	int ret = 0;
1586
1587	memset(&initial_info, 0, sizeof(struct dec_initial_info));
1588
1589	ret = wave5_vpu_dec_issue_seq_init(inst);
1590	if (ret) {
1591		dev_dbg(inst->dev->dev, "%s: wave5_vpu_dec_issue_seq_init, fail: %d\n",
1592			__func__, ret);
1593		return ret;
1594	}
1595
1596	if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
1597		dev_dbg(inst->dev->dev, "%s: failed to call vpu_wait_interrupt()\n", __func__);
1598
1599	ret = wave5_vpu_dec_complete_seq_init(inst, &initial_info);
1600	if (ret) {
1601		dev_dbg(inst->dev->dev, "%s: vpu_dec_complete_seq_init, fail: %d, reason: %u\n",
1602			__func__, ret, initial_info.seq_init_err_reason);
1603		wave5_handle_src_buffer(inst, initial_info.rd_ptr);
1604		return ret;
1605	}
1606
1607	handle_dynamic_resolution_change(inst);
1608
1609	return 0;
1610}
1611
1612static bool wave5_is_draining_or_eos(struct vpu_instance *inst)
1613{
1614	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1615
1616	lockdep_assert_held(&inst->state_spinlock);
1617	return m2m_ctx->is_draining || inst->eos;
1618}
1619
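/*
 * M2M device_run: feed the ring buffer and advance the instance state
 * machine, from sequence initialization over framebuffer registration to
 * issuing a PIC_RUN. The job is kept active while the firmware still has
 * work queued.
 */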
1620static void wave5_vpu_dec_device_run(void *priv)
1621{
1622	struct vpu_instance *inst = priv;
1623	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
1624	struct queue_status_info q_status;
1625	u32 fail_res = 0;
1626	int ret = 0;
1627
1628	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new bitstream data", __func__);
1629
1630	ret = fill_ringbuffer(inst);
1631	if (ret) {
1632		dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
1633		goto finish_job_and_return;
1634	}
1635
1636	switch (inst->state) {
1637	case VPU_INST_STATE_OPEN:
1638		ret = initialize_sequence(inst);
1639		if (ret) {
1640			unsigned long flags;
1641
1642			spin_lock_irqsave(&inst->state_spinlock, flags);
1643			if (wave5_is_draining_or_eos(inst) &&
1644			    wave5_last_src_buffer_consumed(m2m_ctx)) {
1645				struct vb2_queue *dst_vq = v4l2_m2m_get_dst_vq(m2m_ctx);
1646
1647				switch_state(inst, VPU_INST_STATE_STOP);
1648
1649				if (vb2_is_streaming(dst_vq))
1650					send_eos_event(inst);
1651				else
1652					handle_dynamic_resolution_change(inst);
1653
1654				flag_last_buffer_done(inst);
1655			}
1656			spin_unlock_irqrestore(&inst->state_spinlock, flags);
1657		} else {
1658			switch_state(inst, VPU_INST_STATE_INIT_SEQ);
1659		}
1660
1661		break;
1662
1663	case VPU_INST_STATE_INIT_SEQ:
1664		/*
1665		 * Switch the state early: preparing the framebuffers can
1666		 * trigger an IRQ before we get a chance to switch, which
1667		 * would lead to an invalid state change.
1668		 */
1669		switch_state(inst, VPU_INST_STATE_PIC_RUN);
1670
1671		/*
1672		 * During DRC, the picture decoding remains pending, so just leave the job
1673		 * active until this decode operation completes.
1674		 */
1675		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS, &q_status);
1676
1677		/*
1678		 * The sequence must be analyzed first to calculate the proper
1679		 * size of the auxiliary buffers.
1680		 */
1681		ret = wave5_prepare_fb(inst);
1682		if (ret) {
1683			dev_warn(inst->dev->dev, "Framebuffer preparation, fail: %d\n", ret);
1684			switch_state(inst, VPU_INST_STATE_STOP);
1685			break;
1686		}
1687
1688		if (q_status.instance_queue_count) {
1689			dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1690			return;
1691		}
1692
1693		fallthrough;
1694	case VPU_INST_STATE_PIC_RUN:
1695		ret = start_decode(inst, &fail_res);
1696		if (ret) {
1697			dev_err(inst->dev->dev,
1698				"Frame decoding on m2m context (%p), fail: %d (result: %d)\n",
1699				m2m_ctx, ret, fail_res);
1700			break;
1701		}
1702		/* Return so that we leave this job active */
1703		dev_dbg(inst->dev->dev, "%s: leave with active job", __func__);
1704		return;
1705	default:
1706		WARN(1, "Execution of a job in state %s illegal.\n", state_to_str(inst->state));
1707		break;
1708	}
1709
1710finish_job_and_return:
1711	dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__);
1712	v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx);
1713}
1714
static void wave5_vpu_dec_job_abort(void *priv)
{
	struct vpu_instance *inst = priv;
	int ret;

	ret = switch_state(inst, VPU_INST_STATE_STOP);
	if (ret)
		return;

	ret = wave5_vpu_dec_set_eos_on_firmware(inst);
	if (ret)
		dev_warn(inst->dev->dev,
			 "Setting EOS for the bitstream, fail: %d\n", ret);
}

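/*
 * V4L2 mem2mem job_ready callback. Return 1 when the instance has everything
 * it needs to run a job in its current state, 0 otherwise. The checks mirror
 * the state machine handled in wave5_vpu_dec_device_run().
 */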
static int wave5_vpu_dec_job_ready(void *priv)
{
	struct vpu_instance *inst = priv;
	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&inst->state_spinlock, flags);

	switch (inst->state) {
	case VPU_INST_STATE_NONE:
		dev_dbg(inst->dev->dev, "Decoder must be open to start queueing M2M jobs!\n");
		break;
	case VPU_INST_STATE_OPEN:
		if (wave5_is_draining_or_eos(inst) || !v4l2_m2m_has_stopped(m2m_ctx) ||
		    v4l2_m2m_num_src_bufs_ready(m2m_ctx) > 0) {
			ret = 1;
			break;
		}

		dev_dbg(inst->dev->dev,
			"Decoder must be draining or >= 1 OUTPUT queue buffer must be queued!\n");
		break;
	case VPU_INST_STATE_INIT_SEQ:
	case VPU_INST_STATE_PIC_RUN:
		if (!m2m_ctx->cap_q_ctx.q.streaming) {
			dev_dbg(inst->dev->dev, "CAPTURE queue must be streaming to queue jobs!\n");
			break;
		} else if (v4l2_m2m_num_dst_bufs_ready(m2m_ctx) < (inst->fbc_buf_count - 1)) {
			dev_dbg(inst->dev->dev,
				"No capture buffer ready to decode!\n");
			break;
		} else if (!wave5_is_draining_or_eos(inst) &&
			   !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
			dev_dbg(inst->dev->dev,
				"No bitstream data to decode!\n");
			break;
		}
		ret = 1;
		break;
	case VPU_INST_STATE_STOP:
		dev_dbg(inst->dev->dev, "Decoder is stopped, not running.\n");
		break;
	}

	spin_unlock_irqrestore(&inst->state_spinlock, flags);

	return ret;
}

static const struct v4l2_m2m_ops wave5_vpu_dec_m2m_ops = {
	.device_run = wave5_vpu_dec_device_run,
	.job_abort = wave5_vpu_dec_job_abort,
	.job_ready = wave5_vpu_dec_job_ready,
};

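/*
 * File open handler for the decoder video device: allocate and initialize a
 * new vpu_instance, set up its v4l2_fh, m2m context, controls and default
 * formats, then add the instance to the device's instance list.
 */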
static int wave5_vpu_open_dec(struct file *filp)
{
	struct video_device *vdev = video_devdata(filp);
	struct vpu_device *dev = video_drvdata(filp);
	struct vpu_instance *inst = NULL;
	struct v4l2_m2m_ctx *m2m_ctx;
	int ret = 0;

	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	inst->dev = dev;
	inst->type = VPU_INST_TYPE_DEC;
	inst->ops = &wave5_vpu_dec_inst_ops;

	spin_lock_init(&inst->state_spinlock);

	inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
	if (!inst->codec_info) {
		/* The instance is not tracked anywhere yet, free it directly */
		kfree(inst);
		return -ENOMEM;
	}

	v4l2_fh_init(&inst->v4l2_fh, vdev);
	filp->private_data = &inst->v4l2_fh;
	v4l2_fh_add(&inst->v4l2_fh);

	INIT_LIST_HEAD(&inst->list);

	inst->v4l2_m2m_dev = inst->dev->v4l2_m2m_dec_dev;
	inst->v4l2_fh.m2m_ctx =
		v4l2_m2m_ctx_init(inst->v4l2_m2m_dev, inst, wave5_vpu_dec_queue_init);
	if (IS_ERR(inst->v4l2_fh.m2m_ctx)) {
		ret = PTR_ERR(inst->v4l2_fh.m2m_ctx);
		goto cleanup_inst;
	}
	m2m_ctx = inst->v4l2_fh.m2m_ctx;

	v4l2_m2m_set_src_buffered(m2m_ctx, true);
	v4l2_m2m_set_dst_buffered(m2m_ctx, true);
	/*
	 * We use the M2M job queue to ensure synchronization of steps where
	 * needed, as IOCTLs can occur at any time and we need to run commands
	 * on the firmware in a specified order.
	 * In order to initialize the sequence on the firmware within an M2M
	 * job, the M2M framework needs to be able to queue jobs before
	 * the CAPTURE queue has been started, because we need the results of the
	 * initialization to properly prepare the CAPTURE queue with the correct
	 * amount of buffers.
	 * By setting ignore_cap_streaming to true the m2m framework will call
	 * job_ready as soon as the OUTPUT queue is streaming, instead of
	 * waiting until both the CAPTURE and OUTPUT queues are streaming.
	 */
	m2m_ctx->ignore_cap_streaming = true;

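	/*
	 * Expose V4L2_CID_MIN_BUFFERS_FOR_CAPTURE so userspace can query how
	 * many CAPTURE buffers it needs to allocate for decoding.
	 */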
	v4l2_ctrl_handler_init(&inst->v4l2_ctrl_hdl, 10);
	v4l2_ctrl_new_std(&inst->v4l2_ctrl_hdl, NULL,
			  V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 1, 32, 1, 1);

	if (inst->v4l2_ctrl_hdl.error) {
		ret = -ENODEV;
		goto cleanup_inst;
	}

	inst->v4l2_fh.ctrl_handler = &inst->v4l2_ctrl_hdl;
	v4l2_ctrl_handler_setup(&inst->v4l2_ctrl_hdl);

	wave5_set_default_format(&inst->src_fmt, &inst->dst_fmt);
	inst->colorspace = V4L2_COLORSPACE_REC709;
	inst->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	inst->quantization = V4L2_QUANTIZATION_DEFAULT;
	inst->xfer_func = V4L2_XFER_FUNC_DEFAULT;

	init_completion(&inst->irq_done);

	inst->id = ida_alloc(&inst->dev->inst_ida, GFP_KERNEL);
	if (inst->id < 0) {
		dev_warn(inst->dev->dev, "Allocating instance ID, fail: %d\n", inst->id);
		ret = inst->id;
		goto cleanup_inst;
	}

	wave5_vdi_allocate_sram(inst->dev);

	ret = mutex_lock_interruptible(&dev->dev_lock);
	if (ret)
		goto cleanup_inst;

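	/*
	 * Without an interrupt line the VPU is polled: start the polling
	 * hrtimer when the first instance is opened.
	 */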
	if (dev->irq < 0 && !hrtimer_active(&dev->hrtimer) && list_empty(&dev->instances))
		hrtimer_start(&dev->hrtimer, ns_to_ktime(dev->vpu_poll_interval * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL_PINNED);

	list_add_tail(&inst->list, &dev->instances);

	mutex_unlock(&dev->dev_lock);

	return 0;

cleanup_inst:
	wave5_cleanup_instance(inst);
	return ret;
}

static int wave5_vpu_dec_release(struct file *filp)
{
	return wave5_vpu_release_device(filp, wave5_vpu_dec_close, "decoder");
}

static const struct v4l2_file_operations wave5_vpu_dec_fops = {
	.owner = THIS_MODULE,
	.open = wave5_vpu_open_dec,
	.release = wave5_vpu_dec_release,
	.unlocked_ioctl = video_ioctl2,
	.poll = v4l2_m2m_fop_poll,
	.mmap = v4l2_m2m_fop_mmap,
};

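/*
 * Allocate and register the decoder video device together with its
 * v4l2-mem2mem device.
 */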
int wave5_vpu_dec_register_device(struct vpu_device *dev)
{
	struct video_device *vdev_dec;
	int ret;

	vdev_dec = devm_kzalloc(dev->v4l2_dev.dev, sizeof(*vdev_dec), GFP_KERNEL);
	if (!vdev_dec)
		return -ENOMEM;

	dev->v4l2_m2m_dec_dev = v4l2_m2m_init(&wave5_vpu_dec_m2m_ops);
	if (IS_ERR(dev->v4l2_m2m_dec_dev)) {
		ret = PTR_ERR(dev->v4l2_m2m_dec_dev);
		dev_err(dev->dev, "v4l2_m2m_init, fail: %d\n", ret);
		return ret;
	}

	dev->video_dev_dec = vdev_dec;

	strscpy(vdev_dec->name, VPU_DEC_DEV_NAME, sizeof(vdev_dec->name));
	vdev_dec->fops = &wave5_vpu_dec_fops;
	vdev_dec->ioctl_ops = &wave5_vpu_dec_ioctl_ops;
	vdev_dec->release = video_device_release_empty;
	vdev_dec->v4l2_dev = &dev->v4l2_dev;
	vdev_dec->vfl_dir = VFL_DIR_M2M;
	vdev_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
	vdev_dec->lock = &dev->dev_lock;

	/* Set drvdata before registration so that an early open() finds the device */
	video_set_drvdata(vdev_dec, dev);

	ret = video_register_device(vdev_dec, VFL_TYPE_VIDEO, -1);
	if (ret)
		return ret;

	return 0;
}

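/*
 * Unregister the decoder video device and release its mem2mem device, undoing
 * wave5_vpu_dec_register_device().
 */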
void wave5_vpu_dec_unregister_device(struct vpu_device *dev)
{
	video_unregister_device(dev->video_dev_dec);
	if (dev->v4l2_m2m_dec_dev)
		v4l2_m2m_release(dev->v4l2_m2m_dec_dev);
}
