1#include "gfx_conv_mmx.h"
2#include "gfx_conv_c.h"
3
4// Packed
5extern "C" void _Convert_YUV422_RGBA32_SSE(void *fromYPtr, void *toPtr,
6	int width);
7extern "C" void _Convert_YUV422_RGBA32_SSE2(void *fromYPtr, void *toPtr,
8	int width);
9extern "C" void _Convert_YUV422_RGBA32_SSSE3(void *fromYPtr, void *toPtr,
10	int width);
11
12// Planar
13extern "C" void _Convert_YUV420P_RGBA32_SSE(void *fromYPtr, void *fromUPtr,
14	void *fromVPtr, void *toPtr, int width);
15extern "C" void _Convert_YUV420P_RGBA32_SSE2(void *fromYPtr, void *fromUPtr,
16	void *fromVPtr, void *toPtr, int width);
17extern "C" void _Convert_YUV420P_RGBA32_SSSE3(void *fromYPtr, void *fromUPtr,
18	void *fromVPtr, void *toPtr, int width);
19
20
21// Planar YUV420 means 2 Y lines share a UV line
22void
23gfx_conv_yuv420p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
24{
25	// in and out buffers must be aligned to 16 bytes,
26	// in should be as ffmpeg allocates it
27	if ((off_t)out->data[0] % 16 != 0) {
28		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
29		return;
30	}
31
32	uint8 *ybase = (uint8 *)in->data[0];
33	uint8 *ubase = (uint8 *)in->data[1];
34	uint8 *vbase = (uint8 *)in->data[2];
35	uint8 *rgbbase = (uint8 *)out->data[0];
36
37	int yBaseInc = in->linesize[0];
38	int uBaseInc = in->linesize[1];
39	int vBaseInc = in->linesize[2];
40	int rgbBaseInc = out->linesize[0];
41
42	for (int i=0;i<height;i+=2) {
43		// First Y row
44		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
45		ybase += yBaseInc;
46		rgbbase += rgbBaseInc;
47
48		// Second Y row but same u and v row
49		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
50		ybase += yBaseInc;
51		ubase += uBaseInc;
52		vbase += vBaseInc;
53		rgbbase += rgbBaseInc;
54	}
55}
56
57// Planar YUV420 means 2 Y lines share a UV line
58void
59gfx_conv_yuv420p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
60{
61	// in and out buffers must be aligned to 32 bytes,
62	// in should be as ffmpeg allocates it
63	if ((off_t)out->data[0] % 32 != 0) {
64		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
65		return;
66	}
67
68	uint8 *ybase = (uint8 *)in->data[0];
69	uint8 *ubase = (uint8 *)in->data[1];
70	uint8 *vbase = (uint8 *)in->data[2];
71	uint8 *rgbbase = (uint8 *)out->data[0];
72
73	int yBaseInc = in->linesize[0];
74	int uBaseInc = in->linesize[1];
75	int vBaseInc = in->linesize[2];
76	int rgbBaseInc = out->linesize[0];
77
78	for (int i=0;i<height;i+=2) {
79		// First Y row
80		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
81		ybase += yBaseInc;
82		rgbbase += rgbBaseInc;
83
84		// Second Y row but same u and v row
85		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
86		ybase += yBaseInc;
87		ubase += uBaseInc;
88		vbase += vBaseInc;
89		rgbbase += rgbBaseInc;
90	}
91}
92
93// Planar YUV420 means 2 Y lines share a UV line
94void
95gfx_conv_yuv420p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
96{
97	// in and out buffers must be aligned to 32 bytes,
98	// in should be as ffmpeg allocates it
99	if ((off_t)out->data[0] % 32 != 0) {
100		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
101		return;
102	}
103
104	uint8 *ybase = (uint8 *)in->data[0];
105	uint8 *ubase = (uint8 *)in->data[1];
106	uint8 *vbase = (uint8 *)in->data[2];
107	uint8 *rgbbase = (uint8 *)out->data[0];
108
109	int yBaseInc = in->linesize[0];
110	int uBaseInc = in->linesize[1];
111	int vBaseInc = in->linesize[2];
112	int rgbBaseInc = out->linesize[0];
113
114	for (int i=0;i<height;i+=2) {
115		// First Y row
116		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
117		ybase += yBaseInc;
118		rgbbase += rgbBaseInc;
119
120		// Second Y row but same u and v row
121		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
122		ybase += yBaseInc;
123		ubase += uBaseInc;
124		vbase += vBaseInc;
125		rgbbase += rgbBaseInc;
126	}
127}
128
129// Planar YUV422 means each Y line has it's own UV line
130void
131gfx_conv_yuv422p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
132{
133	// in and out buffers must be aligned to 32 bytes,
134	// in should be as ffmpeg allocates it
135	if ((off_t)out->data[0] % 32 != 0) {
136		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
137		return;
138	}
139
140	uint8 *ybase = (uint8 *)in->data[0];
141	uint8 *ubase = (uint8 *)in->data[1];
142	uint8 *vbase = (uint8 *)in->data[2];
143	uint8 *rgbbase = (uint8 *)out->data[0];
144
145	int yBaseInc = in->linesize[0];
146	int uBaseInc = in->linesize[1];
147	int vBaseInc = in->linesize[2];
148	int rgbBaseInc = out->linesize[0];
149
150	for (int i=0;i<height;i++) {
151		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
152		ybase += yBaseInc;
153		ubase += uBaseInc;
154		vbase += vBaseInc;
155		rgbbase += rgbBaseInc;
156	}
157}
158
159// Planar YUV422 means each Y line has it's own UV line
160void
161gfx_conv_yuv422p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
162{
163	// in and out buffers must be aligned to 32 bytes,
164	// in should be as ffmpeg allocates it
165	if ((off_t)out->data[0] % 32 != 0) {
166		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
167		return;
168	}
169
170	uint8 *ybase = (uint8 *)in->data[0];
171	uint8 *ubase = (uint8 *)in->data[1];
172	uint8 *vbase = (uint8 *)in->data[2];
173	uint8 *rgbbase = (uint8 *)out->data[0];
174
175	int yBaseInc = in->linesize[0];
176	int uBaseInc = in->linesize[1];
177	int vBaseInc = in->linesize[2];
178	int rgbBaseInc = out->linesize[0];
179
180	for (int i=0;i<height;i++) {
181		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
182		ybase += yBaseInc;
183		ubase += uBaseInc;
184		vbase += vBaseInc;
185		rgbbase += rgbBaseInc;
186	}
187}
188
189// Planar YUV422 means each Y line has it's own UV line
190void
191gfx_conv_yuv422p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
192{
193	// in and out buffers must be aligned to 32 bytes,
194	// in should be as ffmpeg allocates it
195	if ((off_t)out->data[0] % 32 != 0) {
196		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
197		return;
198	}
199
200	uint8 *ybase = (uint8 *)in->data[0];
201	uint8 *ubase = (uint8 *)in->data[1];
202	uint8 *vbase = (uint8 *)in->data[2];
203	uint8 *rgbbase = (uint8 *)out->data[0];
204
205	int yBaseInc = in->linesize[0];
206	int uBaseInc = in->linesize[1];
207	int vBaseInc = in->linesize[2];
208	int rgbBaseInc = out->linesize[0];
209
210	for (int i=0;i<height;i++) {
211		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
212		ybase += yBaseInc;
213		ubase += uBaseInc;
214		vbase += vBaseInc;
215		rgbbase += rgbBaseInc;
216	}
217}
218
219// Packed YUV422 (YUYV)
220void
221gfx_conv_yuv422_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
222{
223	// in and out buffers must be aligned to 16 bytes,
224	// in should be as ffmpeg allocates it
225	if ((off_t)out->data[0] % 16 != 0) {
226		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
227		return;
228	}
229
230	uint8 *ybase = (uint8 *)in->data[0];
231	uint8 *rgbbase = (uint8 *)out->data[0];
232
233	for (int i = 0; i <= height; i++) {
234		_Convert_YUV422_RGBA32_SSE(ybase, rgbbase, width);
235		ybase += in->linesize[0];
236		rgbbase += out->linesize[0];
237	}
238}
239
240// Packed YUV422 (YUYV)
241void
242gfx_conv_yuv422_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
243{
244	// in and out buffers must be aligned to 32 bytes,
245	// in should be as ffmpeg allocates it
246	if ((off_t)out->data[0] % 32 != 0) {
247		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
248		return;
249	}
250
251	uint8 *ybase = (uint8 *)in->data[0];
252	uint8 *rgbbase = (uint8 *)out->data[0];
253
254	for (int i = 0; i <= height; i++) {
255		_Convert_YUV422_RGBA32_SSE2(ybase, rgbbase, width);
256		ybase += in->linesize[0];
257		rgbbase += out->linesize[0];
258	}
259}
260
261// Packed YUV422 (YUYV)
262void
263gfx_conv_yuv422_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
264{
265	// in and out buffers must be aligned to 32 bytes,
266	// in should be as ffmpeg allocates it
267	if ((off_t)out->data[0] % 32 != 0) {
268		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
269		return;
270	}
271
272	uint8 *ybase = (uint8 *)in->data[0];
273	uint8 *rgbbase = (uint8 *)out->data[0];
274
275	for (int i = 0; i <= height; i++) {
276		_Convert_YUV422_RGBA32_SSSE3(ybase, rgbbase, width);
277		ybase += in->linesize[0];
278		rgbbase += out->linesize[0];
279	}
280}
281