1/*
2 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "config.h"
22#include "libavutil/cpu.h"
23#include "libavutil/x86/cpu.h"
24#include "libavresample/audio_convert.h"
25
/* flat conversions
 *
 * Naming: ff_conv_<input format>_to_<output format>_<instruction set>.
 * dst is typed for the output format and src for the input format; this
 * matches how the routines are registered in ff_audio_convert_init_x86()
 * (output format first, input format second).
 */

/* s16 -> s32: writes 32-bit samples, reads 16-bit samples */
void ff_conv_s16_to_s32_sse2(int32_t *dst, const int16_t *src, int len);

/* s16 -> flt */
void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len);
void ff_conv_s16_to_flt_sse4(float *dst, const int16_t *src, int len);

/* s32 -> s16 */
void ff_conv_s32_to_s16_mmx (int16_t *dst, const int32_t *src, int len);
void ff_conv_s32_to_s16_sse2(int16_t *dst, const int32_t *src, int len);

/* s32 -> flt */
void ff_conv_s32_to_flt_sse2(float *dst, const int32_t *src, int len);
void ff_conv_s32_to_flt_avx (float *dst, const int32_t *src, int len);

/* flt -> s16 */
void ff_conv_flt_to_s16_sse2(int16_t *dst, const float *src, int len);

/* flt -> s32 */
void ff_conv_flt_to_s32_sse2(int32_t *dst, const float *src, int len);
void ff_conv_flt_to_s32_avx (int32_t *dst, const float *src, int len);
43
/* interleave conversions
 *
 * src is a pointer to an array of sample pointers, dst is a single buffer.
 * The channel count each routine is specialised for is part of its name.
 */

/* s16p -> s16, 2 channels */
void ff_conv_s16p_to_s16_2ch_sse2(int16_t *dst, int16_t *const *src, int len,
                                  int channels);
void ff_conv_s16p_to_s16_2ch_avx(int16_t *dst, int16_t *const *src, int len,
                                 int channels);

/* s16p -> s16, 6 channels */
void ff_conv_s16p_to_s16_6ch_sse2(int16_t *dst, int16_t *const *src, int len,
                                  int channels);
void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src, int len,
                                      int channels);
void ff_conv_s16p_to_s16_6ch_avx(int16_t *dst, int16_t *const *src, int len,
                                 int channels);

/* s16p -> flt, 2 channels */
void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src, int len,
                                  int channels);
void ff_conv_s16p_to_flt_2ch_avx(float *dst, int16_t *const *src, int len,
                                 int channels);

/* s16p -> flt, 6 channels */
void ff_conv_s16p_to_flt_6ch_sse2(float *dst, int16_t *const *src, int len,
                                  int channels);
void ff_conv_s16p_to_flt_6ch_ssse3(float *dst, int16_t *const *src, int len,
                                   int channels);
void ff_conv_s16p_to_flt_6ch_avx(float *dst, int16_t *const *src, int len,
                                 int channels);

/* fltp -> s16, 2 channels */
void ff_conv_fltp_to_s16_2ch_sse2(int16_t *dst, float *const *src, int len,
                                  int channels);
void ff_conv_fltp_to_s16_2ch_ssse3(int16_t *dst, float *const *src, int len,
                                   int channels);

/* fltp -> s16, 6 channels */
void ff_conv_fltp_to_s16_6ch_sse(int16_t *dst, float *const *src, int len,
                                 int channels);
void ff_conv_fltp_to_s16_6ch_sse2(int16_t *dst, float *const *src, int len,
                                  int channels);
void ff_conv_fltp_to_s16_6ch_avx(int16_t *dst, float *const *src, int len,
                                 int channels);

/* fltp -> flt, 2 channels */
void ff_conv_fltp_to_flt_2ch_sse(float *dst, float *const *src, int len,
                                 int channels);
void ff_conv_fltp_to_flt_2ch_avx(float *dst, float *const *src, int len,
                                 int channels);

/* fltp -> flt, 6 channels */
void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int len,
                                 int channels);
void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len,
                                  int channels);
void ff_conv_fltp_to_flt_6ch_avx(float *dst, float *const *src, int len,
                                 int channels);
93
/* deinterleave conversions
 *
 * dst is a pointer to an array of sample pointers, src is a single buffer.
 * The channel count each routine is specialised for is part of its name.
 */

/* s16 -> s16p, 2 channels */
void ff_conv_s16_to_s16p_2ch_sse2(int16_t *const *dst, int16_t *src, int len,
                                  int channels);
void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const *dst, int16_t *src, int len,
                                   int channels);
void ff_conv_s16_to_s16p_2ch_avx(int16_t *const *dst, int16_t *src, int len,
                                 int channels);

/* s16 -> s16p, 6 channels */
void ff_conv_s16_to_s16p_6ch_sse2(int16_t *const *dst, int16_t *src, int len,
                                  int channels);
void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const *dst, int16_t *src, int len,
                                   int channels);
void ff_conv_s16_to_s16p_6ch_avx(int16_t *const *dst, int16_t *src, int len,
                                 int channels);

/* s16 -> fltp, 2 channels */
void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src, int len,
                                  int channels);
void ff_conv_s16_to_fltp_2ch_avx(float *const *dst, int16_t *src, int len,
                                 int channels);

/* s16 -> fltp, 6 channels */
void ff_conv_s16_to_fltp_6ch_sse2(float *const *dst, int16_t *src, int len,
                                  int channels);
void ff_conv_s16_to_fltp_6ch_ssse3(float *const *dst, int16_t *src, int len,
                                   int channels);
void ff_conv_s16_to_fltp_6ch_sse4(float *const *dst, int16_t *src, int len,
                                  int channels);
void ff_conv_s16_to_fltp_6ch_avx(float *const *dst, int16_t *src, int len,
                                 int channels);

/* flt -> s16p, 2 channels */
void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src, int len,
                                  int channels);
void ff_conv_flt_to_s16p_2ch_avx(int16_t *const *dst, float *src, int len,
                                 int channels);

/* flt -> s16p, 6 channels */
void ff_conv_flt_to_s16p_6ch_sse2(int16_t *const *dst, float *src, int len,
                                  int channels);
void ff_conv_flt_to_s16p_6ch_ssse3(int16_t *const *dst, float *src, int len,
                                   int channels);
void ff_conv_flt_to_s16p_6ch_avx(int16_t *const *dst, float *src, int len,
                                 int channels);

/* flt -> fltp, 2 channels */
void ff_conv_flt_to_fltp_2ch_sse(float *const *dst, float *src, int len,
                                 int channels);
void ff_conv_flt_to_fltp_2ch_avx(float *const *dst, float *src, int len,
                                 int channels);

/* flt -> fltp, 6 channels */
void ff_conv_flt_to_fltp_6ch_sse2(float *const *dst, float *src, int len,
                                  int channels);
void ff_conv_flt_to_fltp_6ch_avx(float *const *dst, float *src, int len,
                                 int channels);
145
146av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
147{
148    int cpu_flags = av_get_cpu_flags();
149
150    if (EXTERNAL_MMX(cpu_flags)) {
151        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
152                                  0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
153        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
154                                  6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
155    }
156    if (EXTERNAL_SSE(cpu_flags)) {
157        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
158                                  6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
159        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
160                                  2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse);
161        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
162                                  2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
163    }
164    if (EXTERNAL_SSE2(cpu_flags)) {
165        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
166            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
167                                      0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
168            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
169                                      6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2);
170            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
171                                      6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
172        } else {
173            ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
174                                      6, 1, 4, "SSE2SLOW", ff_conv_s16p_to_s16_6ch_sse2slow);
175        }
176        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
177                                  0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
178        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
179                                  0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
180        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
181                                  0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
182        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
183                                  0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
184        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
185                                  0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
186        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
187                                  2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2);
188        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
189                                  2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2);
190        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
191                                  6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2);
192        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
193                                  2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2);
194        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
195                                  2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
196        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
197                                  6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
198        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
199                                  2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
200        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
201                                  6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
202        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
203                                  2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
204        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
205                                  6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2);
206        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
207                                  6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
208    }
209    if (EXTERNAL_SSSE3(cpu_flags)) {
210        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
211                                  6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
212        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
213                                  2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3);
214        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
215                                  2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3);
216        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
217                                  6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3);
218        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
219                                  6, 16, 4, "SSSE3", ff_conv_s16_to_fltp_6ch_ssse3);
220        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
221                                  6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
222    }
223    if (EXTERNAL_SSE4(cpu_flags)) {
224        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
225                                  0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
226        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
227                                  6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
228    }
229    if (EXTERNAL_AVX(cpu_flags)) {
230        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
231                                  0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
232        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
233                                  0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
234        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
235                                  2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx);
236        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
237                                  6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx);
238        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
239                                  2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx);
240        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
241                                  6, 16, 4, "AVX", ff_conv_s16p_to_flt_6ch_avx);
242        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
243                                  6, 16, 4, "AVX", ff_conv_fltp_to_s16_6ch_avx);
244        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
245                                  6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
246        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
247                                  2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
248        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
249                                  6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
250        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
251                                  2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
252        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
253                                  6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
254        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
255                                  2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
256        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
257                                  6, 16, 4, "AVX", ff_conv_flt_to_s16p_6ch_avx);
258        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
259                                  2, 16, 4, "AVX", ff_conv_flt_to_fltp_2ch_avx);
260        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
261                                  6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx);
262    }
263}
264