1/* 2 * Format Conversion Utils 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25#include "libavutil/attributes.h" 26#include "libavutil/cpu.h" 27#include "libavutil/x86/asm.h" 28#include "libavutil/x86/cpu.h" 29#include "libavcodec/fmtconvert.h" 30 31#if HAVE_YASM 32 33void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float mul, int len); 34void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len); 35 36void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len); 37void ff_float_to_int16_sse (int16_t *dst, const float *src, long len); 38void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len); 39 40void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step); 41void ff_float_to_int16_step_sse (int16_t *dst, const float *src, long len, long step); 42void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step); 43 44void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len); 45void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len); 46void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len); 47 48void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); 49void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); 50void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len); 51 52#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse 53 54#define FLOAT_TO_INT16_INTERLEAVE(cpu) \ 55/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ 56static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ 57 int c;\ 58 for(c=0; c<channels; c++){\ 59 ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\ 60 }\ 61}\ 62\ 63static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ 64 if(channels==1)\ 65 ff_float_to_int16_##cpu(dst, src[0], len);\ 66 else if(channels==2){\ 67 ff_float_to_int16_interleave2_##cpu(dst, src, len);\ 68 }else if(channels==6){\ 69 ff_float_to_int16_interleave6_##cpu(dst, src, len);\ 70 }else\ 71 float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ 72} 73 74FLOAT_TO_INT16_INTERLEAVE(3dnow) 75FLOAT_TO_INT16_INTERLEAVE(sse) 76FLOAT_TO_INT16_INTERLEAVE(sse2) 77 78static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src, 79 long len, int channels) 80{ 81 if(channels==6) 82 ff_float_to_int16_interleave6_3dnowext(dst, src, len); 83 else 84 float_to_int16_interleave_3dnow(dst, src, len, channels); 85} 86 87void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len); 88void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len); 89 90void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len); 91void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len); 92 93static void float_interleave_mmx(float *dst, const float **src, 94 unsigned int len, int channels) 95{ 96 if (channels == 2) { 97 ff_float_interleave2_mmx(dst, src, len); 98 } else if (channels == 6) 99 ff_float_interleave6_mmx(dst, src, len); 100 else 101 ff_float_interleave_c(dst, src, len, channels); 102} 103 104static void float_interleave_sse(float *dst, const float **src, 105 unsigned int len, int channels) 106{ 107 if (channels == 2) { 108 ff_float_interleave2_sse(dst, src, len); 109 } else if (channels == 6) 110 ff_float_interleave6_sse(dst, src, len); 111 else 112 ff_float_interleave_c(dst, src, len, channels); 113} 114#endif /* HAVE_YASM */ 115 116av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) 117{ 118#if HAVE_YASM 119 int cpu_flags = av_get_cpu_flags(); 120 121 if (EXTERNAL_MMX(cpu_flags)) { 122 c->float_interleave = float_interleave_mmx; 123 } 124 if (EXTERNAL_AMD3DNOW(cpu_flags)) { 125 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 126 c->float_to_int16 = ff_float_to_int16_3dnow; 127 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; 128 } 129 } 130 if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { 131 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 132 c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; 133 } 134 } 135 if (EXTERNAL_SSE(cpu_flags)) { 136 c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; 137 c->float_to_int16 = ff_float_to_int16_sse; 138 c->float_to_int16_interleave = float_to_int16_interleave_sse; 139 c->float_interleave = float_interleave_sse; 140 } 141 if (EXTERNAL_SSE2(cpu_flags)) { 142 c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; 143 c->float_to_int16 = ff_float_to_int16_sse2; 144 c->float_to_int16_interleave = float_to_int16_interleave_sse2; 145 } 146#endif /* HAVE_YASM */ 147} 148