1/* 2 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "config.h" 22#include "libavutil/attributes.h" 23#include "libavutil/cpu.h" 24#include "libavutil/mem.h" 25#include "libavutil/ppc/cpu.h" 26#include "libavutil/ppc/util_altivec.h" 27#include "libavcodec/fmtconvert.h" 28 29#if HAVE_ALTIVEC 30 31static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src, 32 float mul, int len) 33{ 34 union { 35 vector float v; 36 float s[4]; 37 } mul_u; 38 int i; 39 vector float src1, src2, dst1, dst2, mul_v, zero; 40 41 zero = (vector float)vec_splat_u32(0); 42 mul_u.s[0] = mul; 43 mul_v = vec_splat(mul_u.v, 0); 44 45 for (i = 0; i < len; i += 8) { 46 src1 = vec_ctf(vec_ld(0, src+i), 0); 47 src2 = vec_ctf(vec_ld(16, src+i), 0); 48 dst1 = vec_madd(src1, mul_v, zero); 49 dst2 = vec_madd(src2, mul_v, zero); 50 vec_st(dst1, 0, dst+i); 51 vec_st(dst2, 16, dst+i); 52 } 53} 54 55 56static vector signed short float_to_int16_one_altivec(const float *src) 57{ 58 vector float s0 = vec_ld(0, src); 59 vector float s1 = vec_ld(16, src); 60 vector signed int t0 = vec_cts(s0, 0); 61 vector signed int t1 = vec_cts(s1, 0); 62 return vec_packs(t0,t1); 63} 64 65static void float_to_int16_altivec(int16_t *dst, const float *src, long len) 66{ 67 int i; 68 vector signed short d0, d1, d; 69 vector unsigned char align; 70 if (((long)dst) & 15) { //FIXME 71 for (i = 0; i < len - 7; i += 8) { 72 d0 = vec_ld(0, dst+i); 73 d = float_to_int16_one_altivec(src + i); 74 d1 = vec_ld(15, dst+i); 75 d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); 76 align = vec_lvsr(0, dst + i); 77 d0 = vec_perm(d1, d, align); 78 d1 = vec_perm(d, d1, align); 79 vec_st(d0, 0, dst + i); 80 vec_st(d1, 15, dst + i); 81 } 82 } else { 83 for (i = 0; i < len - 7; i += 8) { 84 d = float_to_int16_one_altivec(src + i); 85 vec_st(d, 0, dst + i); 86 } 87 } 88} 89 90#define VSTE_INC(dst, v, elem, inc) do { \ 91 vector signed short s = vec_splat(v, elem); \ 92 vec_ste(s, 0, dst); \ 93 dst += inc; \ 94 } while (0) 95 96static void float_to_int16_stride_altivec(int16_t *dst, const float *src, 97 long len, int stride) 98{ 99 int i; 100 vector signed short d; 101 102 for (i = 0; i < len - 7; i += 8) { 103 d = float_to_int16_one_altivec(src + i); 104 VSTE_INC(dst, d, 0, stride); 105 VSTE_INC(dst, d, 1, stride); 106 VSTE_INC(dst, d, 2, stride); 107 VSTE_INC(dst, d, 3, stride); 108 VSTE_INC(dst, d, 4, stride); 109 VSTE_INC(dst, d, 5, stride); 110 VSTE_INC(dst, d, 6, stride); 111 VSTE_INC(dst, d, 7, stride); 112 } 113} 114 115static void float_to_int16_interleave_altivec(int16_t *dst, const float **src, 116 long len, int channels) 117{ 118 int i; 119 vector signed short d0, d1, d2, c0, c1, t0, t1; 120 vector unsigned char align; 121 122 if (channels == 1) 123 float_to_int16_altivec(dst, src[0], len); 124 else { 125 if (channels == 2) { 126 if (((long)dst) & 15) { 127 for (i = 0; i < len - 7; i += 8) { 128 d0 = vec_ld(0, dst + i); 129 t0 = float_to_int16_one_altivec(src[0] + i); 130 d1 = vec_ld(31, dst + i); 131 t1 = float_to_int16_one_altivec(src[1] + i); 132 c0 = vec_mergeh(t0, t1); 133 c1 = vec_mergel(t0, t1); 134 d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); 135 align = vec_lvsr(0, dst + i); 136 d0 = vec_perm(d2, c0, align); 137 d1 = vec_perm(c0, c1, align); 138 vec_st(d0, 0, dst + i); 139 d0 = vec_perm(c1, d2, align); 140 vec_st(d1, 15, dst + i); 141 vec_st(d0, 31, dst + i); 142 dst += 8; 143 } 144 } else { 145 for (i = 0; i < len - 7; i += 8) { 146 t0 = float_to_int16_one_altivec(src[0] + i); 147 t1 = float_to_int16_one_altivec(src[1] + i); 148 d0 = vec_mergeh(t0, t1); 149 d1 = vec_mergel(t0, t1); 150 vec_st(d0, 0, dst + i); 151 vec_st(d1, 16, dst + i); 152 dst += 8; 153 } 154 } 155 } else { 156 for (i = 0; i < channels; i++) 157 float_to_int16_stride_altivec(dst + i, src[i], len, channels); 158 } 159 } 160} 161 162#endif /* HAVE_ALTIVEC */ 163 164av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c, 165 AVCodecContext *avctx) 166{ 167#if HAVE_ALTIVEC 168 if (!PPC_ALTIVEC(av_get_cpu_flags())) 169 return; 170 171 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec; 172 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 173 c->float_to_int16 = float_to_int16_altivec; 174 c->float_to_int16_interleave = float_to_int16_interleave_altivec; 175 } 176#endif /* HAVE_ALTIVEC */ 177} 178