1/* 2 * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> 3 * 4 * This file is part of Libav. 5 * 6 * Libav is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * Libav is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with Libav; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavcodec/fmtconvert.h" 22 23#include "dsputil_altivec.h" 24#include "util_altivec.h" 25 26static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len) 27{ 28 union { 29 vector float v; 30 float s[4]; 31 } mul_u; 32 int i; 33 vector float src1, src2, dst1, dst2, mul_v, zero; 34 35 zero = (vector float)vec_splat_u32(0); 36 mul_u.s[0] = mul; 37 mul_v = vec_splat(mul_u.v, 0); 38 39 for(i=0; i<len; i+=8) { 40 src1 = vec_ctf(vec_ld(0, src+i), 0); 41 src2 = vec_ctf(vec_ld(16, src+i), 0); 42 dst1 = vec_madd(src1, mul_v, zero); 43 dst2 = vec_madd(src2, mul_v, zero); 44 vec_st(dst1, 0, dst+i); 45 vec_st(dst2, 16, dst+i); 46 } 47} 48 49 50static vector signed short 51float_to_int16_one_altivec(const float *src) 52{ 53 vector float s0 = vec_ld(0, src); 54 vector float s1 = vec_ld(16, src); 55 vector signed int t0 = vec_cts(s0, 0); 56 vector signed int t1 = vec_cts(s1, 0); 57 return vec_packs(t0,t1); 58} 59 60static void float_to_int16_altivec(int16_t *dst, const float *src, long len) 61{ 62 int i; 63 vector signed short d0, d1, d; 64 vector unsigned char align; 65 if(((long)dst)&15) //FIXME 66 for(i=0; i<len-7; i+=8) { 67 d0 = vec_ld(0, dst+i); 68 d = float_to_int16_one_altivec(src+i); 69 d1 = vec_ld(15, dst+i); 70 d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); 71 align = vec_lvsr(0, dst+i); 72 d0 = vec_perm(d1, d, align); 73 d1 = vec_perm(d, d1, align); 74 vec_st(d0, 0, dst+i); 75 vec_st(d1,15, dst+i); 76 } 77 else 78 for(i=0; i<len-7; i+=8) { 79 d = float_to_int16_one_altivec(src+i); 80 vec_st(d, 0, dst+i); 81 } 82} 83 84static void 85float_to_int16_interleave_altivec(int16_t *dst, const float **src, 86 long len, int channels) 87{ 88 int i; 89 vector signed short d0, d1, d2, c0, c1, t0, t1; 90 vector unsigned char align; 91 if(channels == 1) 92 float_to_int16_altivec(dst, src[0], len); 93 else 94 if (channels == 2) { 95 if(((long)dst)&15) 96 for(i=0; i<len-7; i+=8) { 97 d0 = vec_ld(0, dst + i); 98 t0 = float_to_int16_one_altivec(src[0] + i); 99 d1 = vec_ld(31, dst + i); 100 t1 = float_to_int16_one_altivec(src[1] + i); 101 c0 = vec_mergeh(t0, t1); 102 c1 = vec_mergel(t0, t1); 103 d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); 104 align = vec_lvsr(0, dst + i); 105 d0 = vec_perm(d2, c0, align); 106 d1 = vec_perm(c0, c1, align); 107 vec_st(d0, 0, dst + i); 108 d0 = vec_perm(c1, d2, align); 109 vec_st(d1, 15, dst + i); 110 vec_st(d0, 31, dst + i); 111 dst+=8; 112 } 113 else 114 for(i=0; i<len-7; i+=8) { 115 t0 = float_to_int16_one_altivec(src[0] + i); 116 t1 = float_to_int16_one_altivec(src[1] + i); 117 d0 = vec_mergeh(t0, t1); 118 d1 = vec_mergel(t0, t1); 119 vec_st(d0, 0, dst + i); 120 vec_st(d1, 16, dst + i); 121 dst+=8; 122 } 123 } else { 124 DECLARE_ALIGNED(16, int16_t, tmp)[len]; 125 int c, j; 126 for (c = 0; c < channels; c++) { 127 float_to_int16_altivec(tmp, src[c], len); 128 for (i = 0, j = c; i < len; i++, j+=channels) { 129 dst[j] = tmp[i]; 130 } 131 } 132 } 133} 134 135void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx) 136{ 137 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec; 138 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) { 139 c->float_to_int16 = float_to_int16_altivec; 140 c->float_to_int16_interleave = float_to_int16_interleave_altivec; 141 } 142} 143