1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "config.h" 20#include <stdint.h> 21#if HAVE_ALTIVEC_H 22#include <altivec.h> 23#endif 24 25#include "libavutil/attributes.h" 26#include "libavutil/cpu.h" 27#include "libavutil/ppc/cpu.h" 28#include "libavutil/ppc/types_altivec.h" 29#include "libavutil/ppc/util_altivec.h" 30#include "libavcodec/mpegvideoencdsp.h" 31 32#if HAVE_ALTIVEC 33 34static int pix_norm1_altivec(uint8_t *pix, int line_size) 35{ 36 int i, s = 0; 37 const vector unsigned int zero = 38 (const vector unsigned int) vec_splat_u32(0); 39 vector unsigned char perm = vec_lvsl(0, pix); 40 vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); 41 vector signed int sum; 42 43 for (i = 0; i < 16; i++) { 44 /* Read the potentially unaligned pixels. */ 45 vector unsigned char pixl = vec_ld(0, pix); 46 vector unsigned char pixr = vec_ld(15, pix); 47 vector unsigned char pixv = vec_perm(pixl, pixr, perm); 48 49 /* Square the values, and add them to our sum. */ 50 sv = vec_msum(pixv, pixv, sv); 51 52 pix += line_size; 53 } 54 /* Sum up the four partial sums, and put the result into s. */ 55 sum = vec_sums((vector signed int) sv, (vector signed int) zero); 56 sum = vec_splat(sum, 3); 57 vec_ste(sum, 0, &s); 58 59 return s; 60} 61 62static int pix_sum_altivec(uint8_t *pix, int line_size) 63{ 64 int i, s; 65 const vector unsigned int zero = 66 (const vector unsigned int) vec_splat_u32(0); 67 vector unsigned char perm = vec_lvsl(0, pix); 68 vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); 69 vector signed int sumdiffs; 70 71 for (i = 0; i < 16; i++) { 72 /* Read the potentially unaligned 16 pixels into t1. */ 73 vector unsigned char pixl = vec_ld(0, pix); 74 vector unsigned char pixr = vec_ld(15, pix); 75 vector unsigned char t1 = vec_perm(pixl, pixr, perm); 76 77 /* Add each 4 pixel group together and put 4 results into sad. */ 78 sad = vec_sum4s(t1, sad); 79 80 pix += line_size; 81 } 82 83 /* Sum up the four partial sums, and put the result into s. */ 84 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 85 sumdiffs = vec_splat(sumdiffs, 3); 86 vec_ste(sumdiffs, 0, &s); 87 88 return s; 89} 90 91#endif /* HAVE_ALTIVEC */ 92 93av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, 94 AVCodecContext *avctx) 95{ 96#if HAVE_ALTIVEC 97 if (!PPC_ALTIVEC(av_get_cpu_flags())) 98 return; 99 100 c->pix_norm1 = pix_norm1_altivec; 101 c->pix_sum = pix_sum_altivec; 102#endif /* HAVE_ALTIVEC */ 103} 104