1/* 2 * Copyright (c) 2002 Dieter Shirley 3 * 4 * dct_unquantize_h263_altivec: 5 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include <stdlib.h> 25#include <stdio.h> 26 27#include "config.h" 28#include "libavutil/attributes.h" 29#include "libavutil/cpu.h" 30#include "libavutil/ppc/cpu.h" 31#include "libavutil/ppc/types_altivec.h" 32#include "libavutil/ppc/util_altivec.h" 33#include "libavcodec/mpegvideo.h" 34 35#if HAVE_ALTIVEC 36 37/* AltiVec version of dct_unquantize_h263 38 this code assumes `block' is 16 bytes-aligned */ 39static void dct_unquantize_h263_altivec(MpegEncContext *s, 40 int16_t *block, int n, int qscale) 41{ 42 int i, level, qmul, qadd; 43 int nCoeffs; 44 45 assert(s->block_last_index[n]>=0); 46 47 qadd = (qscale - 1) | 1; 48 qmul = qscale << 1; 49 50 if (s->mb_intra) { 51 if (!s->h263_aic) { 52 if (n < 4) 53 block[0] = block[0] * s->y_dc_scale; 54 else 55 block[0] = block[0] * s->c_dc_scale; 56 }else 57 qadd = 0; 58 i = 1; 59 nCoeffs= 63; //does not always use zigzag table 60 } else { 61 i = 0; 62 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; 63 } 64 65 { 66 register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); 67 DECLARE_ALIGNED(16, short, qmul8) = qmul; 68 DECLARE_ALIGNED(16, short, qadd8) = qadd; 69 register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; 70 register vector bool short blockv_null, blockv_neg; 71 register short backup_0 = block[0]; 72 register int j = 0; 73 74 qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); 75 qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); 76 nqaddv = vec_sub(vczero, qaddv); 77 78 // vectorize all the 16 bytes-aligned blocks 79 // of 8 elements 80 for(; (j + 7) <= nCoeffs ; j+=8) { 81 blockv = vec_ld(j << 1, block); 82 blockv_neg = vec_cmplt(blockv, vczero); 83 blockv_null = vec_cmpeq(blockv, vczero); 84 // choose between +qadd or -qadd as the third operand 85 temp1 = vec_sel(qaddv, nqaddv, blockv_neg); 86 // multiply & add (block{i,i+7} * qmul [+-] qadd) 87 temp1 = vec_mladd(blockv, qmulv, temp1); 88 // put 0 where block[{i,i+7} used to have 0 89 blockv = vec_sel(temp1, blockv, blockv_null); 90 vec_st(blockv, j << 1, block); 91 } 92 93 // if nCoeffs isn't a multiple of 8, finish the job 94 // using good old scalar units. 95 // (we could do it using a truncated vector, 96 // but I'm not sure it's worth the hassle) 97 for(; j <= nCoeffs ; j++) { 98 level = block[j]; 99 if (level) { 100 if (level < 0) { 101 level = level * qmul - qadd; 102 } else { 103 level = level * qmul + qadd; 104 } 105 block[j] = level; 106 } 107 } 108 109 if (i == 1) { 110 // cheat. this avoid special-casing the first iteration 111 block[0] = backup_0; 112 } 113 } 114} 115 116#endif /* HAVE_ALTIVEC */ 117 118av_cold void ff_MPV_common_init_ppc(MpegEncContext *s) 119{ 120#if HAVE_ALTIVEC 121 if (!PPC_ALTIVEC(av_get_cpu_flags())) 122 return; 123 124 if ((s->avctx->dct_algo == FF_DCT_AUTO) || 125 (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { 126 s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; 127 s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; 128 } 129#endif /* HAVE_ALTIVEC */ 130} 131