1/* 2 * ARM optimized DSP utils 3 * Copyright (c) 2001 Lionel Ulmer 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include "libavcodec/dsputil.h" 23#if HAVE_IPP 24#include <ipp.h> 25#endif 26 27void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); 28void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx); 29void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); 30 31void j_rev_dct_ARM(DCTELEM *data); 32void simple_idct_ARM(DCTELEM *data); 33 34void simple_idct_armv5te(DCTELEM *data); 35void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data); 36void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data); 37 38void ff_simple_idct_armv6(DCTELEM *data); 39void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); 40void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); 41 42void ff_simple_idct_neon(DCTELEM *data); 43void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); 44void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); 45 46/* XXX: local hack */ 47static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); 48static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); 49 50void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 51void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 52void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 53void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 54 55void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 56void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 57void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 58 59void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 60 61void ff_prefetch_arm(void *mem, int stride, int h); 62 63CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8) 64CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8) 65CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8) 66CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8) 67CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8) 68CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) 69 70void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest, 71 int line_size); 72 73/* XXX: those functions should be suppressed ASAP when all IDCTs are 74 converted */ 75static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) 76{ 77 j_rev_dct_ARM (block); 78 ff_put_pixels_clamped(block, dest, line_size); 79} 80static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) 81{ 82 j_rev_dct_ARM (block); 83 ff_add_pixels_clamped(block, dest, line_size); 84} 85static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) 86{ 87 simple_idct_ARM (block); 88 ff_put_pixels_clamped(block, dest, line_size); 89} 90static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) 91{ 92 simple_idct_ARM (block); 93 ff_add_pixels_clamped(block, dest, line_size); 94} 95 96#if HAVE_IPP 97static void simple_idct_ipp(DCTELEM *block) 98{ 99 ippiDCT8x8Inv_Video_16s_C1I(block); 100} 101static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block) 102{ 103 ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size); 104} 105 106void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size); 107 108static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) 109{ 110 ippiDCT8x8Inv_Video_16s_C1I(block); 111#if HAVE_IWMMXT 112 add_pixels_clamped_iwmmxt(block, dest, line_size); 113#else 114 ff_add_pixels_clamped_ARM(block, dest, line_size); 115#endif 116} 117#endif 118 119int mm_support(void) 120{ 121 return HAVE_IWMMXT * FF_MM_IWMMXT; 122} 123 124void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) 125{ 126 int idct_algo= avctx->idct_algo; 127 128 ff_put_pixels_clamped = c->put_pixels_clamped; 129 ff_add_pixels_clamped = c->add_pixels_clamped; 130 131 if (avctx->lowres == 0) { 132 if(idct_algo == FF_IDCT_AUTO){ 133#if HAVE_IPP 134 idct_algo = FF_IDCT_IPP; 135#elif HAVE_NEON 136 idct_algo = FF_IDCT_SIMPLENEON; 137#elif HAVE_ARMV6 138 idct_algo = FF_IDCT_SIMPLEARMV6; 139#elif HAVE_ARMV5TE 140 idct_algo = FF_IDCT_SIMPLEARMV5TE; 141#else 142 idct_algo = FF_IDCT_ARM; 143#endif 144 } 145 146 if(idct_algo==FF_IDCT_ARM){ 147 c->idct_put= j_rev_dct_ARM_put; 148 c->idct_add= j_rev_dct_ARM_add; 149 c->idct = j_rev_dct_ARM; 150 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 151 } else if (idct_algo==FF_IDCT_SIMPLEARM){ 152 c->idct_put= simple_idct_ARM_put; 153 c->idct_add= simple_idct_ARM_add; 154 c->idct = simple_idct_ARM; 155 c->idct_permutation_type= FF_NO_IDCT_PERM; 156#if HAVE_ARMV6 157 } else if (idct_algo==FF_IDCT_SIMPLEARMV6){ 158 c->idct_put= ff_simple_idct_put_armv6; 159 c->idct_add= ff_simple_idct_add_armv6; 160 c->idct = ff_simple_idct_armv6; 161 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 162#endif 163#if HAVE_ARMV5TE 164 } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){ 165 c->idct_put= simple_idct_put_armv5te; 166 c->idct_add= simple_idct_add_armv5te; 167 c->idct = simple_idct_armv5te; 168 c->idct_permutation_type = FF_NO_IDCT_PERM; 169#endif 170#if HAVE_IPP 171 } else if (idct_algo==FF_IDCT_IPP){ 172 c->idct_put= simple_idct_ipp_put; 173 c->idct_add= simple_idct_ipp_add; 174 c->idct = simple_idct_ipp; 175 c->idct_permutation_type= FF_NO_IDCT_PERM; 176#endif 177#if HAVE_NEON 178 } else if (idct_algo==FF_IDCT_SIMPLENEON){ 179 c->idct_put= ff_simple_idct_put_neon; 180 c->idct_add= ff_simple_idct_add_neon; 181 c->idct = ff_simple_idct_neon; 182 c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; 183#endif 184 } 185 } 186 187 c->put_pixels_tab[0][0] = put_pixels16_arm; 188 c->put_pixels_tab[0][1] = put_pixels16_x2_arm; 189 c->put_pixels_tab[0][2] = put_pixels16_y2_arm; 190 c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; 191 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; 192 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; 193 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; 194 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; 195 c->put_pixels_tab[1][0] = put_pixels8_arm; 196 c->put_pixels_tab[1][1] = put_pixels8_x2_arm; 197 c->put_pixels_tab[1][2] = put_pixels8_y2_arm; 198 c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; 199 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm; 200 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; 201 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; 202 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm; 203 204#if HAVE_ARMV5TE 205 c->prefetch = ff_prefetch_arm; 206#endif 207 208#if HAVE_IWMMXT 209 dsputil_init_iwmmxt(c, avctx); 210#endif 211#if HAVE_ARMVFP 212 ff_float_init_arm_vfp(c, avctx); 213#endif 214#if HAVE_NEON 215 ff_dsputil_init_neon(c, avctx); 216#endif 217} 218