/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/h264qpel.h"
#include "hpeldsp_altivec.h"

#if HAVE_ALTIVEC

/* Final store operations used by the lowpass templates:
 * PUT overwrites the destination, AVG rounds-averages with it. */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/*
 * h264qpel_template.c is included twice below. Each inclusion instantiates
 * the 6-tap H/V/HV lowpass kernels under the names selected by the
 * PREFIX_* macros: first the "put_" variants (plain store), then the
 * "avg_" variants (average with existing destination).
 */
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
#include "h264qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

/*
 * Generate the 16 quarter-pel motion-compensation entry points
 * OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE for one OPNAME (put_/avg_).
 *
 * Construction per case, in terms of the templated kernels:
 *   mc00        copy (ff_*pixels*)
 *   mc20/mc02   pure horizontal / vertical 6-tap lowpass
 *   mc10/mc30   H lowpass averaged (via *_l2_*) with src / src+1
 *   mc01/mc03   V lowpass averaged with src / src+stride
 *   mc11..mc33  average of an H-filtered and a V-filtered plane, each
 *               taken from the appropriately shifted source position
 *   mc22        HV lowpass (uses an int16_t scratch plane `tmp`)
 *   mc21/mc23   average of H plane and HV plane
 *   mc12/mc32   average of V plane and HV plane
 * All intermediate planes are 16-byte aligned stack scratch buffers of
 * SIZE*SIZE bytes (SIZE*(SIZE+8) int16_t for the HV temporary).
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{ \
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\

/*
 * dst = rounded average of src1 and src2, 16 bytes per row for h rows.
 * src1 is read with stride src_stride1 and may be unaligned (handled with
 * the classic vec_lvsl + vec_perm merge of two overlapping 16-byte loads);
 * src2 is read with a fixed stride of 16 (it is one of the aligned scratch
 * planes produced by H264_MC, so a single permute vector computed before
 * the loop suffices).  dst may also be unaligned: the vec_lvsr/vec_perm
 * sequence rotates the result into place and blends it with the existing
 * edge bytes so no memory outside the 16 destination bytes is modified.
 */
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    /* src2's alignment offset is loop-invariant (stride 16). */
    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        /* Unaligned load of 16 bytes from src1. */
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        /* Load the two aligned blocks covering dst so the bytes around
         * the 16 target bytes can be preserved on store. */
        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(a, b);

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        /* Rotate the result into store position and merge with edges. */
        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}

/*
 * Same as put_pixels16_l2_altivec, but the result written to dst is the
 * rounded average of avg(src1, src2) with the pixels already in dst
 * (the "avg" store semantics of H.264 bi-directional/weighted fallback).
 */
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        /* Extra averaging step with the current dst contents. */
        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}

/* Implemented but could be faster
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */

/* Instantiate the 16 MC functions for 16x16 blocks, put and avg. */
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
#endif /* HAVE_ALTIVEC */

/*
 * Install the AltiVec 16x16 quarter-pel functions into the H264Qpel
 * context.  Only done for 8-bit content and only when the CPU reports
 * AltiVec support at runtime.
 */
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
{
#if HAVE_ALTIVEC
    const int high_bit_depth = bit_depth > 8;

    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    if (!high_bit_depth) {
        /* Fill one row of the function table; entry index is the
         * quarter-pel sub-position (x + 4*y packed into 0..15). */
#define dspfunc(PFX, IDX, NUM) \
        c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
        c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
        c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
        c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
        c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
        c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
        c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
        c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
        c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
        c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec

        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc
    }
#endif /* HAVE_ALTIVEC */
}