1/* 2 * Half-pel DSP functions. 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25/** 26 * @file 27 * Half-pel DSP functions. 28 */ 29 30#include "libavutil/attributes.h" 31#include "libavutil/intreadwrite.h" 32#include "hpeldsp.h" 33 34#define BIT_DEPTH 8 35#include "hpel_template.c" 36#include "pel_template.c" 37 38#define PIXOP2(OPNAME, OP) \ 39static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \ 40 const uint8_t *src1, \ 41 const uint8_t *src2, \ 42 int dst_stride, \ 43 int src_stride1, \ 44 int src_stride2, \ 45 int h) \ 46{ \ 47 int i; \ 48 \ 49 for (i = 0; i < h; i++) { \ 50 uint32_t a, b; \ 51 a = AV_RN32(&src1[i * src_stride1]); \ 52 b = AV_RN32(&src2[i * src_stride2]); \ 53 OP(*((uint32_t *) &dst[i * dst_stride]), \ 54 no_rnd_avg32(a, b)); \ 55 a = AV_RN32(&src1[i * src_stride1 + 4]); \ 56 b = AV_RN32(&src2[i * src_stride2 + 4]); \ 57 OP(*((uint32_t *) &dst[i * dst_stride + 4]), \ 58 no_rnd_avg32(a, b)); \ 59 } \ 60} \ 61 \ 62static inline void OPNAME ## _no_rnd_pixels8_x2_8_c(uint8_t *block, \ 63 const uint8_t *pixels, \ 64 ptrdiff_t line_size, \ 65 int h) \ 66{ \ 67 OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + 1, \ 68 line_size, line_size, line_size, h); \ 69} \ 70 \ 71static inline void OPNAME ## _pixels8_x2_8_c(uint8_t *block, \ 72 const uint8_t *pixels, \ 73 ptrdiff_t line_size, \ 74 int h) \ 75{ \ 76 OPNAME ## _pixels8_l2_8(block, pixels, pixels + 1, \ 77 line_size, line_size, line_size, h); \ 78} \ 79 \ 80static inline void OPNAME ## _no_rnd_pixels8_y2_8_c(uint8_t *block, \ 81 const uint8_t *pixels, \ 82 ptrdiff_t line_size, \ 83 int h) \ 84{ \ 85 OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + line_size, \ 86 line_size, line_size, line_size, h); \ 87} \ 88 \ 89static inline void OPNAME ## _pixels8_y2_8_c(uint8_t *block, \ 90 const uint8_t *pixels, \ 91 ptrdiff_t line_size, \ 92 int h) \ 93{ \ 94 OPNAME ## _pixels8_l2_8(block, pixels, pixels + line_size, \ 95 line_size, line_size, line_size, h); \ 96} \ 97 \ 98static inline void OPNAME ## _pixels4_x2_8_c(uint8_t *block, \ 99 const uint8_t *pixels, \ 100 ptrdiff_t line_size, \ 101 int h) \ 102{ \ 103 OPNAME ## _pixels4_l2_8(block, pixels, pixels + 1, \ 104 line_size, line_size, line_size, h); \ 105} \ 106 \ 107static inline void OPNAME ## _pixels4_y2_8_c(uint8_t *block, \ 108 const uint8_t *pixels, \ 109 ptrdiff_t line_size, \ 110 int h) \ 111{ \ 112 OPNAME ## _pixels4_l2_8(block, pixels, pixels + line_size, \ 113 line_size, line_size, line_size, h); \ 114} \ 115 \ 116static inline void OPNAME ## _pixels2_x2_8_c(uint8_t *block, \ 117 const uint8_t *pixels, \ 118 ptrdiff_t line_size, \ 119 int h) \ 120{ \ 121 OPNAME ## _pixels2_l2_8(block, pixels, pixels + 1, \ 122 line_size, line_size, line_size, h); \ 123} \ 124 \ 125static inline void OPNAME ## _pixels2_y2_8_c(uint8_t *block, \ 126 const uint8_t *pixels, \ 127 ptrdiff_t line_size, \ 128 int h) \ 129{ \ 130 OPNAME ## _pixels2_l2_8(block, pixels, pixels + line_size, \ 131 line_size, line_size, line_size, h); \ 132} \ 133 \ 134static inline void OPNAME ## _pixels2_xy2_8_c(uint8_t *block, \ 135 const uint8_t *pixels, \ 136 ptrdiff_t line_size, \ 137 int h) \ 138{ \ 139 int i, a1, b1; \ 140 int a0 = pixels[0]; \ 141 int b0 = pixels[1] + 2; \ 142 \ 143 a0 += b0; \ 144 b0 += pixels[2]; \ 145 pixels += line_size; \ 146 for (i = 0; i < h; i += 2) { \ 147 a1 = pixels[0]; \ 148 b1 = pixels[1]; \ 149 a1 += b1; \ 150 b1 += pixels[2]; \ 151 \ 152 block[0] = (a1 + a0) >> 2; /* FIXME non put */ \ 153 block[1] = (b1 + b0) >> 2; \ 154 \ 155 pixels += line_size; \ 156 block += line_size; \ 157 \ 158 a0 = pixels[0]; \ 159 b0 = pixels[1] + 2; \ 160 a0 += b0; \ 161 b0 += pixels[2]; \ 162 \ 163 block[0] = (a1 + a0) >> 2; \ 164 block[1] = (b1 + b0) >> 2; \ 165 pixels += line_size; \ 166 block += line_size; \ 167 } \ 168} \ 169 \ 170static inline void OPNAME ## _pixels4_xy2_8_c(uint8_t *block, \ 171 const uint8_t *pixels, \ 172 ptrdiff_t line_size, \ 173 int h) \ 174{ \ 175 /* FIXME HIGH BIT DEPTH */ \ 176 int i; \ 177 const uint32_t a = AV_RN32(pixels); \ 178 const uint32_t b = AV_RN32(pixels + 1); \ 179 uint32_t l0 = (a & 0x03030303UL) + \ 180 (b & 0x03030303UL) + \ 181 0x02020202UL; \ 182 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 183 ((b & 0xFCFCFCFCUL) >> 2); \ 184 uint32_t l1, h1; \ 185 \ 186 pixels += line_size; \ 187 for (i = 0; i < h; i += 2) { \ 188 uint32_t a = AV_RN32(pixels); \ 189 uint32_t b = AV_RN32(pixels + 1); \ 190 l1 = (a & 0x03030303UL) + \ 191 (b & 0x03030303UL); \ 192 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ 193 ((b & 0xFCFCFCFCUL) >> 2); \ 194 OP(*((uint32_t *) block), h0 + h1 + \ 195 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 196 pixels += line_size; \ 197 block += line_size; \ 198 a = AV_RN32(pixels); \ 199 b = AV_RN32(pixels + 1); \ 200 l0 = (a & 0x03030303UL) + \ 201 (b & 0x03030303UL) + \ 202 0x02020202UL; \ 203 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 204 ((b & 0xFCFCFCFCUL) >> 2); \ 205 OP(*((uint32_t *) block), h0 + h1 + \ 206 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 207 pixels += line_size; \ 208 block += line_size; \ 209 } \ 210} \ 211 \ 212static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \ 213 const uint8_t *pixels, \ 214 ptrdiff_t line_size, \ 215 int h) \ 216{ \ 217 /* FIXME HIGH BIT DEPTH */ \ 218 int j; \ 219 \ 220 for (j = 0; j < 2; j++) { \ 221 int i; \ 222 const uint32_t a = AV_RN32(pixels); \ 223 const uint32_t b = AV_RN32(pixels + 1); \ 224 uint32_t l0 = (a & 0x03030303UL) + \ 225 (b & 0x03030303UL) + \ 226 0x02020202UL; \ 227 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 228 ((b & 0xFCFCFCFCUL) >> 2); \ 229 uint32_t l1, h1; \ 230 \ 231 pixels += line_size; \ 232 for (i = 0; i < h; i += 2) { \ 233 uint32_t a = AV_RN32(pixels); \ 234 uint32_t b = AV_RN32(pixels + 1); \ 235 l1 = (a & 0x03030303UL) + \ 236 (b & 0x03030303UL); \ 237 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ 238 ((b & 0xFCFCFCFCUL) >> 2); \ 239 OP(*((uint32_t *) block), h0 + h1 + \ 240 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 241 pixels += line_size; \ 242 block += line_size; \ 243 a = AV_RN32(pixels); \ 244 b = AV_RN32(pixels + 1); \ 245 l0 = (a & 0x03030303UL) + \ 246 (b & 0x03030303UL) + \ 247 0x02020202UL; \ 248 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 249 ((b & 0xFCFCFCFCUL) >> 2); \ 250 OP(*((uint32_t *) block), h0 + h1 + \ 251 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 252 pixels += line_size; \ 253 block += line_size; \ 254 } \ 255 pixels += 4 - line_size * (h + 1); \ 256 block += 4 - line_size * h; \ 257 } \ 258} \ 259 \ 260static inline void OPNAME ## _no_rnd_pixels8_xy2_8_c(uint8_t *block, \ 261 const uint8_t *pixels, \ 262 ptrdiff_t line_size, \ 263 int h) \ 264{ \ 265 /* FIXME HIGH BIT DEPTH */ \ 266 int j; \ 267 \ 268 for (j = 0; j < 2; j++) { \ 269 int i; \ 270 const uint32_t a = AV_RN32(pixels); \ 271 const uint32_t b = AV_RN32(pixels + 1); \ 272 uint32_t l0 = (a & 0x03030303UL) + \ 273 (b & 0x03030303UL) + \ 274 0x01010101UL; \ 275 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 276 ((b & 0xFCFCFCFCUL) >> 2); \ 277 uint32_t l1, h1; \ 278 \ 279 pixels += line_size; \ 280 for (i = 0; i < h; i += 2) { \ 281 uint32_t a = AV_RN32(pixels); \ 282 uint32_t b = AV_RN32(pixels + 1); \ 283 l1 = (a & 0x03030303UL) + \ 284 (b & 0x03030303UL); \ 285 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \ 286 ((b & 0xFCFCFCFCUL) >> 2); \ 287 OP(*((uint32_t *) block), h0 + h1 + \ 288 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 289 pixels += line_size; \ 290 block += line_size; \ 291 a = AV_RN32(pixels); \ 292 b = AV_RN32(pixels + 1); \ 293 l0 = (a & 0x03030303UL) + \ 294 (b & 0x03030303UL) + \ 295 0x01010101UL; \ 296 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \ 297 ((b & 0xFCFCFCFCUL) >> 2); \ 298 OP(*((uint32_t *) block), h0 + h1 + \ 299 (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \ 300 pixels += line_size; \ 301 block += line_size; \ 302 } \ 303 pixels += 4 - line_size * (h + 1); \ 304 block += 4 - line_size * h; \ 305 } \ 306} \ 307 \ 308CALL_2X_PIXELS(OPNAME ## _pixels16_x2_8_c, \ 309 OPNAME ## _pixels8_x2_8_c, \ 310 8) \ 311CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \ 312 OPNAME ## _pixels8_y2_8_c, \ 313 8) \ 314CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \ 315 OPNAME ## _pixels8_xy2_8_c, \ 316 8) \ 317CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, \ 318 OPNAME ## _pixels8_8_c, \ 319 8) \ 320CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \ 321 OPNAME ## _no_rnd_pixels8_x2_8_c, \ 322 8) \ 323CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_8_c, \ 324 OPNAME ## _no_rnd_pixels8_y2_8_c, \ 325 8) \ 326CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \ 327 OPNAME ## _no_rnd_pixels8_xy2_8_c, \ 328 8) \ 329 330#define op_avg(a, b) a = rnd_avg32(a, b) 331#define op_put(a, b) a = b 332#define put_no_rnd_pixels8_8_c put_pixels8_8_c 333PIXOP2(avg, op_avg) 334PIXOP2(put, op_put) 335#undef op_avg 336#undef op_put 337 338av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) 339{ 340#define hpel_funcs(prefix, idx, num) \ 341 c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \ 342 c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \ 343 c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \ 344 c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c 345 346 hpel_funcs(put, [0], 16); 347 hpel_funcs(put, [1], 8); 348 hpel_funcs(put, [2], 4); 349 hpel_funcs(put, [3], 2); 350 hpel_funcs(put_no_rnd, [0], 16); 351 hpel_funcs(put_no_rnd, [1], 8); 352 hpel_funcs(avg, [0], 16); 353 hpel_funcs(avg, [1], 8); 354 hpel_funcs(avg, [2], 4); 355 hpel_funcs(avg, [3], 2); 356 hpel_funcs(avg_no_rnd,, 16); 357 358 if (ARCH_AARCH64) 359 ff_hpeldsp_init_aarch64(c, flags); 360 if (ARCH_ALPHA) 361 ff_hpeldsp_init_alpha(c, flags); 362 if (ARCH_ARM) 363 ff_hpeldsp_init_arm(c, flags); 364 if (ARCH_PPC) 365 ff_hpeldsp_init_ppc(c, flags); 366 if (ARCH_X86) 367 ff_hpeldsp_init_x86(c, flags); 368} 369