1/* 2 * DSP utils 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 7 * 8 * This file is part of Libav. 9 * 10 * Libav is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * Libav is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with Libav; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25/** 26 * @file 27 * DSP utils 28 */ 29 30#include "libavutil/imgutils.h" 31#include "avcodec.h" 32#include "dsputil.h" 33#include "simple_idct.h" 34#include "faandct.h" 35#include "faanidct.h" 36#include "mathops.h" 37#include "mpegvideo.h" 38#include "config.h" 39#include "ac3dec.h" 40#include "vorbis.h" 41#include "png.h" 42 43uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; 44uint32_t ff_squareTbl[512] = {0, }; 45 46#define BIT_DEPTH 9 47#include "dsputil_template.c" 48#undef BIT_DEPTH 49 50#define BIT_DEPTH 10 51#include "dsputil_template.c" 52#undef BIT_DEPTH 53 54#define BIT_DEPTH 8 55#include "dsputil_template.c" 56 57// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size 58#define pb_7f (~0UL/255 * 0x7f) 59#define pb_80 (~0UL/255 * 0x80) 60 61const uint8_t ff_zigzag_direct[64] = { 62 0, 1, 8, 16, 9, 2, 3, 10, 63 17, 24, 32, 25, 18, 11, 4, 5, 64 12, 19, 26, 33, 40, 48, 41, 34, 65 27, 20, 13, 6, 7, 14, 21, 28, 66 35, 42, 49, 56, 
57, 50, 43, 36, 67 29, 22, 15, 23, 30, 37, 44, 51, 68 58, 59, 52, 45, 38, 31, 39, 46, 69 53, 60, 61, 54, 47, 55, 62, 63 70}; 71 72/* Specific zigzag scan for 248 idct. NOTE that unlike the 73 specification, we interleave the fields */ 74const uint8_t ff_zigzag248_direct[64] = { 75 0, 8, 1, 9, 16, 24, 2, 10, 76 17, 25, 32, 40, 48, 56, 33, 41, 77 18, 26, 3, 11, 4, 12, 19, 27, 78 34, 42, 49, 57, 50, 58, 35, 43, 79 20, 28, 5, 13, 6, 14, 21, 29, 80 36, 44, 51, 59, 52, 60, 37, 45, 81 22, 30, 7, 15, 23, 31, 38, 46, 82 53, 61, 54, 62, 39, 47, 55, 63, 83}; 84 85/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ 86DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64]; 87 88const uint8_t ff_alternate_horizontal_scan[64] = { 89 0, 1, 2, 3, 8, 9, 16, 17, 90 10, 11, 4, 5, 6, 7, 15, 14, 91 13, 12, 19, 18, 24, 25, 32, 33, 92 26, 27, 20, 21, 22, 23, 28, 29, 93 30, 31, 34, 35, 40, 41, 48, 49, 94 42, 43, 36, 37, 38, 39, 44, 45, 95 46, 47, 50, 51, 56, 57, 58, 59, 96 52, 53, 54, 55, 60, 61, 62, 63, 97}; 98 99const uint8_t ff_alternate_vertical_scan[64] = { 100 0, 8, 16, 24, 1, 9, 2, 10, 101 17, 25, 32, 40, 48, 56, 57, 49, 102 41, 33, 26, 18, 3, 11, 4, 12, 103 19, 27, 34, 42, 50, 58, 35, 43, 104 51, 59, 20, 28, 5, 13, 6, 14, 105 21, 29, 36, 44, 52, 60, 37, 45, 106 53, 61, 22, 30, 7, 15, 23, 31, 107 38, 46, 54, 62, 39, 47, 55, 63, 108}; 109 110/* Input permutation for the simple_idct_mmx */ 111static const uint8_t simple_mmx_permutation[64]={ 112 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 113 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 114 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 115 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 116 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 117 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 118 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 119 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, 120}; 121 122static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; 123 124void ff_init_scantable(uint8_t 
*permutation, ScanTable *st, const uint8_t *src_scantable){ 125 int i; 126 int end; 127 128 st->scantable= src_scantable; 129 130 for(i=0; i<64; i++){ 131 int j; 132 j = src_scantable[i]; 133 st->permutated[i] = permutation[j]; 134#if ARCH_PPC 135 st->inverse[j] = i; 136#endif 137 } 138 139 end=-1; 140 for(i=0; i<64; i++){ 141 int j; 142 j = st->permutated[i]; 143 if(j>end) end=j; 144 st->raster_end[i]= end; 145 } 146} 147 148void ff_init_scantable_permutation(uint8_t *idct_permutation, 149 int idct_permutation_type) 150{ 151 int i; 152 153 switch(idct_permutation_type){ 154 case FF_NO_IDCT_PERM: 155 for(i=0; i<64; i++) 156 idct_permutation[i]= i; 157 break; 158 case FF_LIBMPEG2_IDCT_PERM: 159 for(i=0; i<64; i++) 160 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); 161 break; 162 case FF_SIMPLE_IDCT_PERM: 163 for(i=0; i<64; i++) 164 idct_permutation[i]= simple_mmx_permutation[i]; 165 break; 166 case FF_TRANSPOSE_IDCT_PERM: 167 for(i=0; i<64; i++) 168 idct_permutation[i]= ((i&7)<<3) | (i>>3); 169 break; 170 case FF_PARTTRANS_IDCT_PERM: 171 for(i=0; i<64; i++) 172 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); 173 break; 174 case FF_SSE2_IDCT_PERM: 175 for(i=0; i<64; i++) 176 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; 177 break; 178 default: 179 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); 180 } 181} 182 183static int pix_sum_c(uint8_t * pix, int line_size) 184{ 185 int s, i, j; 186 187 s = 0; 188 for (i = 0; i < 16; i++) { 189 for (j = 0; j < 16; j += 8) { 190 s += pix[0]; 191 s += pix[1]; 192 s += pix[2]; 193 s += pix[3]; 194 s += pix[4]; 195 s += pix[5]; 196 s += pix[6]; 197 s += pix[7]; 198 pix += 8; 199 } 200 pix += line_size - 16; 201 } 202 return s; 203} 204 205static int pix_norm1_c(uint8_t * pix, int line_size) 206{ 207 int s, i, j; 208 uint32_t *sq = ff_squareTbl + 256; 209 210 s = 0; 211 for (i = 0; i < 16; i++) { 212 for (j = 0; j < 16; j += 8) { 213#if 0 214 s += sq[pix[0]]; 215 s += 
sq[pix[1]]; 216 s += sq[pix[2]]; 217 s += sq[pix[3]]; 218 s += sq[pix[4]]; 219 s += sq[pix[5]]; 220 s += sq[pix[6]]; 221 s += sq[pix[7]]; 222#else 223#if HAVE_FAST_64BIT 224 register uint64_t x=*(uint64_t*)pix; 225 s += sq[x&0xff]; 226 s += sq[(x>>8)&0xff]; 227 s += sq[(x>>16)&0xff]; 228 s += sq[(x>>24)&0xff]; 229 s += sq[(x>>32)&0xff]; 230 s += sq[(x>>40)&0xff]; 231 s += sq[(x>>48)&0xff]; 232 s += sq[(x>>56)&0xff]; 233#else 234 register uint32_t x=*(uint32_t*)pix; 235 s += sq[x&0xff]; 236 s += sq[(x>>8)&0xff]; 237 s += sq[(x>>16)&0xff]; 238 s += sq[(x>>24)&0xff]; 239 x=*(uint32_t*)(pix+4); 240 s += sq[x&0xff]; 241 s += sq[(x>>8)&0xff]; 242 s += sq[(x>>16)&0xff]; 243 s += sq[(x>>24)&0xff]; 244#endif 245#endif 246 pix += 8; 247 } 248 pix += line_size - 16; 249 } 250 return s; 251} 252 253static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){ 254 int i; 255 256 for(i=0; i+8<=w; i+=8){ 257 dst[i+0]= av_bswap32(src[i+0]); 258 dst[i+1]= av_bswap32(src[i+1]); 259 dst[i+2]= av_bswap32(src[i+2]); 260 dst[i+3]= av_bswap32(src[i+3]); 261 dst[i+4]= av_bswap32(src[i+4]); 262 dst[i+5]= av_bswap32(src[i+5]); 263 dst[i+6]= av_bswap32(src[i+6]); 264 dst[i+7]= av_bswap32(src[i+7]); 265 } 266 for(;i<w; i++){ 267 dst[i+0]= av_bswap32(src[i+0]); 268 } 269} 270 271static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len) 272{ 273 while (len--) 274 *dst++ = av_bswap16(*src++); 275} 276 277static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 278{ 279 int s, i; 280 uint32_t *sq = ff_squareTbl + 256; 281 282 s = 0; 283 for (i = 0; i < h; i++) { 284 s += sq[pix1[0] - pix2[0]]; 285 s += sq[pix1[1] - pix2[1]]; 286 s += sq[pix1[2] - pix2[2]]; 287 s += sq[pix1[3] - pix2[3]]; 288 pix1 += line_size; 289 pix2 += line_size; 290 } 291 return s; 292} 293 294static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 295{ 296 int s, i; 297 uint32_t *sq = ff_squareTbl + 256; 298 299 s = 0; 300 for (i = 0; i < h; i++) { 301 s += 
sq[pix1[0] - pix2[0]]; 302 s += sq[pix1[1] - pix2[1]]; 303 s += sq[pix1[2] - pix2[2]]; 304 s += sq[pix1[3] - pix2[3]]; 305 s += sq[pix1[4] - pix2[4]]; 306 s += sq[pix1[5] - pix2[5]]; 307 s += sq[pix1[6] - pix2[6]]; 308 s += sq[pix1[7] - pix2[7]]; 309 pix1 += line_size; 310 pix2 += line_size; 311 } 312 return s; 313} 314 315static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 316{ 317 int s, i; 318 uint32_t *sq = ff_squareTbl + 256; 319 320 s = 0; 321 for (i = 0; i < h; i++) { 322 s += sq[pix1[ 0] - pix2[ 0]]; 323 s += sq[pix1[ 1] - pix2[ 1]]; 324 s += sq[pix1[ 2] - pix2[ 2]]; 325 s += sq[pix1[ 3] - pix2[ 3]]; 326 s += sq[pix1[ 4] - pix2[ 4]]; 327 s += sq[pix1[ 5] - pix2[ 5]]; 328 s += sq[pix1[ 6] - pix2[ 6]]; 329 s += sq[pix1[ 7] - pix2[ 7]]; 330 s += sq[pix1[ 8] - pix2[ 8]]; 331 s += sq[pix1[ 9] - pix2[ 9]]; 332 s += sq[pix1[10] - pix2[10]]; 333 s += sq[pix1[11] - pix2[11]]; 334 s += sq[pix1[12] - pix2[12]]; 335 s += sq[pix1[13] - pix2[13]]; 336 s += sq[pix1[14] - pix2[14]]; 337 s += sq[pix1[15] - pix2[15]]; 338 339 pix1 += line_size; 340 pix2 += line_size; 341 } 342 return s; 343} 344 345static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, 346 const uint8_t *s2, int stride){ 347 int i; 348 349 /* read the pixels */ 350 for(i=0;i<8;i++) { 351 block[0] = s1[0] - s2[0]; 352 block[1] = s1[1] - s2[1]; 353 block[2] = s1[2] - s2[2]; 354 block[3] = s1[3] - s2[3]; 355 block[4] = s1[4] - s2[4]; 356 block[5] = s1[5] - s2[5]; 357 block[6] = s1[6] - s2[6]; 358 block[7] = s1[7] - s2[7]; 359 s1 += stride; 360 s2 += stride; 361 block += 8; 362 } 363} 364 365 366void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 367 int line_size) 368{ 369 int i; 370 371 /* read the pixels */ 372 for(i=0;i<8;i++) { 373 pixels[0] = av_clip_uint8(block[0]); 374 pixels[1] = av_clip_uint8(block[1]); 375 pixels[2] = av_clip_uint8(block[2]); 376 pixels[3] = av_clip_uint8(block[3]); 377 pixels[4] = av_clip_uint8(block[4]); 378 
pixels[5] = av_clip_uint8(block[5]); 379 pixels[6] = av_clip_uint8(block[6]); 380 pixels[7] = av_clip_uint8(block[7]); 381 382 pixels += line_size; 383 block += 8; 384 } 385} 386 387static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 388 int line_size) 389{ 390 int i; 391 392 /* read the pixels */ 393 for(i=0;i<4;i++) { 394 pixels[0] = av_clip_uint8(block[0]); 395 pixels[1] = av_clip_uint8(block[1]); 396 pixels[2] = av_clip_uint8(block[2]); 397 pixels[3] = av_clip_uint8(block[3]); 398 399 pixels += line_size; 400 block += 8; 401 } 402} 403 404static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 405 int line_size) 406{ 407 int i; 408 409 /* read the pixels */ 410 for(i=0;i<2;i++) { 411 pixels[0] = av_clip_uint8(block[0]); 412 pixels[1] = av_clip_uint8(block[1]); 413 414 pixels += line_size; 415 block += 8; 416 } 417} 418 419void ff_put_signed_pixels_clamped_c(const DCTELEM *block, 420 uint8_t *restrict pixels, 421 int line_size) 422{ 423 int i, j; 424 425 for (i = 0; i < 8; i++) { 426 for (j = 0; j < 8; j++) { 427 if (*block < -128) 428 *pixels = 0; 429 else if (*block > 127) 430 *pixels = 255; 431 else 432 *pixels = (uint8_t)(*block + 128); 433 block++; 434 pixels++; 435 } 436 pixels += (line_size - 8); 437 } 438} 439 440void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 441 int line_size) 442{ 443 int i; 444 445 /* read the pixels */ 446 for(i=0;i<8;i++) { 447 pixels[0] = av_clip_uint8(pixels[0] + block[0]); 448 pixels[1] = av_clip_uint8(pixels[1] + block[1]); 449 pixels[2] = av_clip_uint8(pixels[2] + block[2]); 450 pixels[3] = av_clip_uint8(pixels[3] + block[3]); 451 pixels[4] = av_clip_uint8(pixels[4] + block[4]); 452 pixels[5] = av_clip_uint8(pixels[5] + block[5]); 453 pixels[6] = av_clip_uint8(pixels[6] + block[6]); 454 pixels[7] = av_clip_uint8(pixels[7] + block[7]); 455 pixels += line_size; 456 block += 8; 457 } 458} 459 460static void add_pixels_clamped4_c(const DCTELEM 
*block, uint8_t *restrict pixels, 461 int line_size) 462{ 463 int i; 464 465 /* read the pixels */ 466 for(i=0;i<4;i++) { 467 pixels[0] = av_clip_uint8(pixels[0] + block[0]); 468 pixels[1] = av_clip_uint8(pixels[1] + block[1]); 469 pixels[2] = av_clip_uint8(pixels[2] + block[2]); 470 pixels[3] = av_clip_uint8(pixels[3] + block[3]); 471 pixels += line_size; 472 block += 8; 473 } 474} 475 476static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 477 int line_size) 478{ 479 int i; 480 481 /* read the pixels */ 482 for(i=0;i<2;i++) { 483 pixels[0] = av_clip_uint8(pixels[0] + block[0]); 484 pixels[1] = av_clip_uint8(pixels[1] + block[1]); 485 pixels += line_size; 486 block += 8; 487 } 488} 489 490static int sum_abs_dctelem_c(DCTELEM *block) 491{ 492 int sum=0, i; 493 for(i=0; i<64; i++) 494 sum+= FFABS(block[i]); 495 return sum; 496} 497 498static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) 499{ 500 int i; 501 502 for (i = 0; i < h; i++) { 503 memset(block, value, 16); 504 block += line_size; 505 } 506} 507 508static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) 509{ 510 int i; 511 512 for (i = 0; i < h; i++) { 513 memset(block, value, 8); 514 block += line_size; 515 } 516} 517 518#define avg2(a,b) ((a+b+1)>>1) 519#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 520 521static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) 522{ 523 const int A=(16-x16)*(16-y16); 524 const int B=( x16)*(16-y16); 525 const int C=(16-x16)*( y16); 526 const int D=( x16)*( y16); 527 int i; 528 529 for(i=0; i<h; i++) 530 { 531 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; 532 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; 533 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; 534 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; 535 dst[4]= (A*src[4] + 
B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; 536 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; 537 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; 538 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; 539 dst+= stride; 540 src+= stride; 541 } 542} 543 544void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, 545 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) 546{ 547 int y, vx, vy; 548 const int s= 1<<shift; 549 550 width--; 551 height--; 552 553 for(y=0; y<h; y++){ 554 int x; 555 556 vx= ox; 557 vy= oy; 558 for(x=0; x<8; x++){ //XXX FIXME optimize 559 int src_x, src_y, frac_x, frac_y, index; 560 561 src_x= vx>>16; 562 src_y= vy>>16; 563 frac_x= src_x&(s-1); 564 frac_y= src_y&(s-1); 565 src_x>>=shift; 566 src_y>>=shift; 567 568 if((unsigned)src_x < width){ 569 if((unsigned)src_y < height){ 570 index= src_x + src_y*stride; 571 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 572 + src[index +1]* frac_x )*(s-frac_y) 573 + ( src[index+stride ]*(s-frac_x) 574 + src[index+stride+1]* frac_x )* frac_y 575 + r)>>(shift*2); 576 }else{ 577 index= src_x + av_clip(src_y, 0, height)*stride; 578 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 579 + src[index +1]* frac_x )*s 580 + r)>>(shift*2); 581 } 582 }else{ 583 if((unsigned)src_y < height){ 584 index= av_clip(src_x, 0, width) + src_y*stride; 585 dst[y*stride + x]= ( ( src[index ]*(s-frac_y) 586 + src[index+stride ]* frac_y )*s 587 + r)>>(shift*2); 588 }else{ 589 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; 590 dst[y*stride + x]= src[index ]; 591 } 592 } 593 594 vx+= dxx; 595 vy+= dyx; 596 } 597 ox += dxy; 598 oy += dyy; 599 } 600} 601 602static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 603 switch(width){ 604 case 2: put_pixels2_8_c (dst, src, stride, height); break; 605 case 4: 
put_pixels4_8_c (dst, src, stride, height); break; 606 case 8: put_pixels8_8_c (dst, src, stride, height); break; 607 case 16:put_pixels16_8_c(dst, src, stride, height); break; 608 } 609} 610 611static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 612 int i,j; 613 for (i=0; i < height; i++) { 614 for (j=0; j < width; j++) { 615 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; 616 } 617 src += stride; 618 dst += stride; 619 } 620} 621 622static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 623 int i,j; 624 for (i=0; i < height; i++) { 625 for (j=0; j < width; j++) { 626 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; 627 } 628 src += stride; 629 dst += stride; 630 } 631} 632 633static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 634 int i,j; 635 for (i=0; i < height; i++) { 636 for (j=0; j < width; j++) { 637 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; 638 } 639 src += stride; 640 dst += stride; 641 } 642} 643 644static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 645 int i,j; 646 for (i=0; i < height; i++) { 647 for (j=0; j < width; j++) { 648 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; 649 } 650 src += stride; 651 dst += stride; 652 } 653} 654 655static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 656 int i,j; 657 for (i=0; i < height; i++) { 658 for (j=0; j < width; j++) { 659 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; 660 } 661 src += stride; 662 dst += stride; 663 } 664} 665 666static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 667 int i,j; 668 for (i=0; i < height; i++) { 669 for (j=0; j < width; j++) { 670 
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; 671 } 672 src += stride; 673 dst += stride; 674 } 675} 676 677static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 678 int i,j; 679 for (i=0; i < height; i++) { 680 for (j=0; j < width; j++) { 681 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; 682 } 683 src += stride; 684 dst += stride; 685 } 686} 687 688static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 689 int i,j; 690 for (i=0; i < height; i++) { 691 for (j=0; j < width; j++) { 692 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; 693 } 694 src += stride; 695 dst += stride; 696 } 697} 698 699static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 700 switch(width){ 701 case 2: avg_pixels2_8_c (dst, src, stride, height); break; 702 case 4: avg_pixels4_8_c (dst, src, stride, height); break; 703 case 8: avg_pixels8_8_c (dst, src, stride, height); break; 704 case 16:avg_pixels16_8_c(dst, src, stride, height); break; 705 } 706} 707 708static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 709 int i,j; 710 for (i=0; i < height; i++) { 711 for (j=0; j < width; j++) { 712 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; 713 } 714 src += stride; 715 dst += stride; 716 } 717} 718 719static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 720 int i,j; 721 for (i=0; i < height; i++) { 722 for (j=0; j < width; j++) { 723 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; 724 } 725 src += stride; 726 dst += stride; 727 } 728} 729 730static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 731 int i,j; 732 for (i=0; i 
< height; i++) { 733 for (j=0; j < width; j++) { 734 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; 735 } 736 src += stride; 737 dst += stride; 738 } 739} 740 741static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 742 int i,j; 743 for (i=0; i < height; i++) { 744 for (j=0; j < width; j++) { 745 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 746 } 747 src += stride; 748 dst += stride; 749 } 750} 751 752static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 753 int i,j; 754 for (i=0; i < height; i++) { 755 for (j=0; j < width; j++) { 756 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 757 } 758 src += stride; 759 dst += stride; 760 } 761} 762 763static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 764 int i,j; 765 for (i=0; i < height; i++) { 766 for (j=0; j < width; j++) { 767 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; 768 } 769 src += stride; 770 dst += stride; 771 } 772} 773 774static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 775 int i,j; 776 for (i=0; i < height; i++) { 777 for (j=0; j < width; j++) { 778 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 779 } 780 src += stride; 781 dst += stride; 782 } 783} 784 785static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 786 int i,j; 787 for (i=0; i < height; i++) { 788 for (j=0; j < width; j++) { 789 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 790 } 791 src += stride; 792 dst += stride; 793 } 794} 795 796#define 
QPEL_MC(r, OPNAME, RND, OP) \ 797static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 798 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 799 int i;\ 800 for(i=0; i<h; i++)\ 801 {\ 802 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ 803 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ 804 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ 805 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ 806 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ 807 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ 808 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ 809 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ 810 dst+=dstStride;\ 811 src+=srcStride;\ 812 }\ 813}\ 814\ 815static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 816 const int w=8;\ 817 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 818 int i;\ 819 for(i=0; i<w; i++)\ 820 {\ 821 const int src0= src[0*srcStride];\ 822 const int src1= src[1*srcStride];\ 823 const int src2= src[2*srcStride];\ 824 const int src3= src[3*srcStride];\ 825 const int src4= src[4*srcStride];\ 826 const int src5= src[5*srcStride];\ 827 const int src6= src[6*srcStride];\ 828 const int src7= src[7*srcStride];\ 829 const int src8= src[8*srcStride];\ 830 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ 831 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ 832 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ 833 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ 834 
OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ 835 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ 836 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ 837 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ 838 dst++;\ 839 src++;\ 840 }\ 841}\ 842\ 843static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 844 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 845 int i;\ 846 \ 847 for(i=0; i<h; i++)\ 848 {\ 849 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ 850 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ 851 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ 852 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ 853 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ 854 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ 855 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ 856 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ 857 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ 858 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ 859 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ 860 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ 861 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ 862 OP(dst[13], (src[13]+src[14])*20 - 
(src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ 863 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ 864 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ 865 dst+=dstStride;\ 866 src+=srcStride;\ 867 }\ 868}\ 869\ 870static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 871 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 872 int i;\ 873 const int w=16;\ 874 for(i=0; i<w; i++)\ 875 {\ 876 const int src0= src[0*srcStride];\ 877 const int src1= src[1*srcStride];\ 878 const int src2= src[2*srcStride];\ 879 const int src3= src[3*srcStride];\ 880 const int src4= src[4*srcStride];\ 881 const int src5= src[5*srcStride];\ 882 const int src6= src[6*srcStride];\ 883 const int src7= src[7*srcStride];\ 884 const int src8= src[8*srcStride];\ 885 const int src9= src[9*srcStride];\ 886 const int src10= src[10*srcStride];\ 887 const int src11= src[11*srcStride];\ 888 const int src12= src[12*srcStride];\ 889 const int src13= src[13*srcStride];\ 890 const int src14= src[14*srcStride];\ 891 const int src15= src[15*srcStride];\ 892 const int src16= src[16*srcStride];\ 893 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ 894 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ 895 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ 896 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ 897 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ 898 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ 899 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ 900 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - 
(src4 +src11));\ 901 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ 902 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ 903 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ 904 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ 905 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ 906 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ 907 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ 908 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ 909 dst++;\ 910 src++;\ 911 }\ 912}\ 913\ 914static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 915 uint8_t half[64];\ 916 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 917 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\ 918}\ 919\ 920static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 921 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ 922}\ 923\ 924static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 925 uint8_t half[64];\ 926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 927 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\ 928}\ 929\ 930static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 931 uint8_t full[16*9];\ 932 uint8_t half[64];\ 933 copy_block9(full, src, 16, stride, 9);\ 934 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 935 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\ 936}\ 937\ 938static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 939 uint8_t full[16*9];\ 940 copy_block9(full, src, 16, stride, 9);\ 941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, 
full, stride, 16);\ 942}\ 943\ 944static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 945 uint8_t full[16*9];\ 946 uint8_t half[64];\ 947 copy_block9(full, src, 16, stride, 9);\ 948 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 949 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\ 950}\ 951void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 952 uint8_t full[16*9];\ 953 uint8_t halfH[72];\ 954 uint8_t halfV[64];\ 955 uint8_t halfHV[64];\ 956 copy_block9(full, src, 16, stride, 9);\ 957 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 958 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 959 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 960 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 961}\ 962static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 963 uint8_t full[16*9];\ 964 uint8_t halfH[72];\ 965 uint8_t halfHV[64];\ 966 copy_block9(full, src, 16, stride, 9);\ 967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 968 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 970 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 971}\ 972void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 973 uint8_t full[16*9];\ 974 uint8_t halfH[72];\ 975 uint8_t halfV[64];\ 976 uint8_t halfHV[64];\ 977 copy_block9(full, src, 16, stride, 9);\ 978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 979 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 980 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 981 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 982}\ 983static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 984 uint8_t full[16*9];\ 985 uint8_t halfH[72];\ 986 uint8_t halfHV[64];\ 987 copy_block9(full, 
src, 16, stride, 9);\ 988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 989 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 991 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 992}\ 993void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 994 uint8_t full[16*9];\ 995 uint8_t halfH[72];\ 996 uint8_t halfV[64];\ 997 uint8_t halfHV[64];\ 998 copy_block9(full, src, 16, stride, 9);\ 999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1000 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 1001 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1002 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1003}\ 1004static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 1005 uint8_t full[16*9];\ 1006 uint8_t halfH[72];\ 1007 uint8_t halfHV[64];\ 1008 copy_block9(full, src, 16, stride, 9);\ 1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1010 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1012 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1013}\ 1014void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1015 uint8_t full[16*9];\ 1016 uint8_t halfH[72];\ 1017 uint8_t halfV[64];\ 1018 uint8_t halfHV[64];\ 1019 copy_block9(full, src, 16, stride, 9);\ 1020 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ 1021 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 1022 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1023 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1024}\ 1025static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 1026 uint8_t full[16*9];\ 1027 uint8_t halfH[72];\ 1028 uint8_t halfHV[64];\ 1029 copy_block9(full, src, 16, 
stride, 9);\ 1030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1031 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 1032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1033 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1034}\ 1035static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 1036 uint8_t halfH[72];\ 1037 uint8_t halfHV[64];\ 1038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 1039 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1040 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 1041}\ 1042static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 1043 uint8_t halfH[72];\ 1044 uint8_t halfHV[64];\ 1045 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 1046 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1047 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1048}\ 1049void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1050 uint8_t full[16*9];\ 1051 uint8_t halfH[72];\ 1052 uint8_t halfV[64];\ 1053 uint8_t halfHV[64];\ 1054 copy_block9(full, src, 16, stride, 9);\ 1055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1056 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 1057 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1058 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ 1059}\ 1060static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ 1061 uint8_t full[16*9];\ 1062 uint8_t halfH[72];\ 1063 copy_block9(full, src, 16, stride, 9);\ 1064 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1065 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 1066 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 1067}\ 1068void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1069 uint8_t full[16*9];\ 1070 uint8_t halfH[72];\ 1071 uint8_t 
halfV[64];\ 1072 uint8_t halfHV[64];\ 1073 copy_block9(full, src, 16, stride, 9);\ 1074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1075 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 1076 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1077 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ 1078}\ 1079static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 1080 uint8_t full[16*9];\ 1081 uint8_t halfH[72];\ 1082 copy_block9(full, src, 16, stride, 9);\ 1083 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1084 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 1085 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 1086}\ 1087static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 1088 uint8_t halfH[72];\ 1089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 1090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 1091}\ 1092\ 1093static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 1094 uint8_t half[256];\ 1095 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 1096 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\ 1097}\ 1098\ 1099static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 1100 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ 1101}\ 1102\ 1103static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 1104 uint8_t half[256];\ 1105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 1106 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\ 1107}\ 1108\ 1109static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 1110 uint8_t full[24*17];\ 1111 uint8_t half[256];\ 1112 copy_block17(full, src, 24, stride, 17);\ 1113 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 1114 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\ 1115}\ 1116\ 
1117static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 1118 uint8_t full[24*17];\ 1119 copy_block17(full, src, 24, stride, 17);\ 1120 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ 1121}\ 1122\ 1123static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 1124 uint8_t full[24*17];\ 1125 uint8_t half[256];\ 1126 copy_block17(full, src, 24, stride, 17);\ 1127 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 1128 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\ 1129}\ 1130void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1131 uint8_t full[24*17];\ 1132 uint8_t halfH[272];\ 1133 uint8_t halfV[256];\ 1134 uint8_t halfHV[256];\ 1135 copy_block17(full, src, 24, stride, 17);\ 1136 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1137 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 1138 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1139 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 1140}\ 1141static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 1142 uint8_t full[24*17];\ 1143 uint8_t halfH[272];\ 1144 uint8_t halfHV[256];\ 1145 copy_block17(full, src, 24, stride, 17);\ 1146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1147 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 1148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1149 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 1150}\ 1151void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1152 uint8_t full[24*17];\ 1153 uint8_t halfH[272];\ 1154 uint8_t halfV[256];\ 1155 uint8_t halfHV[256];\ 1156 copy_block17(full, src, 24, stride, 17);\ 1157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 1159 put ## RND ## 
mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1160 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 1161}\ 1162static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 1163 uint8_t full[24*17];\ 1164 uint8_t halfH[272];\ 1165 uint8_t halfHV[256];\ 1166 copy_block17(full, src, 24, stride, 17);\ 1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1168 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1170 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 1171}\ 1172void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1173 uint8_t full[24*17];\ 1174 uint8_t halfH[272];\ 1175 uint8_t halfV[256];\ 1176 uint8_t halfHV[256];\ 1177 copy_block17(full, src, 24, stride, 17);\ 1178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1179 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 1180 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1181 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 1182}\ 1183static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 1184 uint8_t full[24*17];\ 1185 uint8_t halfH[272];\ 1186 uint8_t halfHV[256];\ 1187 copy_block17(full, src, 24, stride, 17);\ 1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1189 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1191 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 1192}\ 1193void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1194 uint8_t full[24*17];\ 1195 uint8_t halfH[272];\ 1196 uint8_t halfV[256];\ 1197 uint8_t halfHV[256];\ 1198 copy_block17(full, src, 24, stride, 17);\ 1199 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ 
1200 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 1201 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1202 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 1203}\ 1204static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 1205 uint8_t full[24*17];\ 1206 uint8_t halfH[272];\ 1207 uint8_t halfHV[256];\ 1208 copy_block17(full, src, 24, stride, 17);\ 1209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1210 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 1211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1212 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 1213}\ 1214static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 1215 uint8_t halfH[272];\ 1216 uint8_t halfHV[256];\ 1217 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 1218 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1219 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 1220}\ 1221static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 1222 uint8_t halfH[272];\ 1223 uint8_t halfHV[256];\ 1224 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 1225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 1227}\ 1228void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1229 uint8_t full[24*17];\ 1230 uint8_t halfH[272];\ 1231 uint8_t halfV[256];\ 1232 uint8_t halfHV[256];\ 1233 copy_block17(full, src, 24, stride, 17);\ 1234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1235 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 1236 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1237 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ 1238}\ 1239static void OPNAME ## 
qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ 1240 uint8_t full[24*17];\ 1241 uint8_t halfH[272];\ 1242 copy_block17(full, src, 24, stride, 17);\ 1243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1244 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 1245 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 1246}\ 1247void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1248 uint8_t full[24*17];\ 1249 uint8_t halfH[272];\ 1250 uint8_t halfV[256];\ 1251 uint8_t halfHV[256];\ 1252 copy_block17(full, src, 24, stride, 17);\ 1253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1254 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 1255 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 1256 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ 1257}\ 1258static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 1259 uint8_t full[24*17];\ 1260 uint8_t halfH[272];\ 1261 copy_block17(full, src, 24, stride, 17);\ 1262 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 1263 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 1264 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 1265}\ 1266static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 1267 uint8_t halfH[272];\ 1268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 1269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 1270} 1271 1272#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) 1273#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) 1274#define op_put(a, b) a = cm[((b) + 16)>>5] 1275#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] 1276 1277QPEL_MC(0, put_ , _ , op_put) 1278QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) 1279QPEL_MC(0, avg_ , _ , op_avg) 1280//QPEL_MC(1, avg_no_rnd , _ , op_avg) 1281#undef op_avg 1282#undef op_avg_no_rnd 1283#undef op_put 
#undef op_put_no_rnd

/* The (0,0) qpel positions need no filtering: alias them to the plain
 * pixel copy/average primitives. */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c

/**
 * WMV2 horizontal half-pel lowpass filter.
 * Produces one 8-wide row per iteration from a 4-tap (-1, 9, 9, -1)/16
 * kernel; results are clipped to 0..255 through the crop table.
 * Reads src[-1]..src[9], i.e. one pixel of context on each side.
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}

#if CONFIG_RV40_DECODER
/* RV40's (3,3) quarter-pel position is implemented as the plain
 * diagonal (xy2) half-pel average. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */

/**
 * WMV2 vertical half-pel lowpass filter.
 * Same (-1, 9, 9, -1)/16 kernel as the horizontal variant, applied down
 * each of w columns; reads src[-srcStride]..src[9*srcStride].
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0 ];
        const int src1 = src[ srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}

/* WMV2 8x8 mspel positions: (1,0) = average of source and the
 * horizontally filtered half-pel plane. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}

/* (2,0): pure horizontal half-pel filter, written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* (3,0): like (1,0) but averaged against src shifted right by one. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}

/* (0,2): pure vertical half-pel filter. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1,2): average of the vertical half-pel plane and the HV plane.
 * halfH is filtered over 11 rows starting one row above src so that the
 * subsequent vertical pass (on halfH+8, skipping that context row) has
 * its required top/bottom context. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (3,2): as (1,2) but the vertical plane is taken at src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (2,2): horizontal then vertical half-pel filtering, no averaging. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}

/**
 * H.263 Annex J style deblocking across a horizontal block edge
 * (filters the two rows above and below the edge, 8 columns wide).
 * Strength is looked up from the quantizer.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* Piecewise-linear response: small gradients pass through,
             * larger ones are progressively damped, huge ones untouched
             * (assumed to be real edges). */
            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branch-free clip to 0..255: if bit 8 is set the value is out
             * of range, and ~(v>>31) yields 0 for negative, 255 for >255 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+  stride] = p3 + d2;
        }
    }
}

/**
 * H.263 Annex J style deblocking across a vertical block edge
 * (filters the two columns left and right of the edge, 8 rows tall).
 * Same arithmetic as h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branch-free clip to 0..255, see h263_v_loop_filter_c */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}

/**
 * H.261 in-loop filter: separable (1,2,1)/4 smoothing of an 8x8 block,
 * with border rows/columns passed through unfiltered. The intermediate
 * vertical pass is kept in temp[] at 4x scale to preserve precision.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    /* top and bottom rows: no vertical filtering, just scale by 4 */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    /* vertical (1,2,1) pass for the interior rows */
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* horizontal (1,2,1) pass; edge columns only undo the 4x scale */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}

/* 16-wide sum of absolute differences (SAD), h rows. The unused void*
 * first parameter matches the me_cmp_func signature. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 16-wide SAD against the horizontal half-pel interpolation of pix2
 * (reads pix2[0..16]). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 16-wide SAD against the vertical half-pel interpolation of pix2
 * (averages each pixel with the one a row below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 16-wide SAD against the diagonal half-pel (4-tap average) of pix2. */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 8-wide SAD, h rows. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD against the horizontal half-pel interpolation of pix2. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD against the vertical half-pel interpolation of pix2. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 8-wide SAD against the diagonal half-pel (4-tap average) of pix2. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/**
 * Noise-preserving SSE, 16 pixels wide: plain SSE (score1) plus a term
 * (score2) penalizing differences in local 2x2 gradient structure,
 * weighted by avctx->nsse_weight (default 8 when no context is given).
 * v is a MpegEncContext* in practice, may be NULL.
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/* 8-wide variant of nsse16_c, see above. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/**
 * Evaluate the weighted squared error of adding basis*scale to the
 * residual rem (trellis-style rate-distortion probe). Values are
 * rescaled from BASIS_SHIFT to RECON_SHIFT precision with rounding.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

/* Commit basis*scale into the residual (same rescaling as try_8x8basis_c). */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}

/**
 * Permute an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 *                  (inverse) permutated to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    /* stash the nonzero coefficients and clear their old slots ... */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    /* ... then write them back at their permuted positions */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}

/* Dummy compare function for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

/**
 * Fill the 6-entry compare-function table cmp[] (one entry per block
 * size/plane index) from the DSPContext according to the FF_CMP_* type.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

/* Per-byte dst[i] += src[i], done one machine word at a time using the
 * pb_7f/pb_80 SWAR trick to add without inter-byte carries; the scalar
 * tail handles the last w % sizeof(long) bytes. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}

/* Per-byte dst[i] = src1[i] + src2[i], same SWAR scheme as add_bytes_c. */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}

/* Per-byte dst[i] = src1[i] - src2[i] (carry-less SWAR subtract). On
 * platforms without fast unaligned loads, misaligned src2 falls back to
 * an 8-at-a-time byte loop. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}

/* HuffYUV median prediction decode: reconstruct dst from the diff
 * stream using the median of (left, above, left+above-aboveleft);
 * updates *left / *left_top so rows can be chained. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1,
                                         const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV median prediction encode: inverse of
 * add_hfyu_median_prediction_c, emitting src2[i] - predictor. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV left prediction decode: running byte sum (prefix sum mod 256).
 * The first loop is manually unrolled by two; the second handles an odd
 * trailing element. Returns the final accumulator for the next call. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for(i=0; i<w-1; i++){
        acc+= src[i];
        dst[i]= acc;
        i++;
        acc+= src[i];
        dst[i]= acc;
    }

    for(; i<w; i++){
        acc+= src[i];
        dst[i]= acc;
    }

    return acc;
}

/* Byte offsets of the BGRA channels within a 32-bit pixel, endian-aware. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left prediction decode for packed 32-bit BGRA: each channel keeps its
 * own running sum across the row; state is passed in/out per channel. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A

/* One butterfly stage writing to fresh outputs: o1 = i1+i2, o2 = i1-i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place butterfly: x,y <- x+y, x-y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}
/* Final butterfly stage folded with absolute-value accumulation. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD of an 8x8 block: 8x8 Hadamard transform of (src - dst), then sum of
 * absolute transform coefficients. Rows are transformed first, then columns
 * with the last stage fused into the accumulation via BUTTERFLYA. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}

/* Intra SATD of an 8x8 block: same Hadamard transform applied to src alone;
 * the DC term (|temp[0]+temp[32]|, i.e. the block mean) is subtracted at the
 * end so the score measures AC energy only. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

/* DCT-domain SAD: forward-DCT the 8x8 difference block and return the sum of
 * absolute DCT coefficients (via the DSPContext function pointers). */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}

#if CONFIG_GPL
/* One-dimensional 8-point H.264-style integer transform over the SRC()/DST()
 * macros supplied by the caller. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: row pass writes back into dct[],
 * column pass accumulates absolute values directly via the DST() macro. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

/* Maximum absolute DCT coefficient of the 8x8 difference block. */
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}

/* Quantization-noise score: difference block is quantized, dequantized and
 * inverse transformed; returns the squared error against the original
 * coefficients, approximating the PSNR cost of coding this block. */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}

/* Rate-distortion score for an 8x8 block: quantize the difference block,
 * count the VLC bits needed to code the run/level pairs, reconstruct, and
 * combine SSE distortion with a lambda-weighted bit cost. */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* Work on local aligned copies so the reconstruction can be added in. */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}

/* Bit cost of an 8x8 block: like rd8x8_c but returns only the VLC bit count,
 * without reconstruction or distortion. */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}

/* Vertical SAD of a single block against itself shifted down one row:
 * measures vertical activity. 'size' must be a multiple of 4. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)

/* Vertical SAD of the difference between two 16-wide blocks. */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

#define SQ(a) ((a)*(a))
/* Vertical SSE of a single block against itself shifted down one row. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)

/* Vertical SSE of the difference between two 16-wide blocks. */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

/* Sum of squared differences between an int8 and an int16 array. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
    return score;
}

/* Build 16x16 compare functions from the 8x8 versions (four sub-blocks). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

/* dst[i] = src0[i] * src1[i] */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i];
}

/* dst[i] = src0[i] * src1[len-1-i] (src1 read in reverse) */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}

/* dst[i] = src0[i] * src1[i] + src2[i] */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[i] + src2[i];
}

/* Overlap-add windowing: produces 2*len output samples around the dst+len
 * midpoint from src0 (read forward) and src1 (read backward) through the
 * symmetric window win. Used for MDCT overlap in audio codecs. */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i,j;
    dst += len;
    win += len;
    src0+= len;
    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi;
        dst[j] = s0*wi + s1*wj;
    }
}

/* dst[i] = src[i] * mul */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}

/* dst[i] += src[i] * mul */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] += src[i] * mul;
}

/* In-place butterflies: (v1, v2) <- (v1+v2, v1-v2). The restrict qualifiers
 * promise the two arrays do not alias. */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float t = v1[i] - v2[i];
        v1[i] += v2[i];
        v2[i] = t;
    }
}

/* Butterflies with interleaved output: dst[2i] = s0+s1, dst[2i+1] = s0-s1. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float f1 = src0[i];
        float f2 = src1[i];
        dst[2*i    ] = f1 + f2;
        dst[2*i + 1] = f1 - f2;
    }
}

/* Dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float p = 0.0;
    int i;

    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];

    return p;
}

/* Clip one float (handled as its IEEE-754 bit pattern) when min < 0 < max:
 * positive values compare directly against mini; negative values have their
 * sign bit flipped so they compare against maxisign. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{

    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}

/* vector_clipf for the min < 0 < max case, operating on raw float bits.
 * NOTE(review): processes len in chunks of 8 — callers apparently guarantee
 * len is a multiple of 8; verify at call sites. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clamp each float in src to [min, max]; unrolled by 8 (see note above). */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}

/* Dot product of two int16 vectors, each product right-shifted by 'shift'
 * before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int res = 0;

    while (order--)
        res += (*v1++ * *v2++) >> shift;

    return res;
}

/* Fused dot product and multiply-accumulate: returns v1·v2 while updating
 * v1[i] += mul * v3[i] in the same pass. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;
    while (order--) {
        res += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }
    return res;
}

/* Apply a symmetric half-window to len samples: window[i] multiplies both
 * input[i] and its mirror input[len-1-i], with Q15 rounding. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}

/* Clamp int32 samples to [min, max], unrolled by 8.
 * NOTE(review): 'len' is unsigned and decremented by 8 per pass — callers
 * apparently guarantee a positive multiple of 8; verify at call sites. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}

/* Fixed-point IDCT constants: 2048*sqrt(2)*cos(k*pi/16). */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/* One row of the WMV2 8-point IDCT, in place. */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1*/
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/* One column (stride 8) of the WMV2 IDCT, in place. */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/* Full 8x8 WMV2 IDCT: rows first, then columns. */
void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0;i<64;i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0;i<8;i++){
        wmv2_idct_col(block+i);
    }
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* WMV2 IDCT + clamped store into the destination picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT + clamped add onto the destination picture. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT + clamped store. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT + clamped add. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 lowres IDCT + clamped store (lowres==1). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* 4x4 lowres IDCT + clamped add. */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 lowres IDCT + clamped store (lowres==2). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* 2x2 lowres IDCT + clamped add. */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 lowres "IDCT": only the DC term survives (lowres==3). */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}

/* No-op used as the default prefetch hook. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }

/* init static data */
av_cold void dsputil_static_init(void)
{
    int i;

    /* Clamping table: identity in the middle, saturated to 0/255 on the
     * MAX_NEG_CROP guard bands on either side. */
    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    /* Squares of (i-256) for fast SSE computation. */
    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}

/* Verify the compiler honors 16-byte stack alignment; logs once and returns
 * -1 on miscompiled builds (only fatal for SIMD-enabled targets). */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to Libav developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}

/* Fill a DSPContext with the C reference implementations, selecting FDCT and
 * IDCT variants from the codec context (dct_algo/idct_algo, lowres,
 * bits_per_raw_sample). Architecture-specific init elsewhere may override
 * these pointers afterwards. */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i, j;

    ff_check_alignment();

#if CONFIG_ENCODERS
    /* Forward DCT selection (encoders only). */
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if(avctx->dct_algo==FF_DCT_FASTINT) {
            c->fdct    = fdct_ifast;
            c->fdct248 = fdct_ifast248;
        }
        else if(avctx->dct_algo==FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        }
        else {
            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif //CONFIG_ENCODERS

    /* IDCT selection: lowres decoding uses reduced-size IDCTs. */
    if(avctx->lowres==1){
        c->idct_put= ff_jref_idct4_put;
        c->idct_add= ff_jref_idct4_add;
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if (avctx->bits_per_raw_sample == 10) {
            c->idct_put              = ff_simple_idct_put_10;
            c->idct_add              = ff_simple_idct_add_10;
            c->idct                  = ff_simple_idct_10;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else {
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put = ff_simple_idct_put_8;
            c->idct_add = ff_simple_idct_add_8;
            c->idct     = ff_simple_idct_8;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
        }
    }

    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = ff_put_pixels_clamped_c;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
    c->add_pixels_clamped = ff_add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* Third-pel MC table; indices 3, 7 and 11+ are intentionally unset. */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

/* Fill all 16 quarter-pel positions of one MC table. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

#undef dspfunc

#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
    ff_mlp_init(c, avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif

    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

/* Register the 16x16 and 8x8 variants of one compare function pair. */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_DWT
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;
#if CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add = vector_fmul_add_c;
    c->vector_fmul_window = vector_fmul_window_c;
    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;
    c->scalarproduct_float = scalarproduct_float_c;
    c->butterflies_float = butterflies_float_c;
    c->butterflies_float_interleave = butterflies_float_interleave_c;
    c->vector_fmul_scalar = vector_fmul_scalar_c;
    c->vector_fmac_scalar = vector_fmac_scalar_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

/* Bit-depth-suffixed name helpers for the template-generated functions. */
#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

/* Fill the 4 half-pel positions of one pixels table for a given bit depth. */
#define dspfunc1(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)

/* Fill the 16 quarter-pel positions of one MC table for a given bit depth.
 * NOTE(review): this macro definition is truncated at the end of the visible
 * chunk; it continues in the remainder of the file. */
#define dspfunc2(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
    c->PFX ## 
_pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\ 3082 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) 3083 3084 3085#define BIT_DEPTH_FUNCS(depth, dct)\ 3086 c->get_pixels = FUNCC(get_pixels ## dct , depth);\ 3087 c->draw_edges = FUNCC(draw_edges , depth);\ 3088 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ 3089 c->clear_block = FUNCC(clear_block ## dct , depth);\ 3090 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ 3091 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ 3092 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ 3093 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ 3094 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ 3095\ 3096 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ 3097 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ 3098 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\ 3099 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\ 3100 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\ 3101 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\ 3102\ 3103 dspfunc1(put , 0, 16, depth);\ 3104 dspfunc1(put , 1, 8, depth);\ 3105 dspfunc1(put , 2, 4, depth);\ 3106 dspfunc1(put , 3, 2, depth);\ 3107 dspfunc1(put_no_rnd, 0, 16, depth);\ 3108 dspfunc1(put_no_rnd, 1, 8, depth);\ 3109 dspfunc1(avg , 0, 16, depth);\ 3110 dspfunc1(avg , 1, 8, depth);\ 3111 dspfunc1(avg , 2, 4, depth);\ 3112 dspfunc1(avg , 3, 2, depth);\ 3113 dspfunc1(avg_no_rnd, 0, 16, depth);\ 3114 dspfunc1(avg_no_rnd, 1, 8, depth);\ 3115\ 3116 dspfunc2(put_h264_qpel, 0, 16, depth);\ 3117 dspfunc2(put_h264_qpel, 1, 8, depth);\ 3118 dspfunc2(put_h264_qpel, 2, 4, depth);\ 3119 dspfunc2(put_h264_qpel, 3, 2, depth);\ 3120 dspfunc2(avg_h264_qpel, 0, 16, depth);\ 3121 dspfunc2(avg_h264_qpel, 1, 8, depth);\ 3122 dspfunc2(avg_h264_qpel, 2, 4, depth); 3123 3124 switch 
(avctx->bits_per_raw_sample) { 3125 case 9: 3126 if (c->dct_bits == 32) { 3127 BIT_DEPTH_FUNCS(9, _32); 3128 } else { 3129 BIT_DEPTH_FUNCS(9, _16); 3130 } 3131 break; 3132 case 10: 3133 if (c->dct_bits == 32) { 3134 BIT_DEPTH_FUNCS(10, _32); 3135 } else { 3136 BIT_DEPTH_FUNCS(10, _16); 3137 } 3138 break; 3139 default: 3140 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); 3141 case 8: 3142 BIT_DEPTH_FUNCS(8, _16); 3143 break; 3144 } 3145 3146 3147 if (HAVE_MMX) dsputil_init_mmx (c, avctx); 3148 if (ARCH_ARM) dsputil_init_arm (c, avctx); 3149 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); 3150 if (HAVE_VIS) dsputil_init_vis (c, avctx); 3151 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); 3152 if (ARCH_PPC) dsputil_init_ppc (c, avctx); 3153 if (HAVE_MMI) dsputil_init_mmi (c, avctx); 3154 if (ARCH_SH4) dsputil_init_sh4 (c, avctx); 3155 if (ARCH_BFIN) dsputil_init_bfin (c, avctx); 3156 3157 for (i = 0; i < 4; i++) { 3158 for (j = 0; j < 16; j++) { 3159 if(!c->put_2tap_qpel_pixels_tab[i][j]) 3160 c->put_2tap_qpel_pixels_tab[i][j] = 3161 c->put_h264_qpel_pixels_tab[i][j]; 3162 if(!c->avg_2tap_qpel_pixels_tab[i][j]) 3163 c->avg_2tap_qpel_pixels_tab[i][j] = 3164 c->avg_h264_qpel_pixels_tab[i][j]; 3165 } 3166 } 3167 3168 ff_init_scantable_permutation(c->idct_permutation, 3169 c->idct_permutation_type); 3170} 3171