/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/dsputil.c
 * DSP utils
 */

#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "mathops.h"
#include "h263.h"
#include "snow.h"

/* Prototypes for C helpers implemented in other translation units; kept
 * here (rather than in a shared header) so this file can wire them into
 * the DSP function tables without pulling in the full decoder headers. */

/* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);

/* vorbis.c */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);

/* ac3dec.c */
void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);

/* flacenc.c */
void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);

/* pngdec.c */
void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);

/* eaidct.c */
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);

/* Lookup table accessed as cm = ff_cropTbl + MAX_NEG_CROP and indexed with
 * values that may under/overshoot the [0,255] byte range (see the
 * *_pixels_clamped_c functions later in this file).  Zero-initialized here;
 * presumably populated by an init routine elsewhere in the file — confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };

/* Lookup table accessed as sq = ff_squareTbl + 256 and indexed with signed
 * byte differences in [-256,255] (see pix_norm1_c / sse*_c later in this
 * file).  NOTE(review): by its name each entry holds the square of its
 * (offset) index; the fill site is outside this view — confirm.  Entries are
 * zero-initialized here and presumably populated at init time. */
uint32_t ff_squareTbl[512] = {0, };
60// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size 61#define pb_7f (~0UL/255 * 0x7f) 62#define pb_80 (~0UL/255 * 0x80) 63 64const uint8_t ff_zigzag_direct[64] = { 65 0, 1, 8, 16, 9, 2, 3, 10, 66 17, 24, 32, 25, 18, 11, 4, 5, 67 12, 19, 26, 33, 40, 48, 41, 34, 68 27, 20, 13, 6, 7, 14, 21, 28, 69 35, 42, 49, 56, 57, 50, 43, 36, 70 29, 22, 15, 23, 30, 37, 44, 51, 71 58, 59, 52, 45, 38, 31, 39, 46, 72 53, 60, 61, 54, 47, 55, 62, 63 73}; 74 75/* Specific zigzag scan for 248 idct. NOTE that unlike the 76 specification, we interleave the fields */ 77const uint8_t ff_zigzag248_direct[64] = { 78 0, 8, 1, 9, 16, 24, 2, 10, 79 17, 25, 32, 40, 48, 56, 33, 41, 80 18, 26, 3, 11, 4, 12, 19, 27, 81 34, 42, 49, 57, 50, 58, 35, 43, 82 20, 28, 5, 13, 6, 14, 21, 29, 83 36, 44, 51, 59, 52, 60, 37, 45, 84 22, 30, 7, 15, 23, 31, 38, 46, 85 53, 61, 54, 62, 39, 47, 55, 63, 86}; 87 88/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ 89DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; 90 91const uint8_t ff_alternate_horizontal_scan[64] = { 92 0, 1, 2, 3, 8, 9, 16, 17, 93 10, 11, 4, 5, 6, 7, 15, 14, 94 13, 12, 19, 18, 24, 25, 32, 33, 95 26, 27, 20, 21, 22, 23, 28, 29, 96 30, 31, 34, 35, 40, 41, 48, 49, 97 42, 43, 36, 37, 38, 39, 44, 45, 98 46, 47, 50, 51, 56, 57, 58, 59, 99 52, 53, 54, 55, 60, 61, 62, 63, 100}; 101 102const uint8_t ff_alternate_vertical_scan[64] = { 103 0, 8, 16, 24, 1, 9, 2, 10, 104 17, 25, 32, 40, 48, 56, 57, 49, 105 41, 33, 26, 18, 3, 11, 4, 12, 106 19, 27, 34, 42, 50, 58, 35, 43, 107 51, 59, 20, 28, 5, 13, 6, 14, 108 21, 29, 36, 44, 52, 60, 37, 45, 109 53, 61, 22, 30, 7, 15, 23, 31, 110 38, 46, 54, 62, 39, 47, 55, 63, 111}; 112 113/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ 114const uint32_t ff_inverse[256]={ 115 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 116 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 
286331154, 117 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, 118 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, 119 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, 120 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, 121 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, 122 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, 123 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, 124 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, 125 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, 126 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, 127 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, 128 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, 129 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, 130 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, 131 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, 132 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, 133 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, 134 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, 135 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, 136 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, 137 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, 138 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, 139 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, 140 21474837, 21367997, 
21262215, 21157475, 21053762, 20951060, 20849356, 20748635, 141 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, 142 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, 143 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, 144 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, 145 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 146 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 147}; 148 149/* Input permutation for the simple_idct_mmx */ 150static const uint8_t simple_mmx_permutation[64]={ 151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, 159}; 160 161static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; 162 163void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ 164 int i; 165 int end; 166 167 st->scantable= src_scantable; 168 169 for(i=0; i<64; i++){ 170 int j; 171 j = src_scantable[i]; 172 st->permutated[i] = permutation[j]; 173#if ARCH_PPC 174 st->inverse[j] = i; 175#endif 176 } 177 178 end=-1; 179 for(i=0; i<64; i++){ 180 int j; 181 j = st->permutated[i]; 182 if(j>end) end=j; 183 st->raster_end[i]= end; 184 } 185} 186 187static int pix_sum_c(uint8_t * pix, int line_size) 188{ 189 int s, i, j; 190 191 s = 0; 192 for (i = 0; i < 16; i++) { 193 for (j = 0; j < 16; j += 8) { 194 s += pix[0]; 195 s += pix[1]; 196 s += pix[2]; 197 s += pix[3]; 198 s += pix[4]; 199 s += pix[5]; 200 s += pix[6]; 201 s += pix[7]; 202 pix += 8; 203 } 204 pix += line_size - 16; 205 } 206 return 
s; 207} 208 209static int pix_norm1_c(uint8_t * pix, int line_size) 210{ 211 int s, i, j; 212 uint32_t *sq = ff_squareTbl + 256; 213 214 s = 0; 215 for (i = 0; i < 16; i++) { 216 for (j = 0; j < 16; j += 8) { 217#if 0 218 s += sq[pix[0]]; 219 s += sq[pix[1]]; 220 s += sq[pix[2]]; 221 s += sq[pix[3]]; 222 s += sq[pix[4]]; 223 s += sq[pix[5]]; 224 s += sq[pix[6]]; 225 s += sq[pix[7]]; 226#else 227#if LONG_MAX > 2147483647 228 register uint64_t x=*(uint64_t*)pix; 229 s += sq[x&0xff]; 230 s += sq[(x>>8)&0xff]; 231 s += sq[(x>>16)&0xff]; 232 s += sq[(x>>24)&0xff]; 233 s += sq[(x>>32)&0xff]; 234 s += sq[(x>>40)&0xff]; 235 s += sq[(x>>48)&0xff]; 236 s += sq[(x>>56)&0xff]; 237#else 238 register uint32_t x=*(uint32_t*)pix; 239 s += sq[x&0xff]; 240 s += sq[(x>>8)&0xff]; 241 s += sq[(x>>16)&0xff]; 242 s += sq[(x>>24)&0xff]; 243 x=*(uint32_t*)(pix+4); 244 s += sq[x&0xff]; 245 s += sq[(x>>8)&0xff]; 246 s += sq[(x>>16)&0xff]; 247 s += sq[(x>>24)&0xff]; 248#endif 249#endif 250 pix += 8; 251 } 252 pix += line_size - 16; 253 } 254 return s; 255} 256 257static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){ 258 int i; 259 260 for(i=0; i+8<=w; i+=8){ 261 dst[i+0]= bswap_32(src[i+0]); 262 dst[i+1]= bswap_32(src[i+1]); 263 dst[i+2]= bswap_32(src[i+2]); 264 dst[i+3]= bswap_32(src[i+3]); 265 dst[i+4]= bswap_32(src[i+4]); 266 dst[i+5]= bswap_32(src[i+5]); 267 dst[i+6]= bswap_32(src[i+6]); 268 dst[i+7]= bswap_32(src[i+7]); 269 } 270 for(;i<w; i++){ 271 dst[i+0]= bswap_32(src[i+0]); 272 } 273} 274 275static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 276{ 277 int s, i; 278 uint32_t *sq = ff_squareTbl + 256; 279 280 s = 0; 281 for (i = 0; i < h; i++) { 282 s += sq[pix1[0] - pix2[0]]; 283 s += sq[pix1[1] - pix2[1]]; 284 s += sq[pix1[2] - pix2[2]]; 285 s += sq[pix1[3] - pix2[3]]; 286 pix1 += line_size; 287 pix2 += line_size; 288 } 289 return s; 290} 291 292static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 293{ 294 int s, 
i; 295 uint32_t *sq = ff_squareTbl + 256; 296 297 s = 0; 298 for (i = 0; i < h; i++) { 299 s += sq[pix1[0] - pix2[0]]; 300 s += sq[pix1[1] - pix2[1]]; 301 s += sq[pix1[2] - pix2[2]]; 302 s += sq[pix1[3] - pix2[3]]; 303 s += sq[pix1[4] - pix2[4]]; 304 s += sq[pix1[5] - pix2[5]]; 305 s += sq[pix1[6] - pix2[6]]; 306 s += sq[pix1[7] - pix2[7]]; 307 pix1 += line_size; 308 pix2 += line_size; 309 } 310 return s; 311} 312 313static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 314{ 315 int s, i; 316 uint32_t *sq = ff_squareTbl + 256; 317 318 s = 0; 319 for (i = 0; i < h; i++) { 320 s += sq[pix1[ 0] - pix2[ 0]]; 321 s += sq[pix1[ 1] - pix2[ 1]]; 322 s += sq[pix1[ 2] - pix2[ 2]]; 323 s += sq[pix1[ 3] - pix2[ 3]]; 324 s += sq[pix1[ 4] - pix2[ 4]]; 325 s += sq[pix1[ 5] - pix2[ 5]]; 326 s += sq[pix1[ 6] - pix2[ 6]]; 327 s += sq[pix1[ 7] - pix2[ 7]]; 328 s += sq[pix1[ 8] - pix2[ 8]]; 329 s += sq[pix1[ 9] - pix2[ 9]]; 330 s += sq[pix1[10] - pix2[10]]; 331 s += sq[pix1[11] - pix2[11]]; 332 s += sq[pix1[12] - pix2[12]]; 333 s += sq[pix1[13] - pix2[13]]; 334 s += sq[pix1[14] - pix2[14]]; 335 s += sq[pix1[15] - pix2[15]]; 336 337 pix1 += line_size; 338 pix2 += line_size; 339 } 340 return s; 341} 342 343 344#if CONFIG_SNOW_ENCODER //dwt is in snow.c 345static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ 346 int s, i, j; 347 const int dec_count= w==8 ? 
3 : 4; 348 int tmp[32*32]; 349 int level, ori; 350 static const int scale[2][2][4][4]={ 351 { 352 { 353 // 9/7 8x8 dec=3 354 {268, 239, 239, 213}, 355 { 0, 224, 224, 152}, 356 { 0, 135, 135, 110}, 357 },{ 358 // 9/7 16x16 or 32x32 dec=4 359 {344, 310, 310, 280}, 360 { 0, 320, 320, 228}, 361 { 0, 175, 175, 136}, 362 { 0, 129, 129, 102}, 363 } 364 },{ 365 { 366 // 5/3 8x8 dec=3 367 {275, 245, 245, 218}, 368 { 0, 230, 230, 156}, 369 { 0, 138, 138, 113}, 370 },{ 371 // 5/3 16x16 or 32x32 dec=4 372 {352, 317, 317, 286}, 373 { 0, 328, 328, 233}, 374 { 0, 180, 180, 140}, 375 { 0, 132, 132, 105}, 376 } 377 } 378 }; 379 380 for (i = 0; i < h; i++) { 381 for (j = 0; j < w; j+=4) { 382 tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; 383 tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; 384 tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; 385 tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; 386 } 387 pix1 += line_size; 388 pix2 += line_size; 389 } 390 391 ff_spatial_dwt(tmp, w, h, 32, type, dec_count); 392 393 s=0; 394 assert(w==h); 395 for(level=0; level<dec_count; level++){ 396 for(ori= level ? 1 : 0; ori<4; ori++){ 397 int size= w>>(dec_count-level); 398 int sx= (ori&1) ? size : 0; 399 int stride= 32<<(dec_count-level); 400 int sy= (ori&2) ? 
stride>>1 : 0; 401 402 for(i=0; i<size; i++){ 403 for(j=0; j<size; j++){ 404 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; 405 s += FFABS(v); 406 } 407 } 408 } 409 } 410 assert(s>=0); 411 return s>>9; 412} 413 414static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 415 return w_c(v, pix1, pix2, line_size, 8, h, 1); 416} 417 418static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 419 return w_c(v, pix1, pix2, line_size, 8, h, 0); 420} 421 422static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 423 return w_c(v, pix1, pix2, line_size, 16, h, 1); 424} 425 426static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 427 return w_c(v, pix1, pix2, line_size, 16, h, 0); 428} 429 430int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 431 return w_c(v, pix1, pix2, line_size, 32, h, 1); 432} 433 434int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ 435 return w_c(v, pix1, pix2, line_size, 32, h, 0); 436} 437#endif 438 439/* draw the edges of width 'w' of an image of size width, height */ 440//FIXME check that this is ok for mpeg4 interlaced 441static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w) 442{ 443 uint8_t *ptr, *last_line; 444 int i; 445 446 last_line = buf + (height - 1) * wrap; 447 for(i=0;i<w;i++) { 448 /* top and bottom */ 449 memcpy(buf - (i + 1) * wrap, buf, width); 450 memcpy(last_line + (i + 1) * wrap, last_line, width); 451 } 452 /* left and right */ 453 ptr = buf; 454 for(i=0;i<height;i++) { 455 memset(ptr - w, ptr[0], w); 456 memset(ptr + width, ptr[width-1], w); 457 ptr += wrap; 458 } 459 /* corners */ 460 for(i=0;i<w;i++) { 461 memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */ 462 memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */ 463 memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */ 
464 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */ 465 } 466} 467 468/** 469 * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples. 470 * @param buf destination buffer 471 * @param src source buffer 472 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers 473 * @param block_w width of block 474 * @param block_h height of block 475 * @param src_x x coordinate of the top left sample of the block in the source buffer 476 * @param src_y y coordinate of the top left sample of the block in the source buffer 477 * @param w width of the source buffer 478 * @param h height of the source buffer 479 */ 480void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 481 int src_x, int src_y, int w, int h){ 482 int x, y; 483 int start_y, start_x, end_y, end_x; 484 485 if(src_y>= h){ 486 src+= (h-1-src_y)*linesize; 487 src_y=h-1; 488 }else if(src_y<=-block_h){ 489 src+= (1-block_h-src_y)*linesize; 490 src_y=1-block_h; 491 } 492 if(src_x>= w){ 493 src+= (w-1-src_x); 494 src_x=w-1; 495 }else if(src_x<=-block_w){ 496 src+= (1-block_w-src_x); 497 src_x=1-block_w; 498 } 499 500 start_y= FFMAX(0, -src_y); 501 start_x= FFMAX(0, -src_x); 502 end_y= FFMIN(block_h, h-src_y); 503 end_x= FFMIN(block_w, w-src_x); 504 505 // copy existing part 506 for(y=start_y; y<end_y; y++){ 507 for(x=start_x; x<end_x; x++){ 508 buf[x + y*linesize]= src[x + y*linesize]; 509 } 510 } 511 512 //top 513 for(y=0; y<start_y; y++){ 514 for(x=start_x; x<end_x; x++){ 515 buf[x + y*linesize]= buf[x + start_y*linesize]; 516 } 517 } 518 519 //bottom 520 for(y=end_y; y<block_h; y++){ 521 for(x=start_x; x<end_x; x++){ 522 buf[x + y*linesize]= buf[x + (end_y-1)*linesize]; 523 } 524 } 525 526 for(y=0; y<block_h; y++){ 527 //left 528 for(x=0; x<start_x; x++){ 529 buf[x + y*linesize]= buf[start_x + y*linesize]; 530 } 531 532 //right 533 for(x=end_x; 
x<block_w; x++){ 534 buf[x + y*linesize]= buf[end_x - 1 + y*linesize]; 535 } 536 } 537} 538 539static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 540{ 541 int i; 542 543 /* read the pixels */ 544 for(i=0;i<8;i++) { 545 block[0] = pixels[0]; 546 block[1] = pixels[1]; 547 block[2] = pixels[2]; 548 block[3] = pixels[3]; 549 block[4] = pixels[4]; 550 block[5] = pixels[5]; 551 block[6] = pixels[6]; 552 block[7] = pixels[7]; 553 pixels += line_size; 554 block += 8; 555 } 556} 557 558static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, 559 const uint8_t *s2, int stride){ 560 int i; 561 562 /* read the pixels */ 563 for(i=0;i<8;i++) { 564 block[0] = s1[0] - s2[0]; 565 block[1] = s1[1] - s2[1]; 566 block[2] = s1[2] - s2[2]; 567 block[3] = s1[3] - s2[3]; 568 block[4] = s1[4] - s2[4]; 569 block[5] = s1[5] - s2[5]; 570 block[6] = s1[6] - s2[6]; 571 block[7] = s1[7] - s2[7]; 572 s1 += stride; 573 s2 += stride; 574 block += 8; 575 } 576} 577 578 579static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 580 int line_size) 581{ 582 int i; 583 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 584 585 /* read the pixels */ 586 for(i=0;i<8;i++) { 587 pixels[0] = cm[block[0]]; 588 pixels[1] = cm[block[1]]; 589 pixels[2] = cm[block[2]]; 590 pixels[3] = cm[block[3]]; 591 pixels[4] = cm[block[4]]; 592 pixels[5] = cm[block[5]]; 593 pixels[6] = cm[block[6]]; 594 pixels[7] = cm[block[7]]; 595 596 pixels += line_size; 597 block += 8; 598 } 599} 600 601static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 602 int line_size) 603{ 604 int i; 605 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 606 607 /* read the pixels */ 608 for(i=0;i<4;i++) { 609 pixels[0] = cm[block[0]]; 610 pixels[1] = cm[block[1]]; 611 pixels[2] = cm[block[2]]; 612 pixels[3] = cm[block[3]]; 613 614 pixels += line_size; 615 block += 8; 616 } 617} 618 619static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 
620 int line_size) 621{ 622 int i; 623 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 624 625 /* read the pixels */ 626 for(i=0;i<2;i++) { 627 pixels[0] = cm[block[0]]; 628 pixels[1] = cm[block[1]]; 629 630 pixels += line_size; 631 block += 8; 632 } 633} 634 635static void put_signed_pixels_clamped_c(const DCTELEM *block, 636 uint8_t *restrict pixels, 637 int line_size) 638{ 639 int i, j; 640 641 for (i = 0; i < 8; i++) { 642 for (j = 0; j < 8; j++) { 643 if (*block < -128) 644 *pixels = 0; 645 else if (*block > 127) 646 *pixels = 255; 647 else 648 *pixels = (uint8_t)(*block + 128); 649 block++; 650 pixels++; 651 } 652 pixels += (line_size - 8); 653 } 654} 655 656static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 657 int line_size) 658{ 659 int i; 660 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 661 662 /* read the pixels */ 663 for(i=0;i<8;i++) { 664 pixels[0] = cm[pixels[0] + block[0]]; 665 pixels[1] = cm[pixels[1] + block[1]]; 666 pixels[2] = cm[pixels[2] + block[2]]; 667 pixels[3] = cm[pixels[3] + block[3]]; 668 pixels[4] = cm[pixels[4] + block[4]]; 669 pixels[5] = cm[pixels[5] + block[5]]; 670 pixels[6] = cm[pixels[6] + block[6]]; 671 pixels[7] = cm[pixels[7] + block[7]]; 672 pixels += line_size; 673 block += 8; 674 } 675} 676 677static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 678 int line_size) 679{ 680 int i; 681 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 682 683 /* read the pixels */ 684 for(i=0;i<4;i++) { 685 pixels[0] = cm[pixels[0] + block[0]]; 686 pixels[1] = cm[pixels[1] + block[1]]; 687 pixels[2] = cm[pixels[2] + block[2]]; 688 pixels[3] = cm[pixels[3] + block[3]]; 689 pixels += line_size; 690 block += 8; 691 } 692} 693 694static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 695 int line_size) 696{ 697 int i; 698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 699 700 /* read the pixels */ 701 for(i=0;i<2;i++) { 702 pixels[0] = cm[pixels[0] + block[0]]; 703 pixels[1] = 
cm[pixels[1] + block[1]]; 704 pixels += line_size; 705 block += 8; 706 } 707} 708 709static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) 710{ 711 int i; 712 for(i=0;i<8;i++) { 713 pixels[0] += block[0]; 714 pixels[1] += block[1]; 715 pixels[2] += block[2]; 716 pixels[3] += block[3]; 717 pixels[4] += block[4]; 718 pixels[5] += block[5]; 719 pixels[6] += block[6]; 720 pixels[7] += block[7]; 721 pixels += line_size; 722 block += 8; 723 } 724} 725 726static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) 727{ 728 int i; 729 for(i=0;i<4;i++) { 730 pixels[0] += block[0]; 731 pixels[1] += block[1]; 732 pixels[2] += block[2]; 733 pixels[3] += block[3]; 734 pixels += line_size; 735 block += 4; 736 } 737} 738 739static int sum_abs_dctelem_c(DCTELEM *block) 740{ 741 int sum=0, i; 742 for(i=0; i<64; i++) 743 sum+= FFABS(block[i]); 744 return sum; 745} 746 747#if 0 748 749#define PIXOP2(OPNAME, OP) \ 750static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 751{\ 752 int i;\ 753 for(i=0; i<h; i++){\ 754 OP(*((uint64_t*)block), AV_RN64(pixels));\ 755 pixels+=line_size;\ 756 block +=line_size;\ 757 }\ 758}\ 759\ 760static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 761{\ 762 int i;\ 763 for(i=0; i<h; i++){\ 764 const uint64_t a= AV_RN64(pixels );\ 765 const uint64_t b= AV_RN64(pixels+1);\ 766 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 767 pixels+=line_size;\ 768 block +=line_size;\ 769 }\ 770}\ 771\ 772static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 773{\ 774 int i;\ 775 for(i=0; i<h; i++){\ 776 const uint64_t a= AV_RN64(pixels );\ 777 const uint64_t b= AV_RN64(pixels+1);\ 778 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 779 pixels+=line_size;\ 780 block +=line_size;\ 781 }\ 782}\ 783\ 784static void OPNAME ## 
_no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 785{\ 786 int i;\ 787 for(i=0; i<h; i++){\ 788 const uint64_t a= AV_RN64(pixels );\ 789 const uint64_t b= AV_RN64(pixels+line_size);\ 790 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 791 pixels+=line_size;\ 792 block +=line_size;\ 793 }\ 794}\ 795\ 796static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 797{\ 798 int i;\ 799 for(i=0; i<h; i++){\ 800 const uint64_t a= AV_RN64(pixels );\ 801 const uint64_t b= AV_RN64(pixels+line_size);\ 802 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 803 pixels+=line_size;\ 804 block +=line_size;\ 805 }\ 806}\ 807\ 808static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 809{\ 810 int i;\ 811 const uint64_t a= AV_RN64(pixels );\ 812 const uint64_t b= AV_RN64(pixels+1);\ 813 uint64_t l0= (a&0x0303030303030303ULL)\ 814 + (b&0x0303030303030303ULL)\ 815 + 0x0202020202020202ULL;\ 816 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 817 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 818 uint64_t l1,h1;\ 819\ 820 pixels+=line_size;\ 821 for(i=0; i<h; i+=2){\ 822 uint64_t a= AV_RN64(pixels );\ 823 uint64_t b= AV_RN64(pixels+1);\ 824 l1= (a&0x0303030303030303ULL)\ 825 + (b&0x0303030303030303ULL);\ 826 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 827 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 828 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 829 pixels+=line_size;\ 830 block +=line_size;\ 831 a= AV_RN64(pixels );\ 832 b= AV_RN64(pixels+1);\ 833 l0= (a&0x0303030303030303ULL)\ 834 + (b&0x0303030303030303ULL)\ 835 + 0x0202020202020202ULL;\ 836 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 837 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 838 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 839 pixels+=line_size;\ 840 block +=line_size;\ 841 }\ 842}\ 843\ 844static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, 
int line_size, int h)\ 845{\ 846 int i;\ 847 const uint64_t a= AV_RN64(pixels );\ 848 const uint64_t b= AV_RN64(pixels+1);\ 849 uint64_t l0= (a&0x0303030303030303ULL)\ 850 + (b&0x0303030303030303ULL)\ 851 + 0x0101010101010101ULL;\ 852 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 853 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 854 uint64_t l1,h1;\ 855\ 856 pixels+=line_size;\ 857 for(i=0; i<h; i+=2){\ 858 uint64_t a= AV_RN64(pixels );\ 859 uint64_t b= AV_RN64(pixels+1);\ 860 l1= (a&0x0303030303030303ULL)\ 861 + (b&0x0303030303030303ULL);\ 862 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 863 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 864 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 865 pixels+=line_size;\ 866 block +=line_size;\ 867 a= AV_RN64(pixels );\ 868 b= AV_RN64(pixels+1);\ 869 l0= (a&0x0303030303030303ULL)\ 870 + (b&0x0303030303030303ULL)\ 871 + 0x0101010101010101ULL;\ 872 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 873 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 874 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 875 pixels+=line_size;\ 876 block +=line_size;\ 877 }\ 878}\ 879\ 880CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ 881CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ 882CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ 883CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ 884CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ 885CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ 886CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) 887 888#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) 889#else // 64 bit variant 890 891#define PIXOP2(OPNAME, OP) \ 892static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 893 int i;\ 894 for(i=0; i<h; i++){\ 895 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ 896 
pixels+=line_size;\ 897 block +=line_size;\ 898 }\ 899}\ 900static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 901 int i;\ 902 for(i=0; i<h; i++){\ 903 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ 904 pixels+=line_size;\ 905 block +=line_size;\ 906 }\ 907}\ 908static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 909 int i;\ 910 for(i=0; i<h; i++){\ 911 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ 912 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ 913 pixels+=line_size;\ 914 block +=line_size;\ 915 }\ 916}\ 917static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 918 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ 919}\ 920\ 921static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 922 int src_stride1, int src_stride2, int h){\ 923 int i;\ 924 for(i=0; i<h; i++){\ 925 uint32_t a,b;\ 926 a= AV_RN32(&src1[i*src_stride1 ]);\ 927 b= AV_RN32(&src2[i*src_stride2 ]);\ 928 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ 929 a= AV_RN32(&src1[i*src_stride1+4]);\ 930 b= AV_RN32(&src2[i*src_stride2+4]);\ 931 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ 932 }\ 933}\ 934\ 935static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 936 int src_stride1, int src_stride2, int h){\ 937 int i;\ 938 for(i=0; i<h; i++){\ 939 uint32_t a,b;\ 940 a= AV_RN32(&src1[i*src_stride1 ]);\ 941 b= AV_RN32(&src2[i*src_stride2 ]);\ 942 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 943 a= AV_RN32(&src1[i*src_stride1+4]);\ 944 b= AV_RN32(&src2[i*src_stride2+4]);\ 945 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ 946 }\ 947}\ 948\ 949static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 950 int src_stride1, int 
src_stride2, int h){\ 951 int i;\ 952 for(i=0; i<h; i++){\ 953 uint32_t a,b;\ 954 a= AV_RN32(&src1[i*src_stride1 ]);\ 955 b= AV_RN32(&src2[i*src_stride2 ]);\ 956 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 957 }\ 958}\ 959\ 960static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 961 int src_stride1, int src_stride2, int h){\ 962 int i;\ 963 for(i=0; i<h; i++){\ 964 uint32_t a,b;\ 965 a= AV_RN16(&src1[i*src_stride1 ]);\ 966 b= AV_RN16(&src2[i*src_stride2 ]);\ 967 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 968 }\ 969}\ 970\ 971static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 972 int src_stride1, int src_stride2, int h){\ 973 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ 974 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ 975}\ 976\ 977static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 978 int src_stride1, int src_stride2, int h){\ 979 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ 980 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ 981}\ 982\ 983static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 984 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ 985}\ 986\ 987static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 988 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ 989}\ 990\ 991static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 992 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, 
line_size, h);\ 993}\ 994\ 995static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 996 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 997}\ 998\ 999static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 1000 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 1001 int i;\ 1002 for(i=0; i<h; i++){\ 1003 uint32_t a, b, c, d, l0, l1, h0, h1;\ 1004 a= AV_RN32(&src1[i*src_stride1]);\ 1005 b= AV_RN32(&src2[i*src_stride2]);\ 1006 c= AV_RN32(&src3[i*src_stride3]);\ 1007 d= AV_RN32(&src4[i*src_stride4]);\ 1008 l0= (a&0x03030303UL)\ 1009 + (b&0x03030303UL)\ 1010 + 0x02020202UL;\ 1011 h0= ((a&0xFCFCFCFCUL)>>2)\ 1012 + ((b&0xFCFCFCFCUL)>>2);\ 1013 l1= (c&0x03030303UL)\ 1014 + (d&0x03030303UL);\ 1015 h1= ((c&0xFCFCFCFCUL)>>2)\ 1016 + ((d&0xFCFCFCFCUL)>>2);\ 1017 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1018 a= AV_RN32(&src1[i*src_stride1+4]);\ 1019 b= AV_RN32(&src2[i*src_stride2+4]);\ 1020 c= AV_RN32(&src3[i*src_stride3+4]);\ 1021 d= AV_RN32(&src4[i*src_stride4+4]);\ 1022 l0= (a&0x03030303UL)\ 1023 + (b&0x03030303UL)\ 1024 + 0x02020202UL;\ 1025 h0= ((a&0xFCFCFCFCUL)>>2)\ 1026 + ((b&0xFCFCFCFCUL)>>2);\ 1027 l1= (c&0x03030303UL)\ 1028 + (d&0x03030303UL);\ 1029 h1= ((c&0xFCFCFCFCUL)>>2)\ 1030 + ((d&0xFCFCFCFCUL)>>2);\ 1031 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1032 }\ 1033}\ 1034\ 1035static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 1036 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ 1037}\ 1038\ 1039static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 1040 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 1041}\ 1042\ 
1043static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 1044 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ 1045}\ 1046\ 1047static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 1048 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 1049}\ 1050\ 1051static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 1052 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 1053 int i;\ 1054 for(i=0; i<h; i++){\ 1055 uint32_t a, b, c, d, l0, l1, h0, h1;\ 1056 a= AV_RN32(&src1[i*src_stride1]);\ 1057 b= AV_RN32(&src2[i*src_stride2]);\ 1058 c= AV_RN32(&src3[i*src_stride3]);\ 1059 d= AV_RN32(&src4[i*src_stride4]);\ 1060 l0= (a&0x03030303UL)\ 1061 + (b&0x03030303UL)\ 1062 + 0x01010101UL;\ 1063 h0= ((a&0xFCFCFCFCUL)>>2)\ 1064 + ((b&0xFCFCFCFCUL)>>2);\ 1065 l1= (c&0x03030303UL)\ 1066 + (d&0x03030303UL);\ 1067 h1= ((c&0xFCFCFCFCUL)>>2)\ 1068 + ((d&0xFCFCFCFCUL)>>2);\ 1069 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1070 a= AV_RN32(&src1[i*src_stride1+4]);\ 1071 b= AV_RN32(&src2[i*src_stride2+4]);\ 1072 c= AV_RN32(&src3[i*src_stride3+4]);\ 1073 d= AV_RN32(&src4[i*src_stride4+4]);\ 1074 l0= (a&0x03030303UL)\ 1075 + (b&0x03030303UL)\ 1076 + 0x01010101UL;\ 1077 h0= ((a&0xFCFCFCFCUL)>>2)\ 1078 + ((b&0xFCFCFCFCUL)>>2);\ 1079 l1= (c&0x03030303UL)\ 1080 + (d&0x03030303UL);\ 1081 h1= ((c&0xFCFCFCFCUL)>>2)\ 1082 + ((d&0xFCFCFCFCUL)>>2);\ 1083 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1084 }\ 1085}\ 1086static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 1087 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 1088 OPNAME ## 
_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 1089 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 1090}\ 1091static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 1092 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 1093 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 1094 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 1095}\ 1096\ 1097static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1098{\ 1099 int i, a0, b0, a1, b1;\ 1100 a0= pixels[0];\ 1101 b0= pixels[1] + 2;\ 1102 a0 += b0;\ 1103 b0 += pixels[2];\ 1104\ 1105 pixels+=line_size;\ 1106 for(i=0; i<h; i+=2){\ 1107 a1= pixels[0];\ 1108 b1= pixels[1];\ 1109 a1 += b1;\ 1110 b1 += pixels[2];\ 1111\ 1112 block[0]= (a1+a0)>>2; /* FIXME non put */\ 1113 block[1]= (b1+b0)>>2;\ 1114\ 1115 pixels+=line_size;\ 1116 block +=line_size;\ 1117\ 1118 a0= pixels[0];\ 1119 b0= pixels[1] + 2;\ 1120 a0 += b0;\ 1121 b0 += pixels[2];\ 1122\ 1123 block[0]= (a1+a0)>>2;\ 1124 block[1]= (b1+b0)>>2;\ 1125 pixels+=line_size;\ 1126 block +=line_size;\ 1127 }\ 1128}\ 1129\ 1130static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1131{\ 1132 int i;\ 1133 const uint32_t a= AV_RN32(pixels );\ 1134 const uint32_t b= AV_RN32(pixels+1);\ 1135 uint32_t l0= (a&0x03030303UL)\ 1136 + (b&0x03030303UL)\ 1137 + 0x02020202UL;\ 1138 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 1139 + ((b&0xFCFCFCFCUL)>>2);\ 1140 uint32_t l1,h1;\ 1141\ 1142 pixels+=line_size;\ 1143 for(i=0; i<h; i+=2){\ 1144 uint32_t a= AV_RN32(pixels );\ 
1145 uint32_t b= AV_RN32(pixels+1);\ 1146 l1= (a&0x03030303UL)\ 1147 + (b&0x03030303UL);\ 1148 h1= ((a&0xFCFCFCFCUL)>>2)\ 1149 + ((b&0xFCFCFCFCUL)>>2);\ 1150 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1151 pixels+=line_size;\ 1152 block +=line_size;\ 1153 a= AV_RN32(pixels );\ 1154 b= AV_RN32(pixels+1);\ 1155 l0= (a&0x03030303UL)\ 1156 + (b&0x03030303UL)\ 1157 + 0x02020202UL;\ 1158 h0= ((a&0xFCFCFCFCUL)>>2)\ 1159 + ((b&0xFCFCFCFCUL)>>2);\ 1160 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1161 pixels+=line_size;\ 1162 block +=line_size;\ 1163 }\ 1164}\ 1165\ 1166static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1167{\ 1168 int j;\ 1169 for(j=0; j<2; j++){\ 1170 int i;\ 1171 const uint32_t a= AV_RN32(pixels );\ 1172 const uint32_t b= AV_RN32(pixels+1);\ 1173 uint32_t l0= (a&0x03030303UL)\ 1174 + (b&0x03030303UL)\ 1175 + 0x02020202UL;\ 1176 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 1177 + ((b&0xFCFCFCFCUL)>>2);\ 1178 uint32_t l1,h1;\ 1179\ 1180 pixels+=line_size;\ 1181 for(i=0; i<h; i+=2){\ 1182 uint32_t a= AV_RN32(pixels );\ 1183 uint32_t b= AV_RN32(pixels+1);\ 1184 l1= (a&0x03030303UL)\ 1185 + (b&0x03030303UL);\ 1186 h1= ((a&0xFCFCFCFCUL)>>2)\ 1187 + ((b&0xFCFCFCFCUL)>>2);\ 1188 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1189 pixels+=line_size;\ 1190 block +=line_size;\ 1191 a= AV_RN32(pixels );\ 1192 b= AV_RN32(pixels+1);\ 1193 l0= (a&0x03030303UL)\ 1194 + (b&0x03030303UL)\ 1195 + 0x02020202UL;\ 1196 h0= ((a&0xFCFCFCFCUL)>>2)\ 1197 + ((b&0xFCFCFCFCUL)>>2);\ 1198 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1199 pixels+=line_size;\ 1200 block +=line_size;\ 1201 }\ 1202 pixels+=4-line_size*(h+1);\ 1203 block +=4-line_size*h;\ 1204 }\ 1205}\ 1206\ 1207static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1208{\ 1209 int j;\ 1210 for(j=0; j<2; j++){\ 1211 int i;\ 1212 const uint32_t a= 
AV_RN32(pixels );\ 1213 const uint32_t b= AV_RN32(pixels+1);\ 1214 uint32_t l0= (a&0x03030303UL)\ 1215 + (b&0x03030303UL)\ 1216 + 0x01010101UL;\ 1217 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 1218 + ((b&0xFCFCFCFCUL)>>2);\ 1219 uint32_t l1,h1;\ 1220\ 1221 pixels+=line_size;\ 1222 for(i=0; i<h; i+=2){\ 1223 uint32_t a= AV_RN32(pixels );\ 1224 uint32_t b= AV_RN32(pixels+1);\ 1225 l1= (a&0x03030303UL)\ 1226 + (b&0x03030303UL);\ 1227 h1= ((a&0xFCFCFCFCUL)>>2)\ 1228 + ((b&0xFCFCFCFCUL)>>2);\ 1229 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1230 pixels+=line_size;\ 1231 block +=line_size;\ 1232 a= AV_RN32(pixels );\ 1233 b= AV_RN32(pixels+1);\ 1234 l0= (a&0x03030303UL)\ 1235 + (b&0x03030303UL)\ 1236 + 0x01010101UL;\ 1237 h0= ((a&0xFCFCFCFCUL)>>2)\ 1238 + ((b&0xFCFCFCFCUL)>>2);\ 1239 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1240 pixels+=line_size;\ 1241 block +=line_size;\ 1242 }\ 1243 pixels+=4-line_size*(h+1);\ 1244 block +=4-line_size*h;\ 1245 }\ 1246}\ 1247\ 1248CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ 1249CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ 1250CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ 1251CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ 1252CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ 1253CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ 1254CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ 1255CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ 1256 1257#define op_avg(a, b) a = rnd_avg32(a, b) 1258#endif 1259#define op_put(a, b) a = b 1260 1261PIXOP2(avg, op_avg) 1262PIXOP2(put, op_put) 1263#undef op_avg 1264#undef op_put 1265 1266#define avg2(a,b) ((a+b+1)>>1) 1267#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 1268 1269static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const 
uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

/* Thin wrapper: average of two 8-wide sources without the +1 rounding bias. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}

/* 8-wide GMC with a single 1/16-pel motion vector (x16, y16 in [0,16)):
 * bilinear interpolation of the 2x2 source neighbourhood. The four weights
 * sum to 256, so the caller-supplied rounder is added before the >>8
 * normalisation. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}

/* Affine global motion compensation, 8 pixels per row:
 * the source position for output pixel (x,y) is driven by a fixed-point
 * vector (vx,vy) that starts each row at (ox,oy), advances by (dxx,dyx)
 * per pixel, while the row start advances by (dxy,dyy) per line.
 * Positions carry 16 fractional bits (>>16 below); the low 'shift' bits of
 * the integer part are the sub-pel fraction used for bilinear filtering.
 * Samples outside [0,width]x[0,height] are clamped to the border (av_clip),
 * degenerating to 1-D or 0-D interpolation as appropriate. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* fraction = low 'shift' bits of the integer-part position */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear, rounding constant r, result normalised by 2*shift */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both outside: nearest clamped sample, no filtering */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}

/* Thirdpel (per the tpel_ naming) full-pel copy: dispatch on block width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}

/* 1/3-pel horizontal: (2*a + b)/3 approximated as 683/2048 ~= 1/3, with
 * +1 rounding before the >>11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/* 2/3-pel horizontal: (a + 2*b)/3, same 683>>11 approximation. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/* 1/3-pel vertical: (2*a + below)/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i <
height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/* (1/3,1/3)-pel: weighted 2x2 average with weights 4,3,3,2 (sum 12),
 * 2731/32768 ~= 1/12, +6 for rounding before the >>15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/* (1/3,2/3)-pel: weights 3,2,4,3. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/* 2/3-pel vertical: (a + 2*below)/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/* (2/3,1/3)-pel: weights 3,4,2,3. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/* (2/3,2/3)-pel: weights 2,3,3,4. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/* avg_ variants: same interpolation as the put_ versions above, then
 * rounded-up average with the existing destination pixel. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Dead code: per-width wrapper generator for the tpel functions above.
 * Note the bodies are not even valid calls ("void put_tpel_..."); kept
 * verbatim since it is compiled out. */
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ##
width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif

/* H.264-style 1/8-pel chroma MC generator for 2/4/8-wide blocks.
 * A..D are the standard bilinear weights for fraction (x,y), A+B+C+D == 64;
 * the OP() macro supplied at instantiation performs the >>6 normalisation
 * (and, for avg_, the average with dst). When D==0 the 2-D filter collapses
 * to a 1-D filter along either rows (step==1) or columns (step==stride). */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* put: normalise (weights sum to 64, +32 for rounding);
 * avg: same, then rounded average with the existing dst pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put

/* Like put_h264_chroma_mc8_c but with rounding constant 32-4 instead of 32,
 * i.e. a downward rounding bias — presumably the "no rounding" mode implied
 * by the name (NOTE(review): confirm against the caller, e.g. VC-1/WMV). */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}

/* MPEG-4 quarter-pel MC generator. The 8-tap-style lowpass uses taps
 * (20,-6,3,-1)/16 with mirrored samples at the block edges. NOTE(review):
 * cm appears unused in these bodies but the OP() expansions supplied at
 * instantiation are expected to reference it for clipping — confirm at the
 * op_* definitions near the QPEL_MC invocations (outside this view). */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ 1743 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ 1744 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ 1745 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ 1746 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ 1747 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ 1748 dst++;\ 1749 src++;\ 1750 }\ 1751}\ 1752\ 1753static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 1754 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 1755 int i;\ 1756 \ 1757 for(i=0; i<h; i++)\ 1758 {\ 1759 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ 1760 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ 1761 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ 1762 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ 1763 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ 1764 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ 1765 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ 1766 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ 1767 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ 1768 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ 1769 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ 1770 OP(dst[11], (src[11]+src[12])*20 - 
(src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ 1771 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ 1772 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ 1773 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ 1774 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ 1775 dst+=dstStride;\ 1776 src+=srcStride;\ 1777 }\ 1778}\ 1779\ 1780static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 1781 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 1782 int i;\ 1783 const int w=16;\ 1784 for(i=0; i<w; i++)\ 1785 {\ 1786 const int src0= src[0*srcStride];\ 1787 const int src1= src[1*srcStride];\ 1788 const int src2= src[2*srcStride];\ 1789 const int src3= src[3*srcStride];\ 1790 const int src4= src[4*srcStride];\ 1791 const int src5= src[5*srcStride];\ 1792 const int src6= src[6*srcStride];\ 1793 const int src7= src[7*srcStride];\ 1794 const int src8= src[8*srcStride];\ 1795 const int src9= src[9*srcStride];\ 1796 const int src10= src[10*srcStride];\ 1797 const int src11= src[11*srcStride];\ 1798 const int src12= src[12*srcStride];\ 1799 const int src13= src[13*srcStride];\ 1800 const int src14= src[14*srcStride];\ 1801 const int src15= src[15*srcStride];\ 1802 const int src16= src[16*srcStride];\ 1803 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ 1804 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ 1805 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ 1806 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ 1807 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ 1808 OP(dst[ 5*dstStride], (src5 
+src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ 1809 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ 1810 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ 1811 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ 1812 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ 1813 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ 1814 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ 1815 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ 1816 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ 1817 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ 1818 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ 1819 dst++;\ 1820 src++;\ 1821 }\ 1822}\ 1823\ 1824static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ 1825 OPNAME ## pixels8_c(dst, src, stride, 8);\ 1826}\ 1827\ 1828static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 1829 uint8_t half[64];\ 1830 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 1831 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ 1832}\ 1833\ 1834static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 1835 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ 1836}\ 1837\ 1838static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 1839 uint8_t half[64];\ 1840 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 1841 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ 1842}\ 1843\ 1844static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 1845 
uint8_t full[16*9];\ 1846 uint8_t half[64];\ 1847 copy_block9(full, src, 16, stride, 9);\ 1848 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 1849 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ 1850}\ 1851\ 1852static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 1853 uint8_t full[16*9];\ 1854 copy_block9(full, src, 16, stride, 9);\ 1855 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ 1856}\ 1857\ 1858static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 1859 uint8_t full[16*9];\ 1860 uint8_t half[64];\ 1861 copy_block9(full, src, 16, stride, 9);\ 1862 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 1863 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ 1864}\ 1865void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1866 uint8_t full[16*9];\ 1867 uint8_t halfH[72];\ 1868 uint8_t halfV[64];\ 1869 uint8_t halfHV[64];\ 1870 copy_block9(full, src, 16, stride, 9);\ 1871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1872 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 1873 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1874 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1875}\ 1876static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 1877 uint8_t full[16*9];\ 1878 uint8_t halfH[72];\ 1879 uint8_t halfHV[64];\ 1880 copy_block9(full, src, 16, stride, 9);\ 1881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1882 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ 1883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1884 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ 1885}\ 1886void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1887 uint8_t full[16*9];\ 1888 uint8_t halfH[72];\ 1889 uint8_t halfV[64];\ 1890 uint8_t halfHV[64];\ 1891 copy_block9(full, src, 16, stride, 9);\ 1892 put ## 
RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 1894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1895 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1896}\ 1897static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 1898 uint8_t full[16*9];\ 1899 uint8_t halfH[72];\ 1900 uint8_t halfHV[64];\ 1901 copy_block9(full, src, 16, stride, 9);\ 1902 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1903 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ 1904 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1905 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ 1906}\ 1907void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1908 uint8_t full[16*9];\ 1909 uint8_t halfH[72];\ 1910 uint8_t halfV[64];\ 1911 uint8_t halfHV[64];\ 1912 copy_block9(full, src, 16, stride, 9);\ 1913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1914 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 1915 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1916 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1917}\ 1918static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 1919 uint8_t full[16*9];\ 1920 uint8_t halfH[72];\ 1921 uint8_t halfHV[64];\ 1922 copy_block9(full, src, 16, stride, 9);\ 1923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1924 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ 1925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1926 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1927}\ 1928void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1929 uint8_t full[16*9];\ 1930 uint8_t halfH[72];\ 1931 uint8_t halfV[64];\ 1932 uint8_t halfHV[64];\ 1933 copy_block9(full, src, 16, stride, 9);\ 1934 put ## RND ## 
mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ 1935 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 1936 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1937 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 1938}\ 1939static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 1940 uint8_t full[16*9];\ 1941 uint8_t halfH[72];\ 1942 uint8_t halfHV[64];\ 1943 copy_block9(full, src, 16, stride, 9);\ 1944 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1945 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ 1946 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1947 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1948}\ 1949static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 1950 uint8_t halfH[72];\ 1951 uint8_t halfHV[64];\ 1952 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 1953 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1954 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ 1955}\ 1956static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 1957 uint8_t halfH[72];\ 1958 uint8_t halfHV[64];\ 1959 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 1960 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1961 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 1962}\ 1963void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1964 uint8_t full[16*9];\ 1965 uint8_t halfH[72];\ 1966 uint8_t halfV[64];\ 1967 uint8_t halfHV[64];\ 1968 copy_block9(full, src, 16, stride, 9);\ 1969 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1970 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 1971 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1972 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ 1973}\ 1974static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int 
stride){\ 1975 uint8_t full[16*9];\ 1976 uint8_t halfH[72];\ 1977 copy_block9(full, src, 16, stride, 9);\ 1978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1979 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ 1980 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 1981}\ 1982void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 1983 uint8_t full[16*9];\ 1984 uint8_t halfH[72];\ 1985 uint8_t halfV[64];\ 1986 uint8_t halfHV[64];\ 1987 copy_block9(full, src, 16, stride, 9);\ 1988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 1990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 1991 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ 1992}\ 1993static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 1994 uint8_t full[16*9];\ 1995 uint8_t halfH[72];\ 1996 copy_block9(full, src, 16, stride, 9);\ 1997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 1998 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ 1999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 2000}\ 2001static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 2002 uint8_t halfH[72];\ 2003 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 2004 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 2005}\ 2006static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ 2007 OPNAME ## pixels16_c(dst, src, stride, 16);\ 2008}\ 2009\ 2010static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 2011 uint8_t half[256];\ 2012 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 2013 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ 2014}\ 2015\ 2016static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 2017 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ 2018}\ 2019\ 
2020static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 2021 uint8_t half[256];\ 2022 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 2023 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ 2024}\ 2025\ 2026static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 2027 uint8_t full[24*17];\ 2028 uint8_t half[256];\ 2029 copy_block17(full, src, 24, stride, 17);\ 2030 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 2031 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ 2032}\ 2033\ 2034static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 2035 uint8_t full[24*17];\ 2036 copy_block17(full, src, 24, stride, 17);\ 2037 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ 2038}\ 2039\ 2040static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 2041 uint8_t full[24*17];\ 2042 uint8_t half[256];\ 2043 copy_block17(full, src, 24, stride, 17);\ 2044 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 2045 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ 2046}\ 2047void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2048 uint8_t full[24*17];\ 2049 uint8_t halfH[272];\ 2050 uint8_t halfV[256];\ 2051 uint8_t halfHV[256];\ 2052 copy_block17(full, src, 24, stride, 17);\ 2053 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2054 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 2055 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2056 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 2057}\ 2058static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 2059 uint8_t full[24*17];\ 2060 uint8_t halfH[272];\ 2061 uint8_t halfHV[256];\ 2062 copy_block17(full, src, 24, stride, 17);\ 2063 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2064 put ## RND ## pixels16_l2(halfH, halfH, 
full, 16, 16, 24, 17);\ 2065 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2066 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ 2067}\ 2068void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2069 uint8_t full[24*17];\ 2070 uint8_t halfH[272];\ 2071 uint8_t halfV[256];\ 2072 uint8_t halfHV[256];\ 2073 copy_block17(full, src, 24, stride, 17);\ 2074 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2075 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 2076 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2077 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 2078}\ 2079static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 2080 uint8_t full[24*17];\ 2081 uint8_t halfH[272];\ 2082 uint8_t halfHV[256];\ 2083 copy_block17(full, src, 24, stride, 17);\ 2084 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2085 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ 2086 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2087 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ 2088}\ 2089void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2090 uint8_t full[24*17];\ 2091 uint8_t halfH[272];\ 2092 uint8_t halfV[256];\ 2093 uint8_t halfHV[256];\ 2094 copy_block17(full, src, 24, stride, 17);\ 2095 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2096 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 2097 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2098 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 2099}\ 2100static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 2101 uint8_t full[24*17];\ 2102 uint8_t halfH[272];\ 2103 uint8_t halfHV[256];\ 2104 copy_block17(full, src, 24, stride, 17);\ 2105 put ## RND ## 
mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2106 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ 2107 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2108 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 2109}\ 2110void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2111 uint8_t full[24*17];\ 2112 uint8_t halfH[272];\ 2113 uint8_t halfV[256];\ 2114 uint8_t halfHV[256];\ 2115 copy_block17(full, src, 24, stride, 17);\ 2116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ 2117 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 2118 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2119 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 2120}\ 2121static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 2122 uint8_t full[24*17];\ 2123 uint8_t halfH[272];\ 2124 uint8_t halfHV[256];\ 2125 copy_block17(full, src, 24, stride, 17);\ 2126 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2127 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ 2128 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2129 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 2130}\ 2131static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 2132 uint8_t halfH[272];\ 2133 uint8_t halfHV[256];\ 2134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 2135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2136 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ 2137}\ 2138static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 2139 uint8_t halfH[272];\ 2140 uint8_t halfHV[256];\ 2141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 2142 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2143 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 
16);\ 2144}\ 2145void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2146 uint8_t full[24*17];\ 2147 uint8_t halfH[272];\ 2148 uint8_t halfV[256];\ 2149 uint8_t halfHV[256];\ 2150 copy_block17(full, src, 24, stride, 17);\ 2151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2152 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 2153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2154 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ 2155}\ 2156static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ 2157 uint8_t full[24*17];\ 2158 uint8_t halfH[272];\ 2159 copy_block17(full, src, 24, stride, 17);\ 2160 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2161 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ 2162 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 2163}\ 2164void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 2165 uint8_t full[24*17];\ 2166 uint8_t halfH[272];\ 2167 uint8_t halfV[256];\ 2168 uint8_t halfHV[256];\ 2169 copy_block17(full, src, 24, stride, 17);\ 2170 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2171 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 2172 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 2173 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ 2174}\ 2175static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 2176 uint8_t full[24*17];\ 2177 uint8_t halfH[272];\ 2178 copy_block17(full, src, 24, stride, 17);\ 2179 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 2180 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ 2181 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 2182}\ 2183static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 2184 uint8_t halfH[272];\ 2185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, 
stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
/* end of the QPEL_MC() macro body (MPEG-4 quarter-pel motion compensation) */

/* Store/average operators plugged into QPEL_MC as OP.
 * (b) is an 8-tap filter sum scaled by 32; "+16 >>5" rounds it back to
 * pixel range and cm[] (the crop table) clamps to 0..255.  The "_no_rnd"
 * variants add 15 instead of 16, i.e. round-down ("no rounding" mode). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the MPEG-4 qpel MC function families: put, put_no_rnd, avg. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

#if 1
/* H.264 half-pel lowpass filters: the 6-tap (1,-5,20,20,-5,1) filter from
 * the H.264 spec, expanded for block widths/heights 2, 4, 8 (16 is built
 * from four 8x8 calls below).  Three variants per size:
 *   _h_lowpass : horizontal filtering, OP normalizes (filter sum is 32x).
 *   _v_lowpass : vertical filtering, OP normalizes.
 *   _hv_lowpass: horizontal pass into the int16_t tmp[] buffer (unclipped,
 *                needs h+5 rows of context), then vertical pass over tmp
 *                with OP2, which normalizes the doubly-scaled (1024x) sum.
 * OP/OP2 are substituted with the put/avg operators defined further down. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    /* ^ rewind so tmp points at the row aligned with output row 0,
     *   leaving two rows of filtered context above it */ \
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
/* The 16-wide/-tall variants are composed of four 8x8 calls. */ \
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

/* H.264 quarter-pel MC entry points for one block SIZE.  _mcXY naming:
 * X = horizontal quarter-pel phase (0..3), Y = vertical phase (0..3);
 * quarter-pel positions are obtained by averaging half-pel intermediates
 * via the pixels##SIZE##_l2 helpers, as the spec prescribes. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ##
SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    /* vertical filtering needs 2 rows of context above and 3 below,
     * hence the SIZE x (SIZE+5) copy and the full_mid offset */ \
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

/* Operators substituted into H264_LOWPASS: OP normalizes a 32x-scaled
 * one-pass filter sum (+16 >>5), OP2 a 1024x-scaled two-pass sum
 * (+512 >>10); cm[] clamps to 0..255. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the H.264 qpel functions for put and avg, for block sizes
 * 2/4/8/16.  NOTE(review): no 2x2 avg variant is instantiated here. */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif

/* H.264 explicit weighted prediction:
 *   op_scale1: unidirectional  (pix*weight + offset) >> log2_denom
 *   op_scale2: bidirectional   (src*w + dst*w + offset) >> (log2_denom+1)
 * both clipped to 0..255. */
#define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    /* ^ pre-scale the offset and fold in the rounding term */ \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
2644 op_scale1(7); \ 2645 if(W==8) continue; \ 2646 op_scale1(8); \ 2647 op_scale1(9); \ 2648 op_scale1(10); \ 2649 op_scale1(11); \ 2650 op_scale1(12); \ 2651 op_scale1(13); \ 2652 op_scale1(14); \ 2653 op_scale1(15); \ 2654 } \ 2655} \ 2656static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ 2657 int y; \ 2658 offset = ((offset + 1) | 1) << log2_denom; \ 2659 for(y=0; y<H; y++, dst += stride, src += stride){ \ 2660 op_scale2(0); \ 2661 op_scale2(1); \ 2662 if(W==2) continue; \ 2663 op_scale2(2); \ 2664 op_scale2(3); \ 2665 if(W==4) continue; \ 2666 op_scale2(4); \ 2667 op_scale2(5); \ 2668 op_scale2(6); \ 2669 op_scale2(7); \ 2670 if(W==8) continue; \ 2671 op_scale2(8); \ 2672 op_scale2(9); \ 2673 op_scale2(10); \ 2674 op_scale2(11); \ 2675 op_scale2(12); \ 2676 op_scale2(13); \ 2677 op_scale2(14); \ 2678 op_scale2(15); \ 2679 } \ 2680} 2681 2682H264_WEIGHT(16,16) 2683H264_WEIGHT(16,8) 2684H264_WEIGHT(8,16) 2685H264_WEIGHT(8,8) 2686H264_WEIGHT(8,4) 2687H264_WEIGHT(4,8) 2688H264_WEIGHT(4,4) 2689H264_WEIGHT(4,2) 2690H264_WEIGHT(2,4) 2691H264_WEIGHT(2,2) 2692 2693#undef op_scale1 2694#undef op_scale2 2695#undef H264_WEIGHT 2696 2697static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ 2698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 2699 int i; 2700 2701 for(i=0; i<h; i++){ 2702 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; 2703 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; 2704 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; 2705 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; 2706 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; 2707 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; 2708 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; 2709 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; 2710 dst+=dstStride; 
2711 src+=srcStride; 2712 } 2713} 2714 2715#if CONFIG_CAVS_DECODER 2716/* AVS specific */ 2717void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); 2718 2719void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2720 put_pixels8_c(dst, src, stride, 8); 2721} 2722void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2723 avg_pixels8_c(dst, src, stride, 8); 2724} 2725void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2726 put_pixels16_c(dst, src, stride, 16); 2727} 2728void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2729 avg_pixels16_c(dst, src, stride, 16); 2730} 2731#endif /* CONFIG_CAVS_DECODER */ 2732 2733#if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER 2734/* VC-1 specific */ 2735void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); 2736 2737void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { 2738 put_pixels8_c(dst, src, stride, 8); 2739} 2740#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ 2741 2742void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); 2743 2744/* H264 specific */ 2745void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); 2746 2747#if CONFIG_RV30_DECODER 2748void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); 2749#endif /* CONFIG_RV30_DECODER */ 2750 2751#if CONFIG_RV40_DECODER 2752static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2753 put_pixels16_xy2_c(dst, src, stride, 16); 2754} 2755static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2756 avg_pixels16_xy2_c(dst, src, stride, 16); 2757} 2758static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2759 put_pixels8_xy2_c(dst, src, stride, 8); 2760} 2761static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2762 avg_pixels8_xy2_c(dst, src, stride, 8); 2763} 2764 2765void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); 2766#endif /* CONFIG_RV40_DECODER */ 

/**
 * WMV2 vertical half-pel lowpass filter.
 * Same 4-tap (-1, 9, 9, -1)/16 filter as wmv2_mspel8_h_lowpass, applied
 * down each of w columns; reads one row above and up to 9 rows below.
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;  /* clamp-to-[0,255] lookup */
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0 ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}

/* WMV2 mspel motion compensation, one function per sub-pel position
 * (mcXY: X = horizontal phase, Y = vertical phase). mc00 is a plain copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

/* 1/4-pel horizontal: average of source and horizontally filtered half-pel. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

/* 1/2-pel horizontal: filtered output written directly. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* 3/4-pel horizontal: average of src+1 and the half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

/* 1/2-pel vertical. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1/4,1/2): average of vertical half-pel and the H-then-V filtered block.
 * halfH holds 11 filtered rows (8 + 3 extra for the vertical filter taps). */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (3/4,1/2): as mc12 but the vertical half-pel is taken at src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (1/2,1/2): horizontal then vertical half-pel filtering. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}

/**
 * H.263 deblocking, vertical edge filter (filters across a horizontal
 * block boundary): modifies the two pixel rows on each side of the edge.
 * The piecewise-linear d1 ramp and the strength table follow H.263 Annex J.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear correction: full inside +-strength,
             * ramping back to 0 at +-2*strength */
            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless-ish clip to [0,255]: bit 8 set means out of range */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+  stride] = p3 + d2;
        }
    }
}

/**
 * H.263 deblocking, horizontal edge filter (filters across a vertical
 * block boundary). Same arithmetic as h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}

/**
 * H.261 in-loop filter: separable [1 2 1]/4 smoothing of an 8x8 block,
 * done vertically into temp[] first, then horizontally back into src.
 * Border rows/columns keep a pass-through weight (stored as 4*pixel).
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];  /* top row: no vertical filtering */
        temp[x + 7*8] = 4*src[x + 7*stride];  /* bottom row likewise */
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;  /* left column: vertical-only */
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;  /* right column likewise */
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}

/**
 * H.264 normal (bS < 4) luma deblocking along one 16-sample edge.
 * xstride steps across the edge, ystride along it; tc0 holds the per-4-pixel
 * clipping values (negative entry = skip that group). Implements the
 * filtering process of H.264 clause 8.7.
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                /* each extra p1/q1 update widens the p0/q0 clip range by 1 */
                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical luma edge (columns filtered, xstride=stride). */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Horizontal luma edge (rows filtered, xstride=1). */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}

/**
 * H.264 strong (bS == 4, intra) luma deblocking along one 16-sample edge.
 * Chooses between the strong 3-pixel filter and the weak 1-pixel filter
 * per sample, per H.264 clause 8.7.
 */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}

/**
 * H.264 normal chroma deblocking along one 8-sample edge (2 samples per
 * tc0 entry; tc <= 0 skips the group). Only p0/q0 are modified.
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}

/**
 * H.264 strong (intra) chroma deblocking along one 8-sample edge:
 * unconditional weak filter on p0/q0 where the alpha/beta tests pass.
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}

/* Sum of absolute differences over a 16-wide block, h rows.
 * The unused void* matches the me_cmp_func signature (MpegEncContext*). */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs the horizontal half-pel interpolation of pix2.
 * Note: reads pix2[16], one column past the block, as the filter needs it. */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs the vertical half-pel interpolation of pix2 (pix3 = next row). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* SAD vs the 2D (x+y) half-pel interpolation of pix2 (4-sample average).
 * Reads one column (pix2[16]/pix3[16]) and one row past the block. */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 8-wide variants of the four SAD functions above. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/**
 * Noise-preserving SSE (16-wide): SSE plus a weighted difference of the
 * two blocks' horizontal/vertical gradients; the weight comes from
 * avctx->nsse_weight (default 8 when no context is supplied).
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/* 8-wide variant of nsse16_c. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

/**
 * Weighted squared error if `scale` times `basis` were added to `rem`,
 * without modifying rem. Used by the trellis/RD quantizer to evaluate a
 * candidate coefficient change.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

/* Commit the change evaluated by try_8x8basis_c: rem += scale*basis. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}

/**
 * Permutes an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 *                  (inverse) permuted to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    /* lift the nonzero coefficients out, zeroing their old slots... */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    /* ...then drop them back in at their permuted positions */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}

/* Comparison function for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

/* Fill cmp[0..5] with the comparison functions selected by `type`
 * (a FF_CMP_* value in the low byte), taken from the DSPContext tables. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

/* Zero one 8x8 coefficient block. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}

/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}

/* dst[i] += src[i] for w bytes, done a machine word at a time using the
 * pb_7f/pb_80 SWAR carry trick (per-byte add without cross-byte carries). */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}

/* dst[i] = src1[i] + src2[i], same SWAR technique as add_bytes_c. */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}

/* dst[i] = src1[i] - src2[i]; word-at-a-time SWAR subtract, with a plain
 * byte loop fallback when src2 is misaligned on strict-alignment targets. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}

/* HuffYUV median prediction decode: reconstruct a row from the residuals in
 * diff[] and the previous row src1[], carrying left/left_top state across
 * calls. The predictor is mid_pred(left, top, left+top-topleft) mod 256. */
static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV median prediction encode: inverse of the function above,
 * emitting residuals dst[] = src2[] - predictor. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

/* Building blocks of the 8x8 Hadamard transform used for SATD:
 * BUTTERFLY2 writes sum/difference to new slots, BUTTERFLY1 in place,
 * BUTTERFLYA yields |x+y| + |x-y| for the final accumulation stage. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD: sum of absolute values of the 8x8 Hadamard transform of src-dst.
 * Rows are transformed into temp[], then columns folded into the sum. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}

/* Intra SATD: Hadamard of src itself, with the DC term subtracted at the
 * end so the score is mean-invariant. `dummy` is unused (signature only). */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

/* FF_CMP_DCT: sum of absolute DCT coefficients of the difference block. */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}

#if CONFIG_GPL
/* One pass of the H.264 integer 8x8 DCT over SRC()/DST() accessors. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* FF_CMP_DCT264: like dct_sad but using the H.264 8x8 transform;
 * rows are transformed in place, then |coeff| summed during the
 * column pass via the DST() redefinition. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

/* FF_CMP_DCTMAX: largest absolute DCT coefficient of the difference. */
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}

/* FF_CMP_PSNR: squared error introduced by quantize/dequantize/IDCT of the
 * difference block, i.e. the coding distortion at the current qscale. */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}

/**
 * FF_CMP_RD: rate-distortion cost of coding the 8x8 difference block.
 * Quantizes the DCT of src1-src2, estimates the VLC bit cost from the
 * run/level code-length tables, reconstructs via dequantize+IDCT onto a
 * copy of src2, and returns distortion + lambda-scaled bits.
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    /* bak needs 8 rows of `stride` bytes; `stride` uint64_t elements give
     * exactly 8*stride bytes since sizeof(uint64_t) == 8 */
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* copy the 8x8 reference block, 2 x 32 bits per row */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;   /* DC is costed separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;  /* bias into the table's 0..127 level range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}

static int bit8x8_c(/*MpegEncContext*/
void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3881 MpegEncContext * const s= (MpegEncContext *)c; 3882 const uint8_t *scantable= s->intra_scantable.permutated; 3883 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); 3884 DCTELEM * const temp= (DCTELEM*)aligned_temp; 3885 int i, last, run, bits, level, start_i; 3886 const int esc_length= s->ac_esc_length; 3887 uint8_t * length; 3888 uint8_t * last_length; 3889 3890 assert(h==8); 3891 3892 s->dsp.diff_pixels(temp, src1, src2, stride); 3893 3894 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 3895 3896 bits=0; 3897 3898 if (s->mb_intra) { 3899 start_i = 1; 3900 length = s->intra_ac_vlc_length; 3901 last_length= s->intra_ac_vlc_last_length; 3902 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma 3903 } else { 3904 start_i = 0; 3905 length = s->inter_ac_vlc_length; 3906 last_length= s->inter_ac_vlc_last_length; 3907 } 3908 3909 if(last>=start_i){ 3910 run=0; 3911 for(i=start_i; i<last; i++){ 3912 int j= scantable[i]; 3913 level= temp[j]; 3914 3915 if(level){ 3916 level+=64; 3917 if((level&(~127)) == 0){ 3918 bits+= length[UNI_AC_ENC_INDEX(run, level)]; 3919 }else 3920 bits+= esc_length; 3921 run=0; 3922 }else 3923 run++; 3924 } 3925 i= scantable[last]; 3926 3927 level= temp[i] + 64; 3928 3929 assert(level - 64); 3930 3931 if((level&(~127)) == 0){ 3932 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; 3933 }else 3934 bits+= esc_length; 3935 } 3936 3937 return bits; 3938} 3939 3940#define VSAD_INTRA(size) \ 3941static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ 3942 int score=0; \ 3943 int x,y; \ 3944 \ 3945 for(y=1; y<h; y++){ \ 3946 for(x=0; x<size; x+=4){ \ 3947 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \ 3948 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \ 3949 } \ 3950 s+= stride; \ 3951 } \ 3952 \ 3953 return score; \ 3954} 
3955VSAD_INTRA(8) 3956VSAD_INTRA(16) 3957 3958static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ 3959 int score=0; 3960 int x,y; 3961 3962 for(y=1; y<h; y++){ 3963 for(x=0; x<16; x++){ 3964 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); 3965 } 3966 s1+= stride; 3967 s2+= stride; 3968 } 3969 3970 return score; 3971} 3972 3973#define SQ(a) ((a)*(a)) 3974#define VSSE_INTRA(size) \ 3975static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ 3976 int score=0; \ 3977 int x,y; \ 3978 \ 3979 for(y=1; y<h; y++){ \ 3980 for(x=0; x<size; x+=4){ \ 3981 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \ 3982 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \ 3983 } \ 3984 s+= stride; \ 3985 } \ 3986 \ 3987 return score; \ 3988} 3989VSSE_INTRA(8) 3990VSSE_INTRA(16) 3991 3992static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ 3993 int score=0; 3994 int x,y; 3995 3996 for(y=1; y<h; y++){ 3997 for(x=0; x<16; x++){ 3998 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); 3999 } 4000 s1+= stride; 4001 s2+= stride; 4002 } 4003 4004 return score; 4005} 4006 4007static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, 4008 int size){ 4009 int score=0; 4010 int i; 4011 for(i=0; i<size; i++) 4012 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); 4013 return score; 4014} 4015 4016WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) 4017WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) 4018WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) 4019#if CONFIG_GPL 4020WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) 4021#endif 4022WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) 4023WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) 4024WRAPPER8_16_SQ(rd8x8_c, rd16_c) 4025WRAPPER8_16_SQ(bit8x8_c, bit16_c) 4026 4027static void vector_fmul_c(float *dst, const float *src, int len){ 4028 int i; 4029 for(i=0; i<len; 
i++) 4030 dst[i] *= src[i]; 4031} 4032 4033static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){ 4034 int i; 4035 src1 += len-1; 4036 for(i=0; i<len; i++) 4037 dst[i] = src0[i] * src1[-i]; 4038} 4039 4040void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){ 4041 int i; 4042 for(i=0; i<len; i++) 4043 dst[i*step] = src0[i] * src1[i] + src2[i] + src3; 4044} 4045 4046void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){ 4047 int i,j; 4048 dst += len; 4049 win += len; 4050 src0+= len; 4051 for(i=-len, j=len-1; i<0; i++, j--) { 4052 float s0 = src0[i]; 4053 float s1 = src1[j]; 4054 float wi = win[i]; 4055 float wj = win[j]; 4056 dst[i] = s0*wj - s1*wi + add_bias; 4057 dst[j] = s0*wi + s1*wj + add_bias; 4058 } 4059} 4060 4061static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){ 4062 int i; 4063 for(i=0; i<len; i++) 4064 dst[i] = src[i] * mul; 4065} 4066 4067static av_always_inline int float_to_int16_one(const float *src){ 4068 int_fast32_t tmp = *(const int32_t*)src; 4069 if(tmp & 0xf0000){ 4070 tmp = (0x43c0ffff - tmp)>>31; 4071 // is this faster on some gcc/cpu combinations? 
4072// if(tmp > 0x43c0ffff) tmp = 0xFFFF; 4073// else tmp = 0; 4074 } 4075 return tmp - 0x8000; 4076} 4077 4078void ff_float_to_int16_c(int16_t *dst, const float *src, long len){ 4079 int i; 4080 for(i=0; i<len; i++) 4081 dst[i] = float_to_int16_one(src+i); 4082} 4083 4084void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){ 4085 int i,j,c; 4086 if(channels==2){ 4087 for(i=0; i<len; i++){ 4088 dst[2*i] = float_to_int16_one(src[0]+i); 4089 dst[2*i+1] = float_to_int16_one(src[1]+i); 4090 } 4091 }else{ 4092 for(c=0; c<channels; c++) 4093 for(i=0, j=c; i<len; i++, j+=channels) 4094 dst[j] = float_to_int16_one(src[c]+i); 4095 } 4096} 4097 4098static void add_int16_c(int16_t * v1, int16_t * v2, int order) 4099{ 4100 while (order--) 4101 *v1++ += *v2++; 4102} 4103 4104static void sub_int16_c(int16_t * v1, int16_t * v2, int order) 4105{ 4106 while (order--) 4107 *v1++ -= *v2++; 4108} 4109 4110static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift) 4111{ 4112 int res = 0; 4113 4114 while (order--) 4115 res += (*v1++ * *v2++) >> shift; 4116 4117 return res; 4118} 4119 4120#define W0 2048 4121#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ 4122#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ 4123#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ 4124#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ 4125#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ 4126#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ 4127#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ 4128 4129static void wmv2_idct_row(short * b) 4130{ 4131 int s1,s2; 4132 int a0,a1,a2,a3,a4,a5,a6,a7; 4133 /*step 1*/ 4134 a1 = W1*b[1]+W7*b[7]; 4135 a7 = W7*b[1]-W1*b[7]; 4136 a5 = W5*b[5]+W3*b[3]; 4137 a3 = W3*b[5]-W5*b[3]; 4138 a2 = W2*b[2]+W6*b[6]; 4139 a6 = W6*b[2]-W2*b[6]; 4140 a0 = W0*b[0]+W0*b[4]; 4141 a4 = W0*b[0]-W0*b[4]; 4142 /*step 2*/ 4143 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7, 4144 s2 = (181*(a1-a5-a7+a3)+128)>>8; 4145 /*step 3*/ 4146 
b[0] = (a0+a2+a1+a5 + (1<<7))>>8; 4147 b[1] = (a4+a6 +s1 + (1<<7))>>8; 4148 b[2] = (a4-a6 +s2 + (1<<7))>>8; 4149 b[3] = (a0-a2+a7+a3 + (1<<7))>>8; 4150 b[4] = (a0-a2-a7-a3 + (1<<7))>>8; 4151 b[5] = (a4-a6 -s2 + (1<<7))>>8; 4152 b[6] = (a4+a6 -s1 + (1<<7))>>8; 4153 b[7] = (a0+a2-a1-a5 + (1<<7))>>8; 4154} 4155static void wmv2_idct_col(short * b) 4156{ 4157 int s1,s2; 4158 int a0,a1,a2,a3,a4,a5,a6,a7; 4159 /*step 1, with extended precision*/ 4160 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3; 4161 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3; 4162 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3; 4163 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3; 4164 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3; 4165 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3; 4166 a0 = (W0*b[8*0]+W0*b[8*4] )>>3; 4167 a4 = (W0*b[8*0]-W0*b[8*4] )>>3; 4168 /*step 2*/ 4169 s1 = (181*(a1-a5+a7-a3)+128)>>8; 4170 s2 = (181*(a1-a5-a7+a3)+128)>>8; 4171 /*step 3*/ 4172 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14; 4173 b[8*1] = (a4+a6 +s1 + (1<<13))>>14; 4174 b[8*2] = (a4-a6 +s2 + (1<<13))>>14; 4175 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14; 4176 4177 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14; 4178 b[8*5] = (a4-a6 -s2 + (1<<13))>>14; 4179 b[8*6] = (a4+a6 -s1 + (1<<13))>>14; 4180 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14; 4181} 4182void ff_wmv2_idct_c(short * block){ 4183 int i; 4184 4185 for(i=0;i<64;i+=8){ 4186 wmv2_idct_row(block+i); 4187 } 4188 for(i=0;i<8;i++){ 4189 wmv2_idct_col(block+i); 4190 } 4191} 4192/* XXX: those functions should be suppressed ASAP when all IDCTs are 4193 converted */ 4194static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) 4195{ 4196 ff_wmv2_idct_c(block); 4197 put_pixels_clamped_c(block, dest, line_size); 4198} 4199static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) 4200{ 4201 ff_wmv2_idct_c(block); 4202 add_pixels_clamped_c(block, dest, line_size); 4203} 4204static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 4205{ 4206 j_rev_dct (block); 4207 put_pixels_clamped_c(block, dest, line_size); 4208} 
/* jrevdct wrapper: IDCT then clamped add onto the destination */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution (lowres=1) jrevdct wrappers */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution (lowres=2) jrevdct wrappers */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 (lowres=3, DC-only) IDCT: just descale and clamp the DC coefficient */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}

/* no-op used as the default prefetch hook */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }

/**
 * Initialize the process-wide lookup tables: the pixel clamping table
 * (ff_cropTbl), the squared-difference table (ff_squareTbl) and the inverse
 * zigzag scan. Must run before any DSP function that uses them.
 */
void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;                          // clamp negatives to 0
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;   // clamp overflows to 255
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}

/**
 * Verify that the compiler honors 16-byte stack alignment, which the SIMD
 * code paths require. Logs a one-time error and returns -1 if not.
 */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}

/**
 * Fill a DSPContext with the C reference implementations according to the
 * codec context's settings (dct_algo, idct_algo, lowres, enabled codecs),
 * then let the architecture-specific initializers override individual
 * entries, and finally build the IDCT coefficient permutation table.
 */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct    = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct    = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: lowres picks a reduced-size IDCT, otherwise
       idct_algo chooses among the full-size implementations */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER || CONFIG_THEORA_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    if (CONFIG_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
        c->h264_idct_add16     = ff_h264_idct_add16_c;
        c->h264_idct8_add4     = ff_h264_idct8_add4_c;
        c->h264_idct_add8      = ff_h264_idct_add8_c;
        c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
    }

    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_block = clear_block_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

/* fill one half-pel pixel-op table row: [0]=copy, [1]=x half, [2]=y half,
   [3]=xy half */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel MC table; index is y*4+x in third-pel units */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

/* fill one quarter-pel MC table row: 16 entries, index is y*4+x in
   quarter-pel units */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;

    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    c->draw_edges = draw_edges_c;

    /* per-codec DSP extensions */
#if CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif
#if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
    ff_vc1dsp_init(c,avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif
#if CONFIG_RV30_DECODER
    ff_rv30dsp_init(c,avctx);
#endif
#if CONFIG_RV40_DECODER
    ff_rv40dsp_init(c,avctx);
    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

/* register a comparison function pair: [0] = 16x16 variant, [1] = 8x8 */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_SNOW_ENCODER
    c->w53[0]= w53_16_c;
    c->w53[1]= w53_8_c;
    c->w97[0]= w97_16_c;
    c->w97[1]= w97_8_c;
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;
#if CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
    c->h264_loop_filter_strength= NULL;  // only SIMD implementations exist

    if (CONFIG_ANY_H263) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER || CONFIG_THEORA_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
    }
    if (CONFIG_VP6_DECODER) {
        c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#if CONFIG_SNOW_DECODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif

#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
#if CONFIG_FLAC_ENCODER
    c->flac_compute_autocorr = ff_flac_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->vector_fmul_window = ff_vector_fmul_window_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
    c->add_int16 = add_int16_c;
    c->sub_int16 = sub_int16_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* let the architecture-specific code override the C implementations */
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);

    /* entries not claimed by the arch init fall back to the h264 qpel MC */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* build the coefficient permutation matching the selected IDCT's
       expected input ordering */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}