1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of Libav. 6 * 7 * Libav is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * Libav is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with Libav; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * H.264 / AVC / MPEG4 part10 prediction functions. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "mathops.h" 29 30#include "bit_depth_template.c" 31 32static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){ 33 pixel *src = (pixel*)_src; 34 int stride = _stride/sizeof(pixel); 35 const pixel4 a= AV_RN4PA(src-stride); 36 37 AV_WN4PA(src+0*stride, a); 38 AV_WN4PA(src+1*stride, a); 39 AV_WN4PA(src+2*stride, a); 40 AV_WN4PA(src+3*stride, a); 41} 42 43static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){ 44 pixel *src = (pixel*)_src; 45 int stride = _stride/sizeof(pixel); 46 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride])); 47 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride])); 48 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride])); 49 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride])); 50} 51 52static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 53 pixel *src = (pixel*)_src; 54 int stride = _stride/sizeof(pixel); 55 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] 56 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; 57 const pixel4 a = PIXEL_SPLAT_X4(dc); 58 59 AV_WN4PA(src+0*stride, a); 60 AV_WN4PA(src+1*stride, a); 61 AV_WN4PA(src+2*stride, a); 62 AV_WN4PA(src+3*stride, a); 63} 64 65static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 66 pixel *src = (pixel*)_src; 67 int stride = _stride/sizeof(pixel); 68 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; 69 const pixel4 a = PIXEL_SPLAT_X4(dc); 70 71 AV_WN4PA(src+0*stride, a); 72 AV_WN4PA(src+1*stride, a); 73 AV_WN4PA(src+2*stride, a); 74 AV_WN4PA(src+3*stride, a); 75} 76 77static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 78 pixel *src = (pixel*)_src; 79 int stride = _stride/sizeof(pixel); 80 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; 81 const pixel4 a = PIXEL_SPLAT_X4(dc); 82 83 AV_WN4PA(src+0*stride, a); 84 AV_WN4PA(src+1*stride, a); 85 AV_WN4PA(src+2*stride, a); 86 AV_WN4PA(src+3*stride, a); 87} 88 89static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 90 pixel *src = (pixel*)_src; 91 int stride = _stride/sizeof(pixel); 92 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); 93 94 AV_WN4PA(src+0*stride, a); 95 AV_WN4PA(src+1*stride, a); 96 AV_WN4PA(src+2*stride, a); 97 AV_WN4PA(src+3*stride, a); 98} 99 100static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 101 pixel *src = (pixel*)_src; 102 int stride = _stride/sizeof(pixel); 103 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); 104 105 AV_WN4PA(src+0*stride, a); 106 AV_WN4PA(src+1*stride, a); 107 AV_WN4PA(src+2*stride, a); 108 AV_WN4PA(src+3*stride, a); 109} 110 111static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 112 pixel *src = (pixel*)_src; 113 int stride = _stride/sizeof(pixel); 114 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); 115 116 AV_WN4PA(src+0*stride, a); 117 AV_WN4PA(src+1*stride, a); 118 AV_WN4PA(src+2*stride, a); 119 AV_WN4PA(src+3*stride, a); 120} 121 122 123#define LOAD_TOP_RIGHT_EDGE\ 124 const unsigned av_unused t4 = topright[0];\ 125 const unsigned av_unused t5 = topright[1];\ 126 const unsigned av_unused t6 = topright[2];\ 127 const unsigned av_unused t7 = topright[3];\ 128 129#define LOAD_DOWN_LEFT_EDGE\ 130 const unsigned av_unused l4 = src[-1+4*stride];\ 131 const unsigned av_unused l5 = src[-1+5*stride];\ 132 const unsigned av_unused l6 = src[-1+6*stride];\ 133 const unsigned av_unused l7 = src[-1+7*stride];\ 134 135#define LOAD_LEFT_EDGE\ 136 const unsigned av_unused l0 = src[-1+0*stride];\ 137 const unsigned av_unused l1 = src[-1+1*stride];\ 138 const unsigned av_unused l2 = src[-1+2*stride];\ 139 const unsigned av_unused l3 = src[-1+3*stride];\ 140 141#define LOAD_TOP_EDGE\ 142 const unsigned av_unused t0 = src[ 0-1*stride];\ 143 const unsigned av_unused t1 = src[ 1-1*stride];\ 144 const unsigned av_unused t2 = src[ 2-1*stride];\ 145 const unsigned av_unused t3 = src[ 3-1*stride];\ 146 147static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){ 148 pixel *src = (pixel*)_src; 149 int stride = _stride/sizeof(pixel); 150 const int lt= src[-1-1*stride]; 151 LOAD_TOP_EDGE 152 LOAD_LEFT_EDGE 153 154 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 155 src[0+2*stride]= 156 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 157 src[0+1*stride]= 158 src[1+2*stride]= 159 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 160 src[0+0*stride]= 161 src[1+1*stride]= 162 src[2+2*stride]= 163 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 164 src[1+0*stride]= 165 src[2+1*stride]= 166 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; 167 src[2+0*stride]= 168 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 169 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; 170} 171 172static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ 173 pixel *src = (pixel*)_src; 174 const pixel *topright = (const pixel*)_topright; 175 int stride = _stride/sizeof(pixel); 176 LOAD_TOP_EDGE 177 LOAD_TOP_RIGHT_EDGE 178// LOAD_LEFT_EDGE 179 180 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; 181 src[1+0*stride]= 182 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; 183 src[2+0*stride]= 184 src[1+1*stride]= 185 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; 186 src[3+0*stride]= 187 src[2+1*stride]= 188 src[1+2*stride]= 189 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; 190 src[3+1*stride]= 191 src[2+2*stride]= 192 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; 193 src[3+2*stride]= 194 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; 195 src[3+3*stride]=(t6 + 3*t7 + 2)>>2; 196} 197 198static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){ 199 pixel *src = (pixel*)_src; 200 int stride = _stride/sizeof(pixel); 201 const int lt= src[-1-1*stride]; 202 LOAD_TOP_EDGE 203 LOAD_LEFT_EDGE 204 205 src[0+0*stride]= 206 src[1+2*stride]=(lt + t0 + 1)>>1; 207 src[1+0*stride]= 208 src[2+2*stride]=(t0 + t1 + 1)>>1; 209 src[2+0*stride]= 210 src[3+2*stride]=(t1 + t2 + 1)>>1; 211 src[3+0*stride]=(t2 + t3 + 1)>>1; 212 src[0+1*stride]= 213 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 214 src[1+1*stride]= 215 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; 216 src[2+1*stride]= 217 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; 218 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; 219 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 220 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 221} 222 223static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ 224 pixel *src = (pixel*)_src; 225 const pixel *topright = (const pixel*)_topright; 226 int stride = _stride/sizeof(pixel); 227 LOAD_TOP_EDGE 228 LOAD_TOP_RIGHT_EDGE 229 230 src[0+0*stride]=(t0 + t1 + 1)>>1; 231 src[1+0*stride]= 232 src[0+2*stride]=(t1 + t2 + 1)>>1; 233 src[2+0*stride]= 234 src[1+2*stride]=(t2 + t3 + 1)>>1; 235 src[3+0*stride]= 236 src[2+2*stride]=(t3 + t4+ 1)>>1; 237 src[3+2*stride]=(t4 + t5+ 1)>>1; 238 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 239 src[1+1*stride]= 240 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; 241 src[2+1*stride]= 242 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; 243 src[3+1*stride]= 244 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; 245 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; 246} 247 248static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){ 249 pixel *src = (pixel*)_src; 250 int stride = _stride/sizeof(pixel); 251 LOAD_LEFT_EDGE 252 253 src[0+0*stride]=(l0 + l1 + 1)>>1; 254 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; 255 src[2+0*stride]= 256 src[0+1*stride]=(l1 + l2 + 1)>>1; 257 src[3+0*stride]= 258 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; 259 src[2+1*stride]= 260 src[0+2*stride]=(l2 + l3 + 1)>>1; 261 src[3+1*stride]= 262 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; 263 src[3+2*stride]= 264 src[1+3*stride]= 265 src[0+3*stride]= 266 src[2+2*stride]= 267 src[2+3*stride]= 268 src[3+3*stride]=l3; 269} 270 271static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){ 272 pixel *src = (pixel*)_src; 273 int stride = _stride/sizeof(pixel); 274 const int lt= src[-1-1*stride]; 275 LOAD_TOP_EDGE 276 LOAD_LEFT_EDGE 277 278 src[0+0*stride]= 279 src[2+1*stride]=(lt + l0 + 1)>>1; 280 src[1+0*stride]= 281 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; 282 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; 283 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; 284 src[0+1*stride]= 285 src[2+2*stride]=(l0 + l1 + 1)>>1; 286 src[1+1*stride]= 287 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 288 src[0+2*stride]= 289 src[2+3*stride]=(l1 + l2+ 1)>>1; 290 src[1+2*stride]= 291 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 292 src[0+3*stride]=(l2 + l3 + 1)>>1; 293 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; 294} 295 296static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){ 297 int i; 298 pixel *src = (pixel*)_src; 299 int stride = _stride/sizeof(pixel); 300 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0); 301 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1); 302 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2); 303 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3); 304 305 for(i=0; i<16; i++){ 306 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 307 AV_WN4PA(((pixel4*)(src+i*stride))+1, b); 308 AV_WN4PA(((pixel4*)(src+i*stride))+2, c); 309 AV_WN4PA(((pixel4*)(src+i*stride))+3, d); 310 } 311} 312 313static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){ 314 int i; 315 pixel *src = (pixel*)_src; 316 stride /= sizeof(pixel); 317 318 for(i=0; i<16; i++){ 319 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); 320 321 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 322 AV_WN4PA(((pixel4*)(src+i*stride))+1, a); 323 AV_WN4PA(((pixel4*)(src+i*stride))+2, a); 324 AV_WN4PA(((pixel4*)(src+i*stride))+3, a); 325 } 326} 327 328#define PREDICT_16x16_DC(v)\ 329 for(i=0; i<16; i++){\ 330 AV_WN4PA(src+ 0, v);\ 331 AV_WN4PA(src+ 4, v);\ 332 AV_WN4PA(src+ 8, v);\ 333 AV_WN4PA(src+12, v);\ 334 src += stride;\ 335 } 336 337static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){ 338 int i, dc=0; 339 pixel *src = (pixel*)_src; 340 pixel4 dcsplat; 341 stride /= sizeof(pixel); 342 343 for(i=0;i<16; i++){ 344 dc+= src[-1+i*stride]; 345 } 346 347 for(i=0;i<16; i++){ 348 dc+= src[i-stride]; 349 } 350 351 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); 352 PREDICT_16x16_DC(dcsplat); 353} 354 355static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){ 356 int i, dc=0; 357 pixel *src = (pixel*)_src; 358 pixel4 dcsplat; 359 stride /= sizeof(pixel); 360 361 for(i=0;i<16; i++){ 362 dc+= src[-1+i*stride]; 363 } 364 365 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); 366 PREDICT_16x16_DC(dcsplat); 367} 368 369static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){ 370 int i, dc=0; 371 pixel *src = (pixel*)_src; 372 pixel4 dcsplat; 373 stride /= sizeof(pixel); 374 375 for(i=0;i<16; i++){ 376 dc+= src[i-stride]; 377 } 378 379 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); 380 PREDICT_16x16_DC(dcsplat); 381} 382 383#define PRED16x16_X(n, v) \ 384static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\ 385 int i;\ 386 pixel *src = (pixel*)_src;\ 387 stride /= sizeof(pixel);\ 388 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\ 389} 390 391PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1) 392PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0) 393PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1) 394 395static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){ 396 int i, j, k; 397 int a; 398 INIT_CLIP 399 pixel *src = (pixel*)_src; 400 int stride = _stride/sizeof(pixel); 401 const pixel * const src0 = src +7-stride; 402 const pixel * src1 = src +8*stride-1; 403 const pixel * src2 = src1-2*stride; // == src+6*stride-1; 404 int H = src0[1] - src0[-1]; 405 int V = src1[0] - src2[ 0]; 406 for(k=2; k<=8; ++k) { 407 src1 += stride; src2 -= stride; 408 H += k*(src0[k] - src0[-k]); 409 V += k*(src1[0] - src2[ 0]); 410 } 411 if(svq3){ 412 H = ( 5*(H/4) ) / 16; 413 V = ( 5*(V/4) ) / 16; 414 415 /* required for 100% accuracy */ 416 i = H; H = V; V = i; 417 }else if(rv40){ 418 H = ( H + (H>>2) ) >> 4; 419 V = ( V + (V>>2) ) >> 4; 420 }else{ 421 H = ( 5*H+32 ) >> 6; 422 V = ( 5*V+32 ) >> 6; 423 } 424 425 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); 426 for(j=16; j>0; --j) { 427 int b = a; 428 a += V; 429 for(i=-16; i<0; i+=4) { 430 src[16+i] = CLIP((b ) >> 5); 431 src[17+i] = CLIP((b+ H) >> 5); 432 src[18+i] = CLIP((b+2*H) >> 5); 433 src[19+i] = CLIP((b+3*H) >> 5); 434 b += 4*H; 435 } 436 src += stride; 437 } 438} 439 440static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){ 441 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0); 442} 443 444static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ 445 int i; 446 pixel *src = (pixel*)_src; 447 int stride = _stride/sizeof(pixel); 448 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); 449 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); 450 451 for(i=0; i<8; i++){ 452 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 453 AV_WN4PA(((pixel4*)(src+i*stride))+1, b); 454 } 455} 456 457static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){ 458 int i; 459 pixel *src = (pixel*)_src; 460 int stride = _stride>>(sizeof(pixel)-1); 461 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); 462 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); 463 464 for(i=0; i<16; i++){ 465 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 466 AV_WN4PA(((pixel4*)(src+i*stride))+1, b); 467 } 468} 469 470static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ 471 int i; 472 pixel *src = (pixel*)_src; 473 stride /= sizeof(pixel); 474 475 for(i=0; i<8; i++){ 476 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); 477 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 478 AV_WN4PA(((pixel4*)(src+i*stride))+1, a); 479 } 480} 481 482static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){ 483 int i; 484 pixel *src = (pixel*)_src; 485 stride >>= sizeof(pixel)-1; 486 for(i=0; i<16; i++){ 487 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); 488 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 489 AV_WN4PA(((pixel4*)(src+i*stride))+1, a); 490 } 491} 492 493#define PRED8x8_X(n, v)\ 494static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ 495 int i;\ 496 const pixel4 a = PIXEL_SPLAT_X4(v);\ 497 pixel *src = (pixel*)_src;\ 498 stride /= sizeof(pixel);\ 499 for(i=0; i<8; i++){\ 500 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\ 501 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\ 502 }\ 503} 504 505PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1) 506PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0) 507PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1) 508 509static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){ 510 FUNCC(pred8x8_128_dc)(_src, stride); 511 FUNCC(pred8x8_128_dc)(_src+8*stride, stride); 512} 513 514static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ 515 int i; 516 int dc0, dc2; 517 pixel4 dc0splat, dc2splat; 518 pixel *src = (pixel*)_src; 519 stride /= sizeof(pixel); 520 521 dc0=dc2=0; 522 for(i=0;i<4; i++){ 523 dc0+= src[-1+i*stride]; 524 dc2+= src[-1+(i+4)*stride]; 525 } 526 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); 527 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); 528 529 for(i=0; i<4; i++){ 530 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 531 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat); 532 } 533 for(i=4; i<8; i++){ 534 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); 535 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat); 536 } 537} 538 539static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){ 540 FUNCC(pred8x8_left_dc)(_src, stride); 541 FUNCC(pred8x8_left_dc)(_src+8*stride, stride); 542} 543 544static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ 545 int i; 546 int dc0, dc1; 547 pixel4 dc0splat, dc1splat; 548 pixel *src = (pixel*)_src; 549 stride /= sizeof(pixel); 550 551 dc0=dc1=0; 552 for(i=0;i<4; i++){ 553 dc0+= src[i-stride]; 554 dc1+= src[4+i-stride]; 555 } 556 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); 557 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 558 559 for(i=0; i<4; i++){ 560 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 561 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 562 } 563 for(i=4; i<8; i++){ 564 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 565 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 566 } 567} 568 569static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){ 570 int i; 571 int dc0, dc1; 572 pixel4 dc0splat, dc1splat; 573 pixel *src = (pixel*)_src; 574 stride >>= sizeof(pixel)-1; 575 576 dc0=dc1=0; 577 for(i=0;i<4; i++){ 578 dc0+= src[i-stride]; 579 dc1+= src[4+i-stride]; 580 } 581 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); 582 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 583 584 for(i=0; i<16; i++){ 585 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 586 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 587 } 588} 589 590static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ 591 int i; 592 int dc0, dc1, dc2; 593 pixel4 dc0splat, dc1splat, dc2splat, dc3splat; 594 pixel *src = (pixel*)_src; 595 stride /= sizeof(pixel); 596 597 dc0=dc1=dc2=0; 598 for(i=0;i<4; i++){ 599 dc0+= src[-1+i*stride] + src[i-stride]; 600 dc1+= src[4+i-stride]; 601 dc2+= src[-1+(i+4)*stride]; 602 } 603 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); 604 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 605 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); 606 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); 607 608 for(i=0; i<4; i++){ 609 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 610 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 611 } 612 for(i=4; i<8; i++){ 613 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); 614 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); 615 } 616} 617 618static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ 619 int i; 620 int dc0, dc1, dc2, dc3, dc4; 621 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; 622 pixel *src = (pixel*)_src; 623 stride >>= sizeof(pixel)-1; 624 625 dc0=dc1=dc2=dc3=dc4=0; 626 for(i=0;i<4; i++){ 627 dc0+= src[-1+i*stride] + src[i-stride]; 628 dc1+= src[4+i-stride]; 629 dc2+= src[-1+(i+4)*stride]; 630 dc3+= src[-1+(i+8)*stride]; 631 dc4+= src[-1+(i+12)*stride]; 632 } 633 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); 634 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 635 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); 636 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); 637 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); 638 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); 639 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); 640 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); 641 642 for(i=0; i<4; i++){ 643 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 644 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 645 } 646 for(i=4; i<8; i++){ 647 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); 648 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); 649 } 650 for(i=8; i<12; i++){ 651 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat); 652 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat); 653 } 654 for(i=12; i<16; i++){ 655 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat); 656 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat); 657 } 658} 659 660static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ 661 FUNCC(pred8x8_top_dc)(src, stride); 662 FUNCC(pred4x4_dc)(src, NULL, stride); 663} 664 665static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){ 666 FUNCC(pred8x16_top_dc)(src, stride); 667 FUNCC(pred4x4_dc)(src, NULL, stride); 668} 669 670static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ 671 FUNCC(pred8x8_dc)(src, stride); 672 FUNCC(pred4x4_top_dc)(src, NULL, stride); 673} 674 675static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){ 676 FUNCC(pred8x16_dc)(src, stride); 677 FUNCC(pred4x4_top_dc)(src, NULL, stride); 678} 679 680static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ 681 FUNCC(pred8x8_left_dc)(src, stride); 682 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); 683 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); 684} 685 686static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){ 687 FUNCC(pred8x16_left_dc)(src, stride); 688 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); 689 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); 690} 691 692static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ 693 FUNCC(pred8x8_left_dc)(src, stride); 694 FUNCC(pred4x4_128_dc)(src , NULL, stride); 695 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); 696} 697 698static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){ 699 FUNCC(pred8x16_left_dc)(src, stride); 700 FUNCC(pred4x4_128_dc)(src , NULL, stride); 701 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); 702} 703 704static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ 705 int j, k; 706 int a; 707 INIT_CLIP 708 pixel *src = (pixel*)_src; 709 int stride = _stride/sizeof(pixel); 710 const pixel * const src0 = src +3-stride; 711 const pixel * src1 = src +4*stride-1; 712 const pixel * src2 = src1-2*stride; // == src+2*stride-1; 713 int H = src0[1] - src0[-1]; 714 int V = src1[0] - src2[ 0]; 715 for(k=2; k<=4; ++k) { 716 src1 += stride; src2 -= stride; 717 H += k*(src0[k] - src0[-k]); 718 V += k*(src1[0] - src2[ 0]); 719 } 720 H = ( 17*H+16 ) >> 5; 721 V = ( 17*V+16 ) >> 5; 722 723 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); 724 for(j=8; j>0; --j) { 725 int b = a; 726 a += V; 727 src[0] = CLIP((b ) >> 5); 728 src[1] = CLIP((b+ H) >> 5); 729 src[2] = CLIP((b+2*H) >> 5); 730 src[3] = CLIP((b+3*H) >> 5); 731 src[4] = CLIP((b+4*H) >> 5); 732 src[5] = CLIP((b+5*H) >> 5); 733 src[6] = CLIP((b+6*H) >> 5); 734 src[7] = CLIP((b+7*H) >> 5); 735 src += stride; 736 } 737} 738 739static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){ 740 int j, k; 741 int a; 742 INIT_CLIP 743 pixel *src = (pixel*)_src; 744 int stride = _stride>>(sizeof(pixel)-1); 745 const pixel * const src0 = src +3-stride; 746 const pixel * src1 = src +8*stride-1; 747 const pixel * src2 = src1-2*stride; // == src+6*stride-1; 748 int H = src0[1] - src0[-1]; 749 int V = src1[0] - src2[ 0]; 750 751 for (k = 2; k <= 4; ++k) { 752 src1 += stride; src2 -= stride; 753 H += k*(src0[k] - src0[-k]); 754 V += k*(src1[0] - src2[ 0]); 755 } 756 for (; k <= 8; ++k) { 757 src1 += stride; src2 -= stride; 758 V += k*(src1[0] - src2[0]); 759 } 760 761 H = (17*H+16) >> 5; 762 V = (5*V+32) >> 6; 763 764 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; 765 for(j=16; j>0; --j) { 766 int b = a; 767 a += V; 768 src[0] = CLIP((b ) >> 5); 769 src[1] = CLIP((b+ H) >> 5); 770 src[2] = CLIP((b+2*H) >> 5); 771 src[3] = CLIP((b+3*H) >> 5); 772 src[4] = CLIP((b+4*H) >> 5); 773 src[5] = CLIP((b+5*H) >> 5); 774 src[6] = CLIP((b+6*H) >> 5); 775 src[7] = CLIP((b+7*H) >> 5); 776 src += stride; 777 } 778} 779 780#define SRC(x,y) src[(x)+(y)*stride] 781#define PL(y) \ 782 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; 783#define PREDICT_8x8_LOAD_LEFT \ 784 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ 785 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ 786 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ 787 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 788 789#define PT(x) \ 790 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 791#define PREDICT_8x8_LOAD_TOP \ 792 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ 793 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ 794 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ 795 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ 796 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 797 798#define PTR(x) \ 799 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 800#define PREDICT_8x8_LOAD_TOPRIGHT \ 801 int t8, t9, t10, t11, t12, t13, t14, t15; \ 802 if(has_topright) { \ 803 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ 804 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ 805 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); 806 807#define PREDICT_8x8_LOAD_TOPLEFT \ 808 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 809 810#define PREDICT_8x8_DC(v) \ 811 int y; \ 812 for( y = 0; y < 8; y++ ) { \ 813 AV_WN4PA(((pixel4*)src)+0, v); \ 814 AV_WN4PA(((pixel4*)src)+1, v); \ 815 src += stride; \ 816 } 817 818static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 819{ 820 pixel *src = (pixel*)_src; 821 int stride = _stride/sizeof(pixel); 822 823 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); 824} 825static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 826{ 827 pixel *src = (pixel*)_src; 828 int stride = _stride/sizeof(pixel); 829 830 PREDICT_8x8_LOAD_LEFT; 831 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3); 832 PREDICT_8x8_DC(dc); 833} 834static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 835{ 836 pixel *src = (pixel*)_src; 837 int stride = _stride/sizeof(pixel); 838 839 PREDICT_8x8_LOAD_TOP; 840 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3); 841 PREDICT_8x8_DC(dc); 842} 843static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 844{ 845 pixel *src = (pixel*)_src; 846 int stride = _stride/sizeof(pixel); 847 848 PREDICT_8x8_LOAD_LEFT; 849 PREDICT_8x8_LOAD_TOP; 850 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7 851 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4); 852 PREDICT_8x8_DC(dc); 853} 854static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 855{ 856 pixel *src = (pixel*)_src; 857 int stride = _stride/sizeof(pixel); 858 pixel4 a; 859 860 PREDICT_8x8_LOAD_LEFT; 861#define ROW(y) a = PIXEL_SPLAT_X4(l##y); \ 862 AV_WN4PA(src+y*stride, a); \ 863 AV_WN4PA(src+y*stride+4, a); 864 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); 865#undef ROW 866} 867static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 868{ 869 int y; 870 pixel *src = (pixel*)_src; 871 int stride = _stride/sizeof(pixel); 872 pixel4 a, b; 873 874 PREDICT_8x8_LOAD_TOP; 875 src[0] = t0; 876 src[1] = t1; 877 src[2] = t2; 878 src[3] = t3; 879 src[4] = t4; 880 src[5] = t5; 881 src[6] = t6; 882 src[7] = t7; 883 a = AV_RN4PA(((pixel4*)src)+0); 884 b = AV_RN4PA(((pixel4*)src)+1); 885 for( y = 1; y < 8; y++ ) { 886 AV_WN4PA(((pixel4*)(src+y*stride))+0, a); 887 AV_WN4PA(((pixel4*)(src+y*stride))+1, b); 888 } 889} 890static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 891{ 892 pixel *src = (pixel*)_src; 893 int stride = _stride/sizeof(pixel); 894 PREDICT_8x8_LOAD_TOP; 895 PREDICT_8x8_LOAD_TOPRIGHT; 896 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; 897 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; 898 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; 899 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; 900 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; 901 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; 902 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; 903 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; 904 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; 905 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; 906 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; 907 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; 908 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; 909 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; 910 SRC(7,7)= (t14 + 3*t15 + 2) >> 2; 911} 912static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 913{ 914 pixel *src = (pixel*)_src; 915 int stride = _stride/sizeof(pixel); 916 PREDICT_8x8_LOAD_TOP; 917 PREDICT_8x8_LOAD_LEFT; 918 PREDICT_8x8_LOAD_TOPLEFT; 919 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; 920 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; 921 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; 922 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; 923 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; 924 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; 925 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; 926 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; 927 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; 928 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; 929 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; 930 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; 931 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; 932 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; 933 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; 934} 935static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 936{ 937 pixel *src = (pixel*)_src; 938 int stride = _stride/sizeof(pixel); 939 PREDICT_8x8_LOAD_TOP; 940 PREDICT_8x8_LOAD_LEFT; 941 PREDICT_8x8_LOAD_TOPLEFT; 942 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; 943 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; 944 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; 945 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; 946 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; 947 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; 948 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; 949 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; 950 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; 951 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; 952 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; 953 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; 954 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; 955 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; 956 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; 957 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; 958 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; 959 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; 960 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; 961 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; 962 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; 963 SRC(7,0)= (t6 + t7 + 1) >> 1; 964} 965static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 966{ 967 pixel *src = (pixel*)_src; 968 int stride = _stride/sizeof(pixel); 969 PREDICT_8x8_LOAD_TOP; 970 PREDICT_8x8_LOAD_LEFT; 971 PREDICT_8x8_LOAD_TOPLEFT; 972 SRC(0,7)= (l6 + l7 + 1) >> 1; 973 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; 974 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; 975 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; 976 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; 977 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; 978 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; 979 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; 980 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; 981 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; 982 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; 983 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; 984 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; 985 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; 986 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; 987 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; 988 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; 989 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; 990 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; 991 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; 992 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; 993 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; 994} 995static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 996{ 997 pixel *src = (pixel*)_src; 998 int stride = _stride/sizeof(pixel); 999 PREDICT_8x8_LOAD_TOP; 1000 PREDICT_8x8_LOAD_TOPRIGHT; 1001 SRC(0,0)= (t0 + t1 + 1) >> 1; 1002 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; 1003 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; 1004 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; 1005 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; 1006 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; 1007 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; 1008 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; 1009 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; 1010 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; 1011 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; 1012 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; 1013 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; 1014 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; 1015 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; 1016 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; 1017 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; 1018 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; 1019 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; 1020 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; 1021 SRC(7,6)= (t10 + t11 + 1) >> 1; 1022 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; 1023} 1024static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 1025{ 1026 pixel *src = (pixel*)_src; 1027 int stride = _stride/sizeof(pixel); 1028 PREDICT_8x8_LOAD_LEFT; 1029 SRC(0,0)= (l0 + l1 + 1) >> 1; 1030 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; 1031 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; 1032 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; 1033 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; 1034 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; 1035 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; 1036 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; 1037 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; 1038 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; 1039 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; 1040 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; 1041 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; 1042 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; 1043 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= 1044 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= 1045 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= 1046 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; 1047} 1048#undef PREDICT_8x8_LOAD_LEFT 1049#undef PREDICT_8x8_LOAD_TOP 1050#undef PREDICT_8x8_LOAD_TOPLEFT 1051#undef PREDICT_8x8_LOAD_TOPRIGHT 1052#undef PREDICT_8x8_DC 1053#undef PTR 1054#undef PT 1055#undef PL 1056#undef SRC 1057 1058static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 1059 int i; 1060 pixel *pix = (pixel*)_pix; 1061 const dctcoef *block = (const dctcoef*)_block; 1062 stride /= sizeof(pixel); 1063 pix -= stride; 1064 for(i=0; i<4; i++){ 1065 pixel v = pix[0]; 1066 pix[1*stride]= v += block[0]; 1067 pix[2*stride]= v += block[4]; 1068 pix[3*stride]= v += block[8]; 1069 pix[4*stride]= v + block[12]; 1070 pix++; 1071 block++; 1072 } 1073} 1074 1075static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 1076 int i; 1077 pixel *pix = (pixel*)_pix; 1078 const dctcoef *block = (const dctcoef*)_block; 1079 stride /= sizeof(pixel); 1080 for(i=0; i<4; i++){ 1081 pixel v = pix[-1]; 1082 pix[0]= v += block[0]; 1083 pix[1]= v += block[1]; 1084 pix[2]= v += block[2]; 1085 pix[3]= v + block[3]; 1086 pix+= stride; 1087 block+= 4; 1088 } 1089} 1090 1091static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 1092 int i; 1093 pixel *pix = (pixel*)_pix; 1094 const dctcoef *block = (const dctcoef*)_block; 1095 stride /= sizeof(pixel); 1096 pix -= stride; 1097 for(i=0; i<8; i++){ 1098 pixel v = pix[0]; 1099 pix[1*stride]= v += block[0]; 1100 pix[2*stride]= v += block[8]; 1101 pix[3*stride]= v += block[16]; 1102 pix[4*stride]= v += block[24]; 1103 pix[5*stride]= v += block[32]; 1104 pix[6*stride]= v += block[40]; 1105 pix[7*stride]= v += block[48]; 1106 pix[8*stride]= v + block[56]; 1107 pix++; 1108 block++; 1109 } 1110} 1111 1112static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 1113 int i; 1114 pixel *pix = (pixel*)_pix; 1115 const dctcoef *block = (const dctcoef*)_block; 1116 stride /= sizeof(pixel); 1117 for(i=0; i<8; i++){ 1118 pixel v = pix[-1]; 1119 pix[0]= v += block[0]; 1120 pix[1]= v += block[1]; 1121 pix[2]= v += block[2]; 1122 pix[3]= v += block[3]; 1123 pix[4]= v += block[4]; 1124 pix[5]= v += block[5]; 1125 pix[6]= v += block[6]; 1126 pix[7]= v + block[7]; 1127 pix+= stride; 1128 block+= 8; 1129 } 1130} 1131 1132static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1133 int i; 1134 for(i=0; i<16; i++) 1135 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1136} 1137 1138static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1139 int i; 1140 for(i=0; i<16; i++) 1141 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1142} 1143 1144static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1145 int i; 1146 for(i=0; i<4; i++) 1147 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1148} 1149 1150static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1151 int i; 1152 for(i=0; i<4; i++) 1153 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1154 for(i=4; i<8; i++) 1155 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); 1156} 1157 1158static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1159 int i; 1160 for(i=0; i<4; i++) 1161 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1162} 1163 1164static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1165 int i; 1166 for(i=0; i<4; i++) 1167 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 1168 for(i=4; i<8; i++) 1169 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); 1170} 1171