1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * H.264 / AVC / MPEG4 part10 prediction functions. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "avcodec.h" 29#include "mpegvideo.h" 30#include "h264pred.h" 31 32static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){ 33 const uint32_t a= ((uint32_t*)(src-stride))[0]; 34 ((uint32_t*)(src+0*stride))[0]= a; 35 ((uint32_t*)(src+1*stride))[0]= a; 36 ((uint32_t*)(src+2*stride))[0]= a; 37 ((uint32_t*)(src+3*stride))[0]= a; 38} 39 40static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){ 41 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101; 42 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101; 43 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101; 44 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101; 45} 46 47static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){ 48 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] 49 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; 50 51 ((uint32_t*)(src+0*stride))[0]= 52 ((uint32_t*)(src+1*stride))[0]= 53 ((uint32_t*)(src+2*stride))[0]= 54 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 55} 56 57static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){ 58 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; 59 60 ((uint32_t*)(src+0*stride))[0]= 61 ((uint32_t*)(src+1*stride))[0]= 62 ((uint32_t*)(src+2*stride))[0]= 63 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 64} 65 66static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){ 67 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; 68 69 ((uint32_t*)(src+0*stride))[0]= 70 ((uint32_t*)(src+1*stride))[0]= 71 ((uint32_t*)(src+2*stride))[0]= 72 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 73} 74 75static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){ 76 ((uint32_t*)(src+0*stride))[0]= 77 ((uint32_t*)(src+1*stride))[0]= 78 ((uint32_t*)(src+2*stride))[0]= 79 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U; 80} 81 82 83#define LOAD_TOP_RIGHT_EDGE\ 84 const int av_unused t4= topright[0];\ 85 const int av_unused t5= topright[1];\ 86 const int av_unused t6= topright[2];\ 87 const int av_unused t7= topright[3];\ 88 89#define LOAD_DOWN_LEFT_EDGE\ 90 const int av_unused l4= src[-1+4*stride];\ 91 const int av_unused l5= src[-1+5*stride];\ 92 const int av_unused l6= src[-1+6*stride];\ 93 const int av_unused l7= src[-1+7*stride];\ 94 95#define LOAD_LEFT_EDGE\ 96 const int av_unused l0= src[-1+0*stride];\ 97 const int av_unused l1= src[-1+1*stride];\ 98 const int av_unused l2= src[-1+2*stride];\ 99 const int av_unused l3= src[-1+3*stride];\ 100 101#define LOAD_TOP_EDGE\ 102 const int av_unused t0= src[ 0-1*stride];\ 103 const int av_unused t1= src[ 1-1*stride];\ 104 const int av_unused t2= src[ 2-1*stride];\ 105 const int av_unused t3= src[ 3-1*stride];\ 106 107static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){ 108 const int lt= src[-1-1*stride]; 109 LOAD_TOP_EDGE 110 LOAD_LEFT_EDGE 111 112 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 113 src[0+2*stride]= 114 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 115 src[0+1*stride]= 116 src[1+2*stride]= 117 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 118 src[0+0*stride]= 119 src[1+1*stride]= 120 src[2+2*stride]= 121 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 122 src[1+0*stride]= 123 src[2+1*stride]= 124 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; 125 src[2+0*stride]= 126 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 127 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; 128} 129 130static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){ 131 LOAD_TOP_EDGE 132 LOAD_TOP_RIGHT_EDGE 133// LOAD_LEFT_EDGE 134 135 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; 136 src[1+0*stride]= 137 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; 138 src[2+0*stride]= 139 src[1+1*stride]= 140 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; 141 src[3+0*stride]= 142 src[2+1*stride]= 143 src[1+2*stride]= 144 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; 145 src[3+1*stride]= 146 src[2+2*stride]= 147 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; 148 src[3+2*stride]= 149 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; 150 src[3+3*stride]=(t6 + 3*t7 + 2)>>2; 151} 152 153static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){ 154 LOAD_TOP_EDGE 155 LOAD_LEFT_EDGE 156 const av_unused int unu0= t0; 157 const av_unused int unu1= l0; 158 159 src[0+0*stride]=(l1 + t1)>>1; 160 src[1+0*stride]= 161 src[0+1*stride]=(l2 + t2)>>1; 162 src[2+0*stride]= 163 src[1+1*stride]= 164 src[0+2*stride]= 165 src[3+0*stride]= 166 src[2+1*stride]= 167 src[1+2*stride]= 168 src[0+3*stride]= 169 src[3+1*stride]= 170 src[2+2*stride]= 171 src[1+3*stride]= 172 src[3+2*stride]= 173 src[2+3*stride]= 174 src[3+3*stride]=(l3 + t3)>>1; 175} 176 177static void pred4x4_down_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){ 178 LOAD_TOP_EDGE 179 LOAD_TOP_RIGHT_EDGE 180 LOAD_LEFT_EDGE 181 LOAD_DOWN_LEFT_EDGE 182 183 src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; 184 src[1+0*stride]= 185 src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; 186 src[2+0*stride]= 187 src[1+1*stride]= 188 src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3; 189 src[3+0*stride]= 190 src[2+1*stride]= 191 src[1+2*stride]= 192 src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3; 193 src[3+1*stride]= 194 src[2+2*stride]= 195 src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3; 196 src[3+2*stride]= 197 src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3; 198 src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; 199} 200 201static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){ 202 LOAD_TOP_EDGE 203 LOAD_TOP_RIGHT_EDGE 204 LOAD_LEFT_EDGE 205 206 src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; 207 src[1+0*stride]= 208 src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; 209 src[2+0*stride]= 210 src[1+1*stride]= 211 src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3; 212 src[3+0*stride]= 213 src[2+1*stride]= 214 src[1+2*stride]= 215 src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3; 216 src[3+1*stride]= 217 src[2+2*stride]= 218 src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3; 219 src[3+2*stride]= 220 src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3; 221 src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; 222} 223 224static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){ 225 const int lt= src[-1-1*stride]; 226 LOAD_TOP_EDGE 227 LOAD_LEFT_EDGE 228 229 src[0+0*stride]= 230 src[1+2*stride]=(lt + t0 + 1)>>1; 231 src[1+0*stride]= 232 src[2+2*stride]=(t0 + t1 + 1)>>1; 233 src[2+0*stride]= 234 src[3+2*stride]=(t1 + t2 + 1)>>1; 235 src[3+0*stride]=(t2 + t3 + 1)>>1; 236 src[0+1*stride]= 237 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 238 src[1+1*stride]= 239 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; 240 src[2+1*stride]= 241 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; 242 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; 243 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 244 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 245} 246 247static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){ 248 LOAD_TOP_EDGE 249 LOAD_TOP_RIGHT_EDGE 250 251 src[0+0*stride]=(t0 + t1 + 1)>>1; 252 src[1+0*stride]= 253 src[0+2*stride]=(t1 + t2 + 1)>>1; 254 src[2+0*stride]= 255 src[1+2*stride]=(t2 + t3 + 1)>>1; 256 src[3+0*stride]= 257 src[2+2*stride]=(t3 + t4+ 1)>>1; 258 src[3+2*stride]=(t4 + t5+ 1)>>1; 259 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 260 src[1+1*stride]= 261 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; 262 src[2+1*stride]= 263 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; 264 src[3+1*stride]= 265 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; 266 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; 267} 268 269static void pred4x4_vertical_left_rv40(uint8_t *src, uint8_t *topright, int stride, 270 const int l0, const int l1, const int l2, const int l3, const int l4){ 271 LOAD_TOP_EDGE 272 LOAD_TOP_RIGHT_EDGE 273 274 src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3; 275 src[1+0*stride]= 276 src[0+2*stride]=(t1 + t2 + 1)>>1; 277 src[2+0*stride]= 278 src[1+2*stride]=(t2 + t3 + 1)>>1; 279 src[3+0*stride]= 280 src[2+2*stride]=(t3 + t4+ 1)>>1; 281 src[3+2*stride]=(t4 + t5+ 1)>>1; 282 src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3; 283 src[1+1*stride]= 284 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; 285 src[2+1*stride]= 286 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; 287 src[3+1*stride]= 288 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; 289 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; 290} 291 292static void pred4x4_vertical_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){ 293 LOAD_LEFT_EDGE 294 LOAD_DOWN_LEFT_EDGE 295 296 pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4); 297} 298 299static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){ 300 LOAD_LEFT_EDGE 301 302 pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3); 303} 304 305static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){ 306 LOAD_LEFT_EDGE 307 308 src[0+0*stride]=(l0 + l1 + 1)>>1; 309 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; 310 src[2+0*stride]= 311 src[0+1*stride]=(l1 + l2 + 1)>>1; 312 src[3+0*stride]= 313 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; 314 src[2+1*stride]= 315 src[0+2*stride]=(l2 + l3 + 1)>>1; 316 src[3+1*stride]= 317 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; 318 src[3+2*stride]= 319 src[1+3*stride]= 320 src[0+3*stride]= 321 src[2+2*stride]= 322 src[2+3*stride]= 323 src[3+3*stride]=l3; 324} 325 326static void pred4x4_horizontal_up_rv40_c(uint8_t *src, uint8_t *topright, int stride){ 327 LOAD_LEFT_EDGE 328 LOAD_DOWN_LEFT_EDGE 329 LOAD_TOP_EDGE 330 LOAD_TOP_RIGHT_EDGE 331 332 src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; 333 src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; 334 src[2+0*stride]= 335 src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; 336 src[3+0*stride]= 337 src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; 338 src[2+1*stride]= 339 src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; 340 src[3+1*stride]= 341 src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; 342 src[3+2*stride]= 343 src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2; 344 src[0+3*stride]= 345 src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2; 346 src[2+3*stride]=(l4 + l5 + 1)>>1; 347 src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; 348} 349 350static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){ 351 LOAD_LEFT_EDGE 352 LOAD_TOP_EDGE 353 LOAD_TOP_RIGHT_EDGE 354 355 src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; 356 src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; 357 src[2+0*stride]= 358 src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; 359 src[3+0*stride]= 360 src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; 361 src[2+1*stride]= 362 src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; 363 src[3+1*stride]= 364 src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; 365 src[3+2*stride]= 366 src[1+3*stride]=l3; 367 src[0+3*stride]= 368 src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2; 369 src[2+3*stride]= 370 src[3+3*stride]=l3; 371} 372 373static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){ 374 const int lt= src[-1-1*stride]; 375 LOAD_TOP_EDGE 376 LOAD_LEFT_EDGE 377 378 src[0+0*stride]= 379 src[2+1*stride]=(lt + l0 + 1)>>1; 380 src[1+0*stride]= 381 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; 382 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; 383 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; 384 src[0+1*stride]= 385 src[2+2*stride]=(l0 + l1 + 1)>>1; 386 src[1+1*stride]= 387 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 388 src[0+2*stride]= 389 src[2+3*stride]=(l1 + l2+ 1)>>1; 390 src[1+2*stride]= 391 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 392 src[0+3*stride]=(l2 + l3 + 1)>>1; 393 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; 394} 395 396static void pred16x16_vertical_c(uint8_t *src, int stride){ 397 int i; 398 const uint32_t a= ((uint32_t*)(src-stride))[0]; 399 const uint32_t b= ((uint32_t*)(src-stride))[1]; 400 const uint32_t c= ((uint32_t*)(src-stride))[2]; 401 const uint32_t d= ((uint32_t*)(src-stride))[3]; 402 403 for(i=0; i<16; i++){ 404 ((uint32_t*)(src+i*stride))[0]= a; 405 ((uint32_t*)(src+i*stride))[1]= b; 406 ((uint32_t*)(src+i*stride))[2]= c; 407 ((uint32_t*)(src+i*stride))[3]= d; 408 } 409} 410 411static void pred16x16_horizontal_c(uint8_t *src, int stride){ 412 int i; 413 414 for(i=0; i<16; i++){ 415 ((uint32_t*)(src+i*stride))[0]= 416 ((uint32_t*)(src+i*stride))[1]= 417 ((uint32_t*)(src+i*stride))[2]= 418 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101; 419 } 420} 421 422static void pred16x16_dc_c(uint8_t *src, int stride){ 423 int i, dc=0; 424 425 for(i=0;i<16; i++){ 426 dc+= src[-1+i*stride]; 427 } 428 429 for(i=0;i<16; i++){ 430 dc+= src[i-stride]; 431 } 432 433 dc= 0x01010101*((dc + 16)>>5); 434 435 for(i=0; i<16; i++){ 436 ((uint32_t*)(src+i*stride))[0]= 437 ((uint32_t*)(src+i*stride))[1]= 438 ((uint32_t*)(src+i*stride))[2]= 439 ((uint32_t*)(src+i*stride))[3]= dc; 440 } 441} 442 443static void pred16x16_left_dc_c(uint8_t *src, int stride){ 444 int i, dc=0; 445 446 for(i=0;i<16; i++){ 447 dc+= src[-1+i*stride]; 448 } 449 450 dc= 0x01010101*((dc + 8)>>4); 451 452 for(i=0; i<16; i++){ 453 ((uint32_t*)(src+i*stride))[0]= 454 ((uint32_t*)(src+i*stride))[1]= 455 ((uint32_t*)(src+i*stride))[2]= 456 ((uint32_t*)(src+i*stride))[3]= dc; 457 } 458} 459 460static void pred16x16_top_dc_c(uint8_t *src, int stride){ 461 int i, dc=0; 462 463 for(i=0;i<16; i++){ 464 dc+= src[i-stride]; 465 } 466 dc= 0x01010101*((dc + 8)>>4); 467 468 for(i=0; i<16; i++){ 469 ((uint32_t*)(src+i*stride))[0]= 470 ((uint32_t*)(src+i*stride))[1]= 471 ((uint32_t*)(src+i*stride))[2]= 472 ((uint32_t*)(src+i*stride))[3]= dc; 473 } 474} 475 476static void pred16x16_128_dc_c(uint8_t *src, int stride){ 477 int i; 478 479 for(i=0; i<16; i++){ 480 ((uint32_t*)(src+i*stride))[0]= 481 ((uint32_t*)(src+i*stride))[1]= 482 ((uint32_t*)(src+i*stride))[2]= 483 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; 484 } 485} 486 487static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){ 488 int i, j, k; 489 int a; 490 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 491 const uint8_t * const src0 = src+7-stride; 492 const uint8_t *src1 = src+8*stride-1; 493 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; 494 int H = src0[1] - src0[-1]; 495 int V = src1[0] - src2[ 0]; 496 for(k=2; k<=8; ++k) { 497 src1 += stride; src2 -= stride; 498 H += k*(src0[k] - src0[-k]); 499 V += k*(src1[0] - src2[ 0]); 500 } 501 if(svq3){ 502 H = ( 5*(H/4) ) / 16; 503 V = ( 5*(V/4) ) / 16; 504 505 /* required for 100% accuracy */ 506 i = H; H = V; V = i; 507 }else if(rv40){ 508 H = ( H + (H>>2) ) >> 4; 509 V = ( V + (V>>2) ) >> 4; 510 }else{ 511 H = ( 5*H+32 ) >> 6; 512 V = ( 5*V+32 ) >> 6; 513 } 514 515 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); 516 for(j=16; j>0; --j) { 517 int b = a; 518 a += V; 519 for(i=-16; i<0; i+=4) { 520 src[16+i] = cm[ (b ) >> 5 ]; 521 src[17+i] = cm[ (b+ H) >> 5 ]; 522 src[18+i] = cm[ (b+2*H) >> 5 ]; 523 src[19+i] = cm[ (b+3*H) >> 5 ]; 524 b += 4*H; 525 } 526 src += stride; 527 } 528} 529 530static void pred16x16_plane_c(uint8_t *src, int stride){ 531 pred16x16_plane_compat_c(src, stride, 0, 0); 532} 533 534static void pred16x16_plane_svq3_c(uint8_t *src, int stride){ 535 pred16x16_plane_compat_c(src, stride, 1, 0); 536} 537 538static void pred16x16_plane_rv40_c(uint8_t *src, int stride){ 539 pred16x16_plane_compat_c(src, stride, 0, 1); 540} 541 542static void pred8x8_vertical_c(uint8_t *src, int stride){ 543 int i; 544 const uint32_t a= ((uint32_t*)(src-stride))[0]; 545 const uint32_t b= ((uint32_t*)(src-stride))[1]; 546 547 for(i=0; i<8; i++){ 548 ((uint32_t*)(src+i*stride))[0]= a; 549 ((uint32_t*)(src+i*stride))[1]= b; 550 } 551} 552 553static void pred8x8_horizontal_c(uint8_t *src, int stride){ 554 int i; 555 556 for(i=0; i<8; i++){ 557 ((uint32_t*)(src+i*stride))[0]= 558 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101; 559 } 560} 561 562static void pred8x8_128_dc_c(uint8_t *src, int stride){ 563 int i; 564 565 for(i=0; i<8; i++){ 566 ((uint32_t*)(src+i*stride))[0]= 567 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; 568 } 569} 570 571static void pred8x8_left_dc_c(uint8_t *src, int stride){ 572 int i; 573 int dc0, dc2; 574 575 dc0=dc2=0; 576 for(i=0;i<4; i++){ 577 dc0+= src[-1+i*stride]; 578 dc2+= src[-1+(i+4)*stride]; 579 } 580 dc0= 0x01010101*((dc0 + 2)>>2); 581 dc2= 0x01010101*((dc2 + 2)>>2); 582 583 for(i=0; i<4; i++){ 584 ((uint32_t*)(src+i*stride))[0]= 585 ((uint32_t*)(src+i*stride))[1]= dc0; 586 } 587 for(i=4; i<8; i++){ 588 ((uint32_t*)(src+i*stride))[0]= 589 ((uint32_t*)(src+i*stride))[1]= dc2; 590 } 591} 592 593static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){ 594 int i; 595 int dc0; 596 597 dc0=0; 598 for(i=0;i<8; i++) 599 dc0+= src[-1+i*stride]; 600 dc0= 0x01010101*((dc0 + 4)>>3); 601 602 for(i=0; i<8; i++){ 603 ((uint32_t*)(src+i*stride))[0]= 604 ((uint32_t*)(src+i*stride))[1]= dc0; 605 } 606} 607 608static void pred8x8_top_dc_c(uint8_t *src, int stride){ 609 int i; 610 int dc0, dc1; 611 612 dc0=dc1=0; 613 for(i=0;i<4; i++){ 614 dc0+= src[i-stride]; 615 dc1+= src[4+i-stride]; 616 } 617 dc0= 0x01010101*((dc0 + 2)>>2); 618 dc1= 0x01010101*((dc1 + 2)>>2); 619 620 for(i=0; i<4; i++){ 621 ((uint32_t*)(src+i*stride))[0]= dc0; 622 ((uint32_t*)(src+i*stride))[1]= dc1; 623 } 624 for(i=4; i<8; i++){ 625 ((uint32_t*)(src+i*stride))[0]= dc0; 626 ((uint32_t*)(src+i*stride))[1]= dc1; 627 } 628} 629 630static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){ 631 int i; 632 int dc0; 633 634 dc0=0; 635 for(i=0;i<8; i++) 636 dc0+= src[i-stride]; 637 dc0= 0x01010101*((dc0 + 4)>>3); 638 639 for(i=0; i<8; i++){ 640 ((uint32_t*)(src+i*stride))[0]= 641 ((uint32_t*)(src+i*stride))[1]= dc0; 642 } 643} 644 645 646static void pred8x8_dc_c(uint8_t *src, int stride){ 647 int i; 648 int dc0, dc1, dc2, dc3; 649 650 dc0=dc1=dc2=0; 651 for(i=0;i<4; i++){ 652 dc0+= src[-1+i*stride] + src[i-stride]; 653 dc1+= src[4+i-stride]; 654 dc2+= src[-1+(i+4)*stride]; 655 } 656 dc3= 0x01010101*((dc1 + dc2 + 4)>>3); 657 dc0= 0x01010101*((dc0 + 4)>>3); 658 dc1= 0x01010101*((dc1 + 2)>>2); 659 dc2= 0x01010101*((dc2 + 2)>>2); 660 661 for(i=0; i<4; i++){ 662 ((uint32_t*)(src+i*stride))[0]= dc0; 663 ((uint32_t*)(src+i*stride))[1]= dc1; 664 } 665 for(i=4; i<8; i++){ 666 ((uint32_t*)(src+i*stride))[0]= dc2; 667 ((uint32_t*)(src+i*stride))[1]= dc3; 668 } 669} 670 671//the following 4 function should not be optimized! 672static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){ 673 pred8x8_top_dc_c(src, stride); 674 pred4x4_dc_c(src, NULL, stride); 675} 676 677static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){ 678 pred8x8_dc_c(src, stride); 679 pred4x4_top_dc_c(src, NULL, stride); 680} 681 682static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){ 683 pred8x8_left_dc_c(src, stride); 684 pred4x4_128_dc_c(src + 4*stride , NULL, stride); 685 pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride); 686} 687 688static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){ 689 pred8x8_left_dc_c(src, stride); 690 pred4x4_128_dc_c(src , NULL, stride); 691 pred4x4_128_dc_c(src + 4, NULL, stride); 692} 693 694static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ 695 int i; 696 int dc0=0; 697 698 for(i=0;i<4; i++){ 699 dc0+= src[-1+i*stride] + src[i-stride]; 700 dc0+= src[4+i-stride]; 701 dc0+= src[-1+(i+4)*stride]; 702 } 703 dc0= 0x01010101*((dc0 + 8)>>4); 704 705 for(i=0; i<4; i++){ 706 ((uint32_t*)(src+i*stride))[0]= dc0; 707 ((uint32_t*)(src+i*stride))[1]= dc0; 708 } 709 for(i=4; i<8; i++){ 710 ((uint32_t*)(src+i*stride))[0]= dc0; 711 ((uint32_t*)(src+i*stride))[1]= dc0; 712 } 713} 714 715static void pred8x8_plane_c(uint8_t *src, int stride){ 716 int j, k; 717 int a; 718 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 719 const uint8_t * const src0 = src+3-stride; 720 const uint8_t *src1 = src+4*stride-1; 721 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; 722 int H = src0[1] - src0[-1]; 723 int V = src1[0] - src2[ 0]; 724 for(k=2; k<=4; ++k) { 725 src1 += stride; src2 -= stride; 726 H += k*(src0[k] - src0[-k]); 727 V += k*(src1[0] - src2[ 0]); 728 } 729 H = ( 17*H+16 ) >> 5; 730 V = ( 17*V+16 ) >> 5; 731 732 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); 733 for(j=8; j>0; --j) { 734 int b = a; 735 a += V; 736 src[0] = cm[ (b ) >> 5 ]; 737 src[1] = cm[ (b+ H) >> 5 ]; 738 src[2] = cm[ (b+2*H) >> 5 ]; 739 src[3] = cm[ (b+3*H) >> 5 ]; 740 src[4] = cm[ (b+4*H) >> 5 ]; 741 src[5] = cm[ (b+5*H) >> 5 ]; 742 src[6] = cm[ (b+6*H) >> 5 ]; 743 src[7] = cm[ (b+7*H) >> 5 ]; 744 src += stride; 745 } 746} 747 748#define SRC(x,y) src[(x)+(y)*stride] 749#define PL(y) \ 750 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; 751#define PREDICT_8x8_LOAD_LEFT \ 752 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ 753 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ 754 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ 755 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 756 757#define PT(x) \ 758 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 759#define PREDICT_8x8_LOAD_TOP \ 760 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ 761 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ 762 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ 763 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ 764 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 765 766#define PTR(x) \ 767 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 768#define PREDICT_8x8_LOAD_TOPRIGHT \ 769 int t8, t9, t10, t11, t12, t13, t14, t15; \ 770 if(has_topright) { \ 771 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ 772 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ 773 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); 774 775#define PREDICT_8x8_LOAD_TOPLEFT \ 776 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 777 778#define PREDICT_8x8_DC(v) \ 779 int y; \ 780 for( y = 0; y < 8; y++ ) { \ 781 ((uint32_t*)src)[0] = \ 782 ((uint32_t*)src)[1] = v; \ 783 src += stride; \ 784 } 785 786static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) 787{ 788 PREDICT_8x8_DC(0x80808080); 789} 790static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) 791{ 792 PREDICT_8x8_LOAD_LEFT; 793 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101; 794 PREDICT_8x8_DC(dc); 795} 796static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) 797{ 798 PREDICT_8x8_LOAD_TOP; 799 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101; 800 PREDICT_8x8_DC(dc); 801} 802static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) 803{ 804 PREDICT_8x8_LOAD_LEFT; 805 PREDICT_8x8_LOAD_TOP; 806 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7 807 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101; 808 PREDICT_8x8_DC(dc); 809} 810static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride) 811{ 812 PREDICT_8x8_LOAD_LEFT; 813#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\ 814 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y 815 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); 816#undef ROW 817} 818static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride) 819{ 820 int y; 821 PREDICT_8x8_LOAD_TOP; 822 src[0] = t0; 823 src[1] = t1; 824 src[2] = t2; 825 src[3] = t3; 826 src[4] = t4; 827 src[5] = t5; 828 src[6] = t6; 829 src[7] = t7; 830 for( y = 1; y < 8; y++ ) 831 *(uint64_t*)(src+y*stride) = *(uint64_t*)src; 832} 833static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) 834{ 835 PREDICT_8x8_LOAD_TOP; 836 PREDICT_8x8_LOAD_TOPRIGHT; 837 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; 838 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; 839 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; 840 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; 841 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; 842 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; 843 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; 844 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; 845 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; 846 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; 847 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; 848 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; 849 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; 850 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; 851 SRC(7,7)= (t14 + 3*t15 + 2) >> 2; 852} 853static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) 854{ 855 PREDICT_8x8_LOAD_TOP; 856 PREDICT_8x8_LOAD_LEFT; 857 PREDICT_8x8_LOAD_TOPLEFT; 858 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; 859 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; 860 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; 861 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; 862 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; 863 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; 864 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; 865 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; 866 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; 867 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; 868 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; 869 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; 870 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; 871 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; 872 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; 873 874} 875static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) 876{ 877 PREDICT_8x8_LOAD_TOP; 878 PREDICT_8x8_LOAD_LEFT; 879 PREDICT_8x8_LOAD_TOPLEFT; 880 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; 881 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; 882 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; 883 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; 884 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; 885 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; 886 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; 887 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; 888 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; 889 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; 890 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; 891 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; 892 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; 893 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; 894 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; 895 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; 896 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; 897 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; 898 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; 899 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; 900 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; 901 SRC(7,0)= (t6 + t7 + 1) >> 1; 902} 903static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride) 904{ 905 PREDICT_8x8_LOAD_TOP; 906 PREDICT_8x8_LOAD_LEFT; 907 PREDICT_8x8_LOAD_TOPLEFT; 908 SRC(0,7)= (l6 + l7 + 1) >> 1; 909 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; 910 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; 911 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; 912 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; 913 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; 914 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; 915 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; 916 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; 917 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; 918 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; 919 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; 920 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; 921 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; 922 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; 923 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; 924 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; 925 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; 926 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; 927 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; 928 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; 929 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; 930} 931static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) 932{ 933 PREDICT_8x8_LOAD_TOP; 934 PREDICT_8x8_LOAD_TOPRIGHT; 935 SRC(0,0)= (t0 + t1 + 1) >> 1; 936 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; 937 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; 938 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; 939 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; 940 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; 941 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; 942 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; 943 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; 944 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; 945 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; 946 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; 947 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; 948 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; 949 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; 950 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; 951 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; 952 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; 953 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; 954 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; 955 SRC(7,6)= (t10 + t11 + 1) >> 1; 956 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; 957} 958static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride) 959{ 960 PREDICT_8x8_LOAD_LEFT; 961 SRC(0,0)= (l0 + l1 + 1) >> 1; 962 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; 963 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; 964 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; 965 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; 966 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; 967 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; 968 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; 969 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; 970 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; 971 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; 972 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; 973 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; 974 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; 975 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= 976 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= 977 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= 978 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; 979} 980#undef PREDICT_8x8_LOAD_LEFT 981#undef PREDICT_8x8_LOAD_TOP 982#undef PREDICT_8x8_LOAD_TOPLEFT 983#undef PREDICT_8x8_LOAD_TOPRIGHT 984#undef PREDICT_8x8_DC 985#undef PTR 986#undef PT 987#undef PL 988#undef SRC 989 990static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 991 int i; 992 pix -= stride; 993 for(i=0; i<4; i++){ 994 uint8_t v = pix[0]; 995 pix[1*stride]= v += block[0]; 996 pix[2*stride]= v += block[4]; 997 pix[3*stride]= v += block[8]; 998 pix[4*stride]= v + block[12]; 999 pix++; 1000 block++; 1001 } 1002} 1003 1004static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1005 int i; 1006 for(i=0; i<4; i++){ 1007 uint8_t v = pix[-1]; 1008 pix[0]= v += block[0]; 1009 pix[1]= v += block[1]; 1010 pix[2]= v += block[2]; 1011 pix[3]= v + block[3]; 1012 pix+= stride; 1013 block+= 4; 1014 } 1015} 1016 1017static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1018 int i; 1019 pix -= stride; 1020 for(i=0; i<8; i++){ 1021 uint8_t v = pix[0]; 1022 pix[1*stride]= v += block[0]; 1023 pix[2*stride]= v += block[8]; 1024 pix[3*stride]= v += block[16]; 1025 pix[4*stride]= v += block[24]; 1026 pix[5*stride]= v += block[32]; 1027 pix[6*stride]= v += block[40]; 1028 pix[7*stride]= v += block[48]; 1029 pix[8*stride]= v + block[56]; 1030 pix++; 1031 block++; 1032 } 1033} 1034 1035static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1036 int i; 1037 for(i=0; i<8; i++){ 1038 uint8_t v = pix[-1]; 1039 pix[0]= v += block[0]; 1040 pix[1]= v += block[1]; 1041 pix[2]= v += block[2]; 1042 pix[3]= v += block[3]; 1043 pix[4]= v += block[4]; 1044 pix[5]= v += block[5]; 1045 pix[6]= v += block[6]; 1046 pix[7]= v + block[7]; 1047 pix+= stride; 1048 block+= 8; 1049 } 1050} 1051 1052static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1053 int i; 1054 for(i=0; i<16; i++) 1055 pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride); 1056} 1057 1058static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1059 int i; 1060 for(i=0; i<16; i++) 1061 pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride); 1062} 1063 1064static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1065 int i; 1066 for(i=0; i<4; i++) 1067 pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride); 1068} 1069 1070static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1071 int i; 1072 for(i=0; i<4; i++) 1073 pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride); 1074} 1075 1076 1077/** 1078 * Sets the intra prediction function pointers. 1079 */ 1080void ff_h264_pred_init(H264PredContext *h, int codec_id){ 1081// MpegEncContext * const s = &h->s; 1082 1083 if(codec_id != CODEC_ID_RV40){ 1084 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; 1085 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; 1086 h->pred4x4[DC_PRED ]= pred4x4_dc_c; 1087 if(codec_id == CODEC_ID_SVQ3) 1088 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c; 1089 else 1090 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c; 1091 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; 1092 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; 1093 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; 1094 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c; 1095 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c; 1096 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c; 1097 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; 1098 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; 1099 }else{ 1100 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; 1101 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; 1102 h->pred4x4[DC_PRED ]= pred4x4_dc_c; 1103 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c; 1104 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; 1105 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; 1106 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; 1107 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_rv40_c; 1108 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_rv40_c; 1109 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c; 1110 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; 1111 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; 1112 h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c; 1113 h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c; 1114 h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c; 1115 } 1116 1117 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c; 1118 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c; 1119 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c; 1120 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c; 1121 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c; 1122 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c; 1123 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c; 1124 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c; 1125 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c; 1126 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c; 1127 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; 1128 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; 1129 1130 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; 1131 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; 1132 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c; 1133 if(codec_id != CODEC_ID_RV40){ 1134 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; 1135 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; 1136 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; 1137 h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t; 1138 h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt; 1139 h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00; 1140 h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0; 1141 }else{ 1142 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_rv40_c; 1143 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c; 1144 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c; 1145 } 1146 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; 1147 1148 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; 1149 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; 1150 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; 1151 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; 1152 switch(codec_id){ 1153 case CODEC_ID_SVQ3: 1154 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_svq3_c; 1155 break; 1156 case CODEC_ID_RV40: 1157 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_rv40_c; 1158 break; 1159 default: 1160 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; 1161 } 1162 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; 1163 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; 1164 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; 1165 1166 //special lossless h/v prediction for h264 1167 h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c; 1168 h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c; 1169 h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c; 1170 h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c; 1171 h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c; 1172 h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c; 1173 h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c; 1174 h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c; 1175 1176 if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id); 1177} 1178