1/* 2 * VC-1 and WMV3 decoder - DSP functions 3 * Copyright (c) 2006 Konstantin Shishkov 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23* @file libavcodec/vc1dsp.c 24 * VC-1 and WMV3 decoder 25 * 26 */ 27 28#include "dsputil.h" 29 30 31/** Apply overlap transform to horizontal edge 32*/ 33static void vc1_v_overlap_c(uint8_t* src, int stride) 34{ 35 int i; 36 int a, b, c, d; 37 int d1, d2; 38 int rnd = 1; 39 for(i = 0; i < 8; i++) { 40 a = src[-2*stride]; 41 b = src[-stride]; 42 c = src[0]; 43 d = src[stride]; 44 d1 = (a - d + 3 + rnd) >> 3; 45 d2 = (a - d + b - c + 4 - rnd) >> 3; 46 47 src[-2*stride] = a - d1; 48 src[-stride] = av_clip_uint8(b - d2); 49 src[0] = av_clip_uint8(c + d2); 50 src[stride] = d + d1; 51 src++; 52 rnd = !rnd; 53 } 54} 55 56/** Apply overlap transform to vertical edge 57*/ 58static void vc1_h_overlap_c(uint8_t* src, int stride) 59{ 60 int i; 61 int a, b, c, d; 62 int d1, d2; 63 int rnd = 1; 64 for(i = 0; i < 8; i++) { 65 a = src[-2]; 66 b = src[-1]; 67 c = src[0]; 68 d = src[1]; 69 d1 = (a - d + 3 + rnd) >> 3; 70 d2 = (a - d + b - c + 4 - rnd) >> 3; 71 72 src[-2] = a - d1; 73 src[-1] = av_clip_uint8(b - d2); 74 src[0] = av_clip_uint8(c + d2); 75 src[1] = d + d1; 76 src += stride; 77 rnd = !rnd; 78 } 79} 80 81 82/** Do inverse transform on 8x8 block 83*/ 84static void vc1_inv_trans_8x8_c(DCTELEM block[64]) 85{ 86 int i; 87 register int t1,t2,t3,t4,t5,t6,t7,t8; 88 DCTELEM *src, *dst; 89 90 src = block; 91 dst = block; 92 for(i = 0; i < 8; i++){ 93 t1 = 12 * (src[0] + src[4]) + 4; 94 t2 = 12 * (src[0] - src[4]) + 4; 95 t3 = 16 * src[2] + 6 * src[6]; 96 t4 = 6 * src[2] - 16 * src[6]; 97 98 t5 = t1 + t3; 99 t6 = t2 + t4; 100 t7 = t2 - t4; 101 t8 = t1 - t3; 102 103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; 104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; 105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; 106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; 107 108 dst[0] = (t5 + t1) >> 3; 109 dst[1] = (t6 + t2) >> 3; 110 dst[2] = (t7 + t3) >> 3; 111 dst[3] = (t8 + t4) >> 3; 112 dst[4] = (t8 - t4) >> 3; 113 dst[5] = (t7 - t3) >> 3; 114 dst[6] = (t6 - t2) >> 3; 115 dst[7] = (t5 - t1) >> 3; 116 117 src += 8; 118 dst += 8; 119 } 120 121 src = block; 122 dst = block; 123 for(i = 0; i < 8; i++){ 124 t1 = 12 * (src[ 0] + src[32]) + 64; 125 t2 = 12 * (src[ 0] - src[32]) + 64; 126 t3 = 16 * src[16] + 6 * src[48]; 127 t4 = 6 * src[16] - 16 * src[48]; 128 129 t5 = t1 + t3; 130 t6 = t2 + t4; 131 t7 = t2 - t4; 132 t8 = t1 - t3; 133 134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; 135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; 136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; 137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; 138 139 dst[ 0] = (t5 + t1) >> 7; 140 dst[ 8] = (t6 + t2) >> 7; 141 dst[16] = (t7 + t3) >> 7; 142 dst[24] = (t8 + t4) >> 7; 143 dst[32] = (t8 - t4 + 1) >> 7; 144 dst[40] = (t7 - t3 + 1) >> 7; 145 dst[48] = (t6 - t2 + 1) >> 7; 146 dst[56] = (t5 - t1 + 1) >> 7; 147 148 src++; 149 dst++; 150 } 151} 152 153/** Do inverse transform on 8x4 part of block 154*/ 155static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) 156{ 157 int i; 158 register int t1,t2,t3,t4,t5,t6,t7,t8; 159 DCTELEM *src, *dst; 160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 161 162 src = block; 163 dst = block; 164 for(i = 0; i < 4; i++){ 165 t1 = 12 * (src[0] + src[4]) + 4; 166 t2 = 12 * (src[0] - src[4]) + 4; 167 t3 = 16 * src[2] + 6 * src[6]; 168 t4 = 6 * src[2] - 16 * src[6]; 169 170 t5 = t1 + t3; 171 t6 = t2 + t4; 172 t7 = t2 - t4; 173 t8 = t1 - t3; 174 175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; 176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; 177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; 178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; 179 180 dst[0] = (t5 + t1) >> 3; 181 dst[1] = (t6 + t2) >> 3; 182 dst[2] = (t7 + t3) >> 3; 183 dst[3] = (t8 + t4) >> 3; 184 dst[4] = (t8 - t4) >> 3; 185 dst[5] = (t7 - t3) >> 3; 186 dst[6] = (t6 - t2) >> 3; 187 dst[7] = (t5 - t1) >> 3; 188 189 src += 8; 190 dst += 8; 191 } 192 193 src = block; 194 for(i = 0; i < 8; i++){ 195 t1 = 17 * (src[ 0] + src[16]) + 64; 196 t2 = 17 * (src[ 0] - src[16]) + 64; 197 t3 = 22 * src[ 8] + 10 * src[24]; 198 t4 = 22 * src[24] - 10 * src[ 8]; 199 200 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; 201 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; 202 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; 203 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; 204 205 src ++; 206 dest++; 207 } 208} 209 210/** Do inverse transform on 4x8 parts of block 211*/ 212static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) 213{ 214 int i; 215 register int t1,t2,t3,t4,t5,t6,t7,t8; 216 DCTELEM *src, *dst; 217 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 218 219 src = block; 220 dst = block; 221 for(i = 0; i < 8; i++){ 222 t1 = 17 * (src[0] + src[2]) + 4; 223 t2 = 17 * (src[0] - src[2]) + 4; 224 t3 = 22 * src[1] + 10 * src[3]; 225 t4 = 22 * src[3] - 10 * src[1]; 226 227 dst[0] = (t1 + t3) >> 3; 228 dst[1] = (t2 - t4) >> 3; 229 dst[2] = (t2 + t4) >> 3; 230 dst[3] = (t1 - t3) >> 3; 231 232 src += 8; 233 dst += 8; 234 } 235 236 src = block; 237 for(i = 0; i < 4; i++){ 238 t1 = 12 * (src[ 0] + src[32]) + 64; 239 t2 = 12 * (src[ 0] - src[32]) + 64; 240 t3 = 16 * src[16] + 6 * src[48]; 241 t4 = 6 * src[16] - 16 * src[48]; 242 243 t5 = t1 + t3; 244 t6 = t2 + t4; 245 t7 = t2 - t4; 246 t8 = t1 - t3; 247 248 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; 249 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; 250 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; 251 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; 252 253 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)]; 254 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)]; 255 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)]; 256 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)]; 257 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)]; 258 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)]; 259 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)]; 260 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)]; 261 262 src ++; 263 dest++; 264 } 265} 266 267/** Do inverse transform on 4x4 part of block 268*/ 269static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) 270{ 271 int i; 272 register int t1,t2,t3,t4; 273 DCTELEM *src, *dst; 274 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 275 276 src = block; 277 dst = block; 278 for(i = 0; i < 4; i++){ 279 t1 = 17 * (src[0] + src[2]) + 4; 280 t2 = 17 * (src[0] - src[2]) + 4; 281 t3 = 22 * src[1] + 10 * src[3]; 282 t4 = 22 * src[3] - 10 * src[1]; 283 284 dst[0] = (t1 + t3) >> 3; 285 dst[1] = (t2 - t4) >> 3; 286 dst[2] = (t2 + t4) >> 3; 287 dst[3] = (t1 - t3) >> 3; 288 289 src += 8; 290 dst += 8; 291 } 292 293 src = block; 294 for(i = 0; i < 4; i++){ 295 t1 = 17 * (src[ 0] + src[16]) + 64; 296 t2 = 17 * (src[ 0] - src[16]) + 64; 297 t3 = 22 * src[ 8] + 10 * src[24]; 298 t4 = 22 * src[24] - 10 * src[ 8]; 299 300 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; 301 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; 302 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; 303 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; 304 305 src ++; 306 dest++; 307 } 308} 309 310/* motion compensation functions */ 311/** Filter in case of 2 filters */ 312#define VC1_MSPEL_FILTER_16B(DIR, TYPE) \ 313static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \ 314{ \ 315 switch(mode){ \ 316 case 0: /* no shift - should not occur */ \ 317 return 0; \ 318 case 1: /* 1/4 shift */ \ 319 return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \ 320 case 2: /* 1/2 shift */ \ 321 return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \ 322 case 3: /* 3/4 shift */ \ 323 return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \ 324 } \ 325 return 0; /* should not occur */ \ 326} 327 328VC1_MSPEL_FILTER_16B(ver, uint8_t); 329VC1_MSPEL_FILTER_16B(hor, int16_t); 330 331 332/** Filter used to interpolate fractional pel values 333 */ 334static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) 335{ 336 switch(mode){ 337 case 0: //no shift 338 return src[0]; 339 case 1: // 1/4 shift 340 return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6; 341 case 2: // 1/2 shift 342 return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4; 343 case 3: // 3/4 shift 344 return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6; 345 } 346 return 0; //should not occur 347} 348 349/** Function used to do motion compensation with bicubic interpolation 350 */ 351static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd) 352{ 353 int i, j; 354 355 if (vmode) { /* Horizontal filter to apply */ 356 int r; 357 358 if (hmode) { /* Vertical filter to apply, output to tmp */ 359 static const int shift_value[] = { 0, 5, 1, 5 }; 360 int shift = (shift_value[hmode]+shift_value[vmode])>>1; 361 int16_t tmp[11*8], *tptr = tmp; 362 363 r = (1<<(shift-1)) + rnd-1; 364 365 src -= 1; 366 for(j = 0; j < 8; j++) { 367 for(i = 0; i < 11; i++) 368 tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift; 369 src += stride; 370 tptr += 11; 371 } 372 373 r = 64-rnd; 374 tptr = tmp+1; 375 for(j = 0; j < 8; j++) { 376 for(i = 0; i < 8; i++) 377 dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7); 378 dst += stride; 379 tptr += 11; 380 } 381 382 return; 383 } 384 else { /* No horizontal filter, output 8 lines to dst */ 385 r = 1-rnd; 386 387 for(j = 0; j < 8; j++) { 388 for(i = 0; i < 8; i++) 389 dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r)); 390 src += stride; 391 dst += stride; 392 } 393 return; 394 } 395 } 396 397 /* Horizontal mode with no vertical mode */ 398 for(j = 0; j < 8; j++) { 399 for(i = 0; i < 8; i++) 400 dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd)); 401 dst += stride; 402 src += stride; 403 } 404} 405 406/* pixel functions - really are entry points to vc1_mspel_mc */ 407 408/* this one is defined in dsputil.c */ 409void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); 410 411#define PUT_VC1_MSPEL(a, b)\ 412static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ 413 vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 414} 415 416PUT_VC1_MSPEL(1, 0) 417PUT_VC1_MSPEL(2, 0) 418PUT_VC1_MSPEL(3, 0) 419 420PUT_VC1_MSPEL(0, 1) 421PUT_VC1_MSPEL(1, 1) 422PUT_VC1_MSPEL(2, 1) 423PUT_VC1_MSPEL(3, 1) 424 425PUT_VC1_MSPEL(0, 2) 426PUT_VC1_MSPEL(1, 2) 427PUT_VC1_MSPEL(2, 2) 428PUT_VC1_MSPEL(3, 2) 429 430PUT_VC1_MSPEL(0, 3) 431PUT_VC1_MSPEL(1, 3) 432PUT_VC1_MSPEL(2, 3) 433PUT_VC1_MSPEL(3, 3) 434 435void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { 436 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; 437 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; 438 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; 439 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; 440 dsp->vc1_h_overlap = vc1_h_overlap_c; 441 dsp->vc1_v_overlap = vc1_v_overlap_c; 442 443 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c; 444 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c; 445 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c; 446 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c; 447 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c; 448 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c; 449 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c; 450 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c; 451 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c; 452 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c; 453 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c; 454 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c; 455 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c; 456 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; 457 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; 458 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; 459} 460