/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include "config.h"
#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
#include "libavutil/pixdesc.h"

/* Defined empty; NOTE(review): no use is visible in this part of the file. */
#define DITHER1XBPP

/*
 * Fixed-point RGB -> YUV weights.
 *
 * Each macro takes a fractional weight, scales it by 219/255 (the three *Y
 * weights) or by 224/255 (the *U and *V weights), multiplies by
 * 2^RGB2YUV_SHIFT and rounds by adding 0.5 before the truncating cast to
 * int. Negative weights are rounded on their magnitude and then negated.
 * NOTE(review): the 219/224 factors look like limited-range scaling --
 * confirm against the code that consumes these macros.
 */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))

/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

TODO
more intelligent misalignment
avoidance for the horizontal scaler
write special vertical cubic upscale version
optimize C code (YV12 / minmax)
add support for packed pixel YUV input & output
add support for Y8 output
optimize BGR24 & BGR32
add BGR4 output support
write special BGR->BGR scaler
*/

/*
 * Ordered-dither tables. In the consumers further down (yuv2rgb_write() and
 * the yuv2mono templates) the row is picked from the output line number
 * (y & 1, y & 3 or y & 7) and the column from the pixel position; the
 * selected entry is added to the luma index before the colour-table lookup.
 */

/* 2 rows x 8 columns, entries 0..3; used for the green channel of RGB565/BGR565. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};

/* 2 rows x 8 columns, entries 0..6; used for RGB565/BGR565 (red, blue) and RGB555/BGR555. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};

/* 4 rows x 8 columns, entries 0..15; used for RGB444/BGR444. */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
};

/* 8x8, entries 0..31; used for red/green of RGB8/BGR8. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};

/* 8x8, entries 0..72; used for blue of RGB8/BGR8 and green of the 4-bit formats. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};

/*
 * 8x8, entries 0..217; used for red/blue of the 4-bit formats and by the
 * yuv2mono templates. Only this first "#if 1" variant is compiled; the
 * #elif/#else alternatives that follow are never selected.
 */
#if 1
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
#elif 1
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const
uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#elif 1
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#else
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
/* 8x8 table of even values 0..126. NOTE(review): no user is visible in this part of the file. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36, 68, 60, 92, 34, 66, 58, 90,},
{ 100,  4,124, 28, 98,  2,122, 26,},
{  52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{  32, 64, 56, 88, 38, 70, 62, 94,},
{  96,  0,120, 24,102,  6,126, 30,},
{  48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104,  8,118, 22,110, 14,},
};
/* Eight bytes, all 64. NOTE(review): no user is visible in this part of the file. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{   64, 64, 64, 64, 64, 64, 64, 64 };

/*
 * Store one 16-bit sample at pos for the 16-bit planar writers below.
 *
 * The stored value is bias + av_clip_<signedness>16(val >> shift), written
 * with AV_WB16 when big_endian is non-zero and with AV_WL16 otherwise.
 * The macro reads the locals `big_endian` and `shift` of the function it
 * expands in, and expands to a bare if/else, so use it only as a complete
 * statement inside braces.
 */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }

static
av_always_inline void
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    /*
     * Unfiltered 16-bit plane writer: each of the dstW int32_t samples is
     * rounded, shifted right by (19 - output_bits), clipped to uint16 and
     * stored in the requested byte order.
     */
    int i;
    int shift = 19 - output_bits;

    for (i = 0; i < dstW; i++) {
        /* add half of the discarded range so the shift rounds to nearest */
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val, 0, uint);
    }
}

/*
 * Vertically filtered 16-bit plane writer.
 *
 * For every output sample i, sums src[j][i] * filter[j] over the filterSize
 * taps, shifts right by (31 - output_bits), clips to int16 and stores the
 * result plus 0x8000 in the requested byte order.
 * The 0x8000 that is re-added equals the subtracted 0x40000000 after a
 * 15-bit shift, i.e. it matches output_bits == 16, which is the only value
 * this template is instantiated with below (yuv2NBPS(16, ...)).
 */
static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (30-output_bits); /* rounding term: half of 1 << shift */
        int j;

        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs), the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end. */
        val -= 0x40000000;
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val, 0x8000, int);
    }
}

#undef output_pixel

/*
 * Store one sample for the 9/10-bit planar writers: (val >> shift) clipped
 * through av_clip_uintp2(..., output_bits), written as a 16-bit word in the
 * byte order selected by the local `big_endian`. Reads the locals
 * `big_endian`, `shift` and `output_bits`; expands to a bare if/else.
 */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }

/*
 * Unfiltered 9/10-bit plane writer: rounds each int16_t sample, shifts it
 * right by (15 - output_bits) and stores it as a 16-bit word.
 */
static av_always_inline void
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val);
    }
}

/*
 * Vertically filtered 9/10-bit plane writer: sums src[j][i] * filter[j] over
 * filterSize taps, shifts right by (27 - output_bits) and stores the clipped
 * result as a 16-bit word.
 */
static av_always_inline void
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 11 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits); /* rounding term: half of 1 << shift */
        int j;

        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val);
    }
}

#undef output_pixel

/*
 * Instantiate the two planar writers for one bit depth and byte order:
 *   yuv2plane1_<bits><BE_LE>_c()  and  yuv2planeX_<bits><BE_LE>_c().
 * Both cast src to typeX_t and dest to uint16_t and forward to the
 * <template_size> template with is_be and bits as constants. The `dither`
 * and `offset` parameters are accepted but not used by these wrappers.
 */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)

/*
 * Vertically filtered 8-bit plane writer.
 *
 * Each output byte is the filterSize-tap sum of src[j][i] * filter[j],
 * seeded with the dither value for column (i + offset) & 7 shifted left by
 * 12, then shifted right by 19 and clipped with av_clip_uint8().
 * `dither` is indexed 0..7 only.
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        dest[i]= av_clip_uint8(val>>19);
    }
}

/*
 * Unfiltered 8-bit plane writer: adds the dither value for column
 * (i + offset) & 7 to each sample, shifts right by 7 and clips with
 * av_clip_uint8().
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int i;
    for (i=0; i<dstW; i++) {
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }
}

/*
 * Vertically filtered, interleaved chroma writer.
 *
 * Writes chrDstW pairs of bytes to dest. For PIX_FMT_NV12 each pair is
 * (U, V); for any other destination format the order is (V, U).
 * U is dithered with c->chrDither8[i & 7] and V with
 * c->chrDither8[(i + 3) & 7].
 */
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest, int chrDstW)
{
    enum PixelFormat dstFormat = c->dstFormat;
    const uint8_t
*chrDither = c->chrDither8;
    int i;

    if (dstFormat == PIX_FMT_NV12)
        /* U first, then V */
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            int j;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            dest[2*i]= av_clip_uint8(u>>19);
            dest[2*i+1]= av_clip_uint8(v>>19);
        }
    else
        /* same computation, but V is stored before U */
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            int j;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            dest[2*i]= av_clip_uint8(v>>19);
            dest[2*i+1]= av_clip_uint8(u>>19);
        }
}

/*
 * Store one 16-bit gray sample: big-endian when the local `target` is
 * PIX_FMT_GRAY16BE, little-endian otherwise. Expands to a bare if/else.
 */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }

/*
 * Vertically filtered 16-bit gray writer.
 *
 * Processes the line two pixels at a time (dstW >> 1 iterations, so the
 * last pixel of an odd-width line is not written by this loop). Each luma
 * sum starts at rounding term (1 << 14) minus the 0x40000000 overflow bias,
 * is shifted right by 15, clipped to int16 and stored with 0x8000 added
 * back. The chroma and alpha arguments are not used.
 */
static av_always_inline void
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = (1 << 14) - 0x40000000;
        int Y2 = (1 << 14) - 0x40000000;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        Y1 >>= 15;
        Y2 >>= 15;
        Y1 = av_clip_int16(Y1);
        Y2 = av_clip_int16(Y2);
        output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
        output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
    }
}

/*
 * 16-bit gray writer blending two source lines: each output sample is
 * (buf[0] * (4095 - yalpha) + buf[1] * yalpha) >> 15. Only buf, dest, dstW,
 * yalpha and target are used.
 */
static av_always_inline void
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum PixelFormat target)
{
    int yalpha1 = 4095 - yalpha; /* weight of buf[0]; yalpha is the weight of buf[1] */
    int i;
    const int32_t *buf0 = buf[0], *buf1 = buf[1];

    /* two pixels per iteration; an odd trailing pixel is not written */
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
    }
}

/*
 * 16-bit gray writer for a single source line: each sample of buf0 is
 * shifted left by one and stored. Only buf0, dest, dstW and target are used.
 */
static av_always_inline void
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
                        int uvalpha, int y, enum PixelFormat target)
{
    int i;

    /* two pixels per iteration; an odd trailing pixel is not written */
    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = buf0[i * 2    ] << 1;
        int Y2 = buf0[i * 2 + 1] << 1;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
    }
}

#undef output_pixel

/*
 * Generate the three entry points <name><ext>_X_c, <name><ext>_2_c and
 * <name><ext>_1_c for a packed format with 16-bit components.
 *
 * Each wrapper has the int16_t / uint8_t based signature shown below,
 * casts the sample pointers to int32_t and the destination to uint16_t,
 * and forwards to <name><base>_{X,2,1}_c_template with `fmt` as the
 * constant target format.
 */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
**vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt); \
}

/* yuv2gray16LE_{X,2,1}_c and yuv2gray16BE_{X,2,1}_c */
YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)

/*
 * Store one byte of eight 1-bit pixels: `acc` as is for PIX_FMT_MONOBLACK,
 * bitwise inverted for any other target. Expands to a bare if/else.
 */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }

/*
 * Vertically filtered 1 bit-per-pixel writer.
 *
 * Two luma samples are filtered per iteration (rounding term 1 << 18,
 * shift right by 19, clipped to 8 bits when bit 8 of either is set).
 * Each is offset by the dither_8x8_220 entry for row y & 7 and looked up
 * in `g`; the results are shifted into `acc` one bit at a time
 * (acc += acc + bit doubles acc and appends the bit). A byte is stored
 * after every eighth pixel. The chroma and alpha arguments are not used.
 *
 * NOTE(review): a byte is only stored when (i & 7) == 6, so when dstW is
 * not a multiple of 8 the bits gathered for the trailing pixels are never
 * written -- confirm callers guarantee suitable widths.
 */
static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum PixelFormat target)
{
    const uint8_t * const d128=dither_8x8_220[y&7];
    /* green lookup for chroma index 128 on both axes (presumably neutral chroma -- confirm) */
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int i;
    unsigned acc = 0;

    for (i = 0; i < dstW - 1; i += 2) {
        int j;
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i]   * lumFilter[j];
            Y2 += lumSrc[j][i+1] * lumFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        /* cheap out-of-range test: only clip when bit 8 is set in either value */
        if ((Y1 | Y2) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
        }
        acc += acc + g[Y1 + d128[(i + 0) & 7]];
        acc += acc + g[Y2 + d128[(i + 1) & 7]];
        if ((i & 7) == 6) {
output_pixel(*dest++, acc);
        }
    }
}

/*
 * 1 bit-per-pixel writer blending two source lines.
 *
 * Handles eight pixels per iteration: each luma value is
 * (buf[0] * (4095 - yalpha) + buf[1] * yalpha) >> 19, offset by the
 * dither_8x8_220 entry for row y & 7 and column 0..7, and looked up in `g`.
 * The eight results are packed into one byte, first pixel in the most
 * significant bit. Chroma and alpha arguments are not used.
 *
 * NOTE(review): the loop bound is dstW - 7, so trailing pixels of a width
 * that is not a multiple of 8 are not written.
 */
static av_always_inline void
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1];
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int yalpha1 = 4095 - yalpha;
    int i;

    for (i = 0; i < dstW - 7; i += 8) {
        /* acc += acc + bit: shift the accumulator left and append one bit */
        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
        output_pixel(*dest++, acc);
    }
}

/*
 * 1 bit-per-pixel writer for a single source line: like
 * yuv2mono_2_c_template() but each luma value is simply buf0[i] >> 7.
 *
 * NOTE(review): the loop bound is dstW - 7, so trailing pixels of a width
 * that is not a multiple of 8 are not written.
 */
static av_always_inline void
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf0, uint8_t *dest, int dstW,
                      int uvalpha, int y, enum PixelFormat target)
{
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int i;

    for (i = 0; i < dstW - 7; i += 8) {
        int acc =    g[(buf0[i    ] >> 7) + d128[0]];
        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
        acc += acc + g[(buf0[i + 5] >> 7) +
d128[5]];
        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
        output_pixel(*dest++, acc);
    }
}

#undef output_pixel

/*
 * Generate the three entry points <name><ext>_X_c, <name><ext>_2_c and
 * <name><ext>_1_c for a packed format with 8-bit (or smaller) components.
 * Each wrapper forwards its arguments unchanged to
 * <name><base>_{X,2,1}_c_template and appends `fmt` as the constant target
 * format.
 */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}

/* yuv2monowhite_{X,2,1}_c and yuv2monoblack_{X,2,1}_c */
YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)

/*
 * Store two horizontally adjacent pixels (four bytes) at dest[pos]:
 * Y1 U Y2 V for PIX_FMT_YUYV422, U Y1 V Y2 for any other target.
 * Reads the locals `dest` and `target`; expands to a bare if/else.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U; \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V; \
    } else { \
        dest[pos + 0] = U; \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V; \
        dest[pos + 3] = Y2; \
    }

/*
 * Vertically filtered packed 4:2:2 writer.
 *
 * Per iteration, filters two luma samples and one U/V pair (rounding term
 * 1 << 18, shift right by 19), clips all four to 8 bits when bit 8 of any
 * of them is set, and stores them through output_pixels(). Runs
 * dstW >> 1 iterations. The alpha argument is not used.
 */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t
*chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        /* 1 << 18 is half of the 1 << 19 divisor: round to nearest */
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        /* cheap out-of-range test: only clip when bit 8 is set somewhere */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        output_pixels(4*i, Y1, U, Y2, V);
    }
}

/*
 * Packed 4:2:2 writer blending two source lines.
 *
 * Luma is (buf[0] * (4095 - yalpha) + buf[1] * yalpha) >> 19 and chroma is
 * blended the same way with uvalpha. No clipping is applied here; the
 * values are stored as bytes by output_pixels(). The alpha argument is not
 * used.
 */
static av_always_inline void
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int  yalpha1 = 4095 - yalpha;
    int uvalpha1 = 4095 - uvalpha;
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;

        output_pixels(i * 4, Y1, U, Y2, V);
    }
}

/*
 * Packed 4:2:2 writer for a single luma line.
 *
 * Luma is buf0[i] >> 7. When uvalpha < 2048 chroma is taken from one
 * chroma line (>> 7); otherwise the two chroma lines are summed and
 * shifted right by 8, i.e. averaged. The alpha argument is not used.
 */
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target)
{
    const int16_t *ubuf0 = ubuf[0], *ubuf1
= ubuf[1], 672 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; 673 int i; 674 675 if (uvalpha < 2048) { 676 for (i = 0; i < (dstW >> 1); i++) { 677 int Y1 = buf0[i * 2] >> 7; 678 int Y2 = buf0[i * 2 + 1] >> 7; 679 int U = ubuf1[i] >> 7; 680 int V = vbuf1[i] >> 7; 681 682 output_pixels(i * 4, Y1, U, Y2, V); 683 } 684 } else { 685 for (i = 0; i < (dstW >> 1); i++) { 686 int Y1 = buf0[i * 2] >> 7; 687 int Y2 = buf0[i * 2 + 1] >> 7; 688 int U = (ubuf0[i] + ubuf1[i]) >> 8; 689 int V = (vbuf0[i] + vbuf1[i]) >> 8; 690 691 output_pixels(i * 4, Y1, U, Y2, V); 692 } 693 } 694} 695 696#undef output_pixels 697 698YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422) 699YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422) 700 701#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B) 702#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R) 703#define output_pixel(pos, val) \ 704 if (isBE(target)) { \ 705 AV_WB16(pos, val); \ 706 } else { \ 707 AV_WL16(pos, val); \ 708 } 709 710static av_always_inline void 711yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, 712 const int32_t **lumSrc, int lumFilterSize, 713 const int16_t *chrFilter, const int32_t **chrUSrc, 714 const int32_t **chrVSrc, int chrFilterSize, 715 const int32_t **alpSrc, uint16_t *dest, int dstW, 716 int y, enum PixelFormat target) 717{ 718 int i; 719 720 for (i = 0; i < (dstW >> 1); i++) { 721 int j; 722 int Y1 = -0x40000000; 723 int Y2 = -0x40000000; 724 int U = -128 << 23; // 19 725 int V = -128 << 23; 726 int R, G, B; 727 728 for (j = 0; j < lumFilterSize; j++) { 729 Y1 += lumSrc[j][i * 2] * lumFilter[j]; 730 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; 731 } 732 for (j = 0; j < chrFilterSize; j++) { 733 U += chrUSrc[j][i] * chrFilter[j]; 734 V += chrVSrc[j][i] * chrFilter[j]; 735 } 736 737 // 8bit: 12+15=27; 16-bit: 12+19=31 738 Y1 >>= 14; // 10 739 Y1 += 0x10000; 740 Y2 >>= 14; 741 Y2 += 0x10000; 742 U >>= 14; 743 V >>= 14; 744 745 // 8bit: 27 -> 17bit, 16bit: 
31 - 14 = 17bit 746 Y1 -= c->yuv2rgb_y_offset; 747 Y2 -= c->yuv2rgb_y_offset; 748 Y1 *= c->yuv2rgb_y_coeff; 749 Y2 *= c->yuv2rgb_y_coeff; 750 Y1 += 1 << 13; // 21 751 Y2 += 1 << 13; 752 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit 753 754 R = V * c->yuv2rgb_v2r_coeff; 755 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; 756 B = U * c->yuv2rgb_u2b_coeff; 757 758 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit 759 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); 760 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); 761 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); 762 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); 763 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); 764 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); 765 dest += 6; 766 } 767} 768 769static av_always_inline void 770yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2], 771 const int32_t *ubuf[2], const int32_t *vbuf[2], 772 const int32_t *abuf[2], uint16_t *dest, int dstW, 773 int yalpha, int uvalpha, int y, 774 enum PixelFormat target) 775{ 776 const int32_t *buf0 = buf[0], *buf1 = buf[1], 777 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], 778 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; 779 int yalpha1 = 4095 - yalpha; 780 int uvalpha1 = 4095 - uvalpha; 781 int i; 782 783 for (i = 0; i < (dstW >> 1); i++) { 784 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; 785 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; 786 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; 787 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; 788 int R, G, B; 789 790 Y1 -= c->yuv2rgb_y_offset; 791 Y2 -= c->yuv2rgb_y_offset; 792 Y1 *= c->yuv2rgb_y_coeff; 793 Y2 *= c->yuv2rgb_y_coeff; 794 Y1 += 1 << 13; 795 Y2 += 1 << 13; 796 797 R = V * c->yuv2rgb_v2r_coeff; 798 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; 799 B = U * c->yuv2rgb_u2b_coeff; 800 801 
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); 802 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); 803 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); 804 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); 805 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); 806 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); 807 dest += 6; 808 } 809} 810 811static av_always_inline void 812yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, 813 const int32_t *ubuf[2], const int32_t *vbuf[2], 814 const int32_t *abuf0, uint16_t *dest, int dstW, 815 int uvalpha, int y, enum PixelFormat target) 816{ 817 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], 818 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; 819 int i; 820 821 if (uvalpha < 2048) { 822 for (i = 0; i < (dstW >> 1); i++) { 823 int Y1 = (buf0[i * 2] ) >> 2; 824 int Y2 = (buf0[i * 2 + 1]) >> 2; 825 int U = (ubuf0[i] + (-128 << 11)) >> 2; 826 int V = (vbuf0[i] + (-128 << 11)) >> 2; 827 int R, G, B; 828 829 Y1 -= c->yuv2rgb_y_offset; 830 Y2 -= c->yuv2rgb_y_offset; 831 Y1 *= c->yuv2rgb_y_coeff; 832 Y2 *= c->yuv2rgb_y_coeff; 833 Y1 += 1 << 13; 834 Y2 += 1 << 13; 835 836 R = V * c->yuv2rgb_v2r_coeff; 837 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; 838 B = U * c->yuv2rgb_u2b_coeff; 839 840 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); 841 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); 842 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); 843 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); 844 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); 845 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); 846 dest += 6; 847 } 848 } else { 849 for (i = 0; i < (dstW >> 1); i++) { 850 int Y1 = (buf0[i * 2] ) >> 2; 851 int Y2 = (buf0[i * 2 + 1]) >> 2; 852 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3; 853 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3; 854 int R, G, B; 855 856 Y1 -= 
c->yuv2rgb_y_offset; 857 Y2 -= c->yuv2rgb_y_offset; 858 Y1 *= c->yuv2rgb_y_coeff; 859 Y2 *= c->yuv2rgb_y_coeff; 860 Y1 += 1 << 13; 861 Y2 += 1 << 13; 862 863 R = V * c->yuv2rgb_v2r_coeff; 864 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; 865 B = U * c->yuv2rgb_u2b_coeff; 866 867 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); 868 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); 869 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); 870 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); 871 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); 872 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); 873 dest += 6; 874 } 875 } 876} 877 878#undef output_pixel 879#undef r_b 880#undef b_r 881 882YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE) 883YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE) 884YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE) 885YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE) 886 887/* 888 * Write out 2 RGB pixels in the target pixel format. This function takes a 889 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of 890 * things like endianness conversion and shifting. The caller takes care of 891 * setting the correct offset in these tables from the chroma (U/V) values. 892 * This function then uses the luminance (Y1/Y2) values to write out the 893 * correct RGB values into the destination buffer. 
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
{
    /*
     * `i` is the index of the pixel pair within the line, `y` the output
     * line (used only to pick dither rows). The tables are reinterpreted
     * with the element width of the target format: 32, 8 or 16 bits.
     */
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        /* 32-bit packed: the three table entries are summed into one word */
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        /* alpha goes to bits 0..7 for the *32_1 layouts, to bits 24..31 otherwise */
        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
#else
        if (hasAlpha) {
            /* alpha goes to bits 0..7 for the *32_1 layouts, to bits 24..31 otherwise */
            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        } else {
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
        }
#endif
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        /* 24-bit packed: one byte per component, six bytes per pixel pair */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

/* table stored first / third: r then b for RGB24, b then r for BGR24 */
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] =   g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] =   g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        /*
         * 16-bit packed with ordered dither: a per-component dither offset
         * is added to the luma index before the table lookup. Blue uses
         * the opposite dither row from red and green.
         */
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            /* green has its own, smaller-amplitude table */
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_4[ y & 1     ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_4[ y & 1     ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            /* green shares the table with red but takes the other column */
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_8[ y & 1     ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_8[ y & 1     ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            /* RGB444 / BGR444 */
            dr1 = dither_4x4_16[ y & 3     ][0];
            dg1 = dither_4x4_16[ y & 3     ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3     ][1];
            dg2 = dither_4x4_16[ y & 3     ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        /* here the dither column also depends on the pixel position (i * 2, i * 2 + 1) */
        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];
        }

        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            /* two 4-bit pixels per byte: first pixel in the low nibble */
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}

/*
 * Vertically filtered table-driven RGB writer.
 *
 * Per iteration, filters two luma samples and one U/V pair (rounding term
 * 1 << 18, shift right by 19), clips them to 8 bits when bit 8 of any of
 * them is set, optionally filters two alpha samples with the luma filter,
 * selects the colour tables from the context with U and V and hands the
 * pair to yuv2rgb_write(). Runs dstW >> 1 iterations. alpSrc is read only
 * when hasAlpha is non-zero.
 */
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;
        int av_unused A1, A2;
        const void *r, *g, *b;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        /* cheap out-of-range test: only clip when bit 8 is set somewhere */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        if (hasAlpha) {
            /* alpha is filtered with the luma filter taps */
            A1 = 1 << 18;
            A2 = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
                A2 +=
alpSrc[j][i * 2 + 1] * lumFilter[j]; 1050 } 1051 A1 >>= 19; 1052 A2 >>= 19; 1053 if ((A1 | A2) & 0x100) { 1054 A1 = av_clip_uint8(A1); 1055 A2 = av_clip_uint8(A2); 1056 } 1057 } 1058 1059 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/ 1060 r = c->table_rV[V]; 1061 g = (c->table_gU[U] + c->table_gV[V]); 1062 b = c->table_bU[U]; 1063 1064 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, 1065 r, g, b, y, target, hasAlpha); 1066 } 1067} 1068 1069static av_always_inline void 1070yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], 1071 const int16_t *ubuf[2], const int16_t *vbuf[2], 1072 const int16_t *abuf[2], uint8_t *dest, int dstW, 1073 int yalpha, int uvalpha, int y, 1074 enum PixelFormat target, int hasAlpha) 1075{ 1076 const int16_t *buf0 = buf[0], *buf1 = buf[1], 1077 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], 1078 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], 1079 *abuf0 = hasAlpha ? abuf[0] : NULL, 1080 *abuf1 = hasAlpha ? abuf[1] : NULL; 1081 int yalpha1 = 4095 - yalpha; 1082 int uvalpha1 = 4095 - uvalpha; 1083 int i; 1084 1085 for (i = 0; i < (dstW >> 1); i++) { 1086 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; 1087 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; 1088 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; 1089 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; 1090 int A1, A2; 1091 const void *r = c->table_rV[V], 1092 *g = (c->table_gU[U] + c->table_gV[V]), 1093 *b = c->table_bU[U]; 1094 1095 if (hasAlpha) { 1096 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19; 1097 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19; 1098 } 1099 1100 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? 
A2 : 0, 1101 r, g, b, y, target, hasAlpha); 1102 } 1103} 1104 1105static av_always_inline void 1106yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, 1107 const int16_t *ubuf[2], const int16_t *vbuf[2], 1108 const int16_t *abuf0, uint8_t *dest, int dstW, 1109 int uvalpha, int y, enum PixelFormat target, 1110 int hasAlpha) 1111{ 1112 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], 1113 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; 1114 int i; 1115 1116 if (uvalpha < 2048) { 1117 for (i = 0; i < (dstW >> 1); i++) { 1118 int Y1 = buf0[i * 2] >> 7; 1119 int Y2 = buf0[i * 2 + 1] >> 7; 1120 int U = ubuf1[i] >> 7; 1121 int V = vbuf1[i] >> 7; 1122 int A1, A2; 1123 const void *r = c->table_rV[V], 1124 *g = (c->table_gU[U] + c->table_gV[V]), 1125 *b = c->table_bU[U]; 1126 1127 if (hasAlpha) { 1128 A1 = abuf0[i * 2 ] >> 7; 1129 A2 = abuf0[i * 2 + 1] >> 7; 1130 } 1131 1132 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, 1133 r, g, b, y, target, hasAlpha); 1134 } 1135 } else { 1136 for (i = 0; i < (dstW >> 1); i++) { 1137 int Y1 = buf0[i * 2] >> 7; 1138 int Y2 = buf0[i * 2 + 1] >> 7; 1139 int U = (ubuf0[i] + ubuf1[i]) >> 8; 1140 int V = (vbuf0[i] + vbuf1[i]) >> 8; 1141 int A1, A2; 1142 const void *r = c->table_rV[V], 1143 *g = (c->table_gU[U] + c->table_gV[V]), 1144 *b = c->table_bU[U]; 1145 1146 if (hasAlpha) { 1147 A1 = abuf0[i * 2 ] >> 7; 1148 A2 = abuf0[i * 2 + 1] >> 7; 1149 } 1150 1151 yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? 
A2 : 0, 1152 r, g, b, y, target, hasAlpha); 1153 } 1154 } 1155} 1156 1157#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ 1158static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ 1159 const int16_t **lumSrc, int lumFilterSize, \ 1160 const int16_t *chrFilter, const int16_t **chrUSrc, \ 1161 const int16_t **chrVSrc, int chrFilterSize, \ 1162 const int16_t **alpSrc, uint8_t *dest, int dstW, \ 1163 int y) \ 1164{ \ 1165 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ 1166 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ 1167 alpSrc, dest, dstW, y, fmt, hasAlpha); \ 1168} 1169#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ 1170YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ 1171static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ 1172 const int16_t *ubuf[2], const int16_t *vbuf[2], \ 1173 const int16_t *abuf[2], uint8_t *dest, int dstW, \ 1174 int yalpha, int uvalpha, int y) \ 1175{ \ 1176 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ 1177 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ 1178} \ 1179 \ 1180static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ 1181 const int16_t *ubuf[2], const int16_t *vbuf[2], \ 1182 const int16_t *abuf0, uint8_t *dest, int dstW, \ 1183 int uvalpha, int y) \ 1184{ \ 1185 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ 1186 dstW, uvalpha, y, fmt, hasAlpha); \ 1187} 1188 1189#if CONFIG_SMALL 1190YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1191YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1192#else 1193#if CONFIG_SWSCALE_ALPHA 1194YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1) 1195YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1) 1196#endif 1197YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0) 1198YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0) 1199#endif 1200YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0) 
1201YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0) 1202YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0) 1203YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0) 1204YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0) 1205YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0) 1206YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0) 1207YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0) 1208 1209static av_always_inline void 1210yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, 1211 const int16_t **lumSrc, int lumFilterSize, 1212 const int16_t *chrFilter, const int16_t **chrUSrc, 1213 const int16_t **chrVSrc, int chrFilterSize, 1214 const int16_t **alpSrc, uint8_t *dest, 1215 int dstW, int y, enum PixelFormat target, int hasAlpha) 1216{ 1217 int i; 1218 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4; 1219 1220 for (i = 0; i < dstW; i++) { 1221 int j; 1222 int Y = 0; 1223 int U = -128 << 19; 1224 int V = -128 << 19; 1225 int av_unused A; 1226 int R, G, B; 1227 1228 for (j = 0; j < lumFilterSize; j++) { 1229 Y += lumSrc[j][i] * lumFilter[j]; 1230 } 1231 for (j = 0; j < chrFilterSize; j++) { 1232 U += chrUSrc[j][i] * chrFilter[j]; 1233 V += chrVSrc[j][i] * chrFilter[j]; 1234 } 1235 Y >>= 10; 1236 U >>= 10; 1237 V >>= 10; 1238 if (hasAlpha) { 1239 A = 1 << 21; 1240 for (j = 0; j < lumFilterSize; j++) { 1241 A += alpSrc[j][i] * lumFilter[j]; 1242 } 1243 A >>= 19; 1244 if (A & 0x100) 1245 A = av_clip_uint8(A); 1246 } 1247 Y -= c->yuv2rgb_y_offset; 1248 Y *= c->yuv2rgb_y_coeff; 1249 Y += 1 << 21; 1250 R = Y + V*c->yuv2rgb_v2r_coeff; 1251 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; 1252 B = Y + U*c->yuv2rgb_u2b_coeff; 1253 if ((R | G | B) & 0xC0000000) { 1254 R = av_clip_uintp2(R, 30); 1255 G = av_clip_uintp2(G, 30); 1256 B = av_clip_uintp2(B, 30); 1257 } 1258 1259 switch(target) { 1260 case PIX_FMT_ARGB: 1261 dest[0] = hasAlpha ? 
A : 255; 1262 dest[1] = R >> 22; 1263 dest[2] = G >> 22; 1264 dest[3] = B >> 22; 1265 break; 1266 case PIX_FMT_RGB24: 1267 dest[0] = R >> 22; 1268 dest[1] = G >> 22; 1269 dest[2] = B >> 22; 1270 break; 1271 case PIX_FMT_RGBA: 1272 dest[0] = R >> 22; 1273 dest[1] = G >> 22; 1274 dest[2] = B >> 22; 1275 dest[3] = hasAlpha ? A : 255; 1276 break; 1277 case PIX_FMT_ABGR: 1278 dest[0] = hasAlpha ? A : 255; 1279 dest[1] = B >> 22; 1280 dest[2] = G >> 22; 1281 dest[3] = R >> 22; 1282 dest += 4; 1283 break; 1284 case PIX_FMT_BGR24: 1285 dest[0] = B >> 22; 1286 dest[1] = G >> 22; 1287 dest[2] = R >> 22; 1288 break; 1289 case PIX_FMT_BGRA: 1290 dest[0] = B >> 22; 1291 dest[1] = G >> 22; 1292 dest[2] = R >> 22; 1293 dest[3] = hasAlpha ? A : 255; 1294 break; 1295 } 1296 dest += step; 1297 } 1298} 1299 1300#if CONFIG_SMALL 1301YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1302YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1303YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1304YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) 1305#else 1306#if CONFIG_SWSCALE_ALPHA 1307YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1) 1308YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1) 1309YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1) 1310YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1) 1311#endif 1312YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0) 1313YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0) 1314YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0) 1315YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0) 1316#endif 1317YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0) 1318YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0) 1319 1320static av_always_inline void 
fillPlane(uint8_t* plane, int stride, 1321 int width, int height, 1322 int y, uint8_t val) 1323{ 1324 int i; 1325 uint8_t *ptr = plane + stride*y; 1326 for (i=0; i<height; i++) { 1327 memset(ptr, val, width); 1328 ptr += stride; 1329 } 1330} 1331 1332#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) 1333 1334#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b) 1335#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r) 1336 1337static av_always_inline void 1338rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width, 1339 enum PixelFormat origin) 1340{ 1341 int i; 1342 for (i = 0; i < width; i++) { 1343 unsigned int r_b = input_pixel(&src[i*3+0]); 1344 unsigned int g = input_pixel(&src[i*3+1]); 1345 unsigned int b_r = input_pixel(&src[i*3+2]); 1346 1347 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; 1348 } 1349} 1350 1351static av_always_inline void 1352rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV, 1353 const uint16_t *src1, const uint16_t *src2, 1354 int width, enum PixelFormat origin) 1355{ 1356 int i; 1357 assert(src1==src2); 1358 for (i = 0; i < width; i++) { 1359 int r_b = input_pixel(&src1[i*3+0]); 1360 int g = input_pixel(&src1[i*3+1]); 1361 int b_r = input_pixel(&src1[i*3+2]); 1362 1363 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; 1364 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; 1365 } 1366} 1367 1368static av_always_inline void 1369rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, 1370 const uint16_t *src1, const uint16_t *src2, 1371 int width, enum PixelFormat origin) 1372{ 1373 int i; 1374 assert(src1==src2); 1375 for (i = 0; i < width; i++) { 1376 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1; 1377 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1; 1378 int b_r = (input_pixel(&src1[6 * i + 
2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1; 1379 1380 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; 1381 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; 1382 } 1383} 1384 1385#undef r 1386#undef b 1387#undef input_pixel 1388 1389#define rgb48funcs(pattern, BE_LE, origin) \ 1390static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \ 1391 int width, uint32_t *unused) \ 1392{ \ 1393 const uint16_t *src = (const uint16_t *) _src; \ 1394 uint16_t *dst = (uint16_t *) _dst; \ 1395 rgb48ToY_c_template(dst, src, width, origin); \ 1396} \ 1397 \ 1398static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ 1399 const uint8_t *_src1, const uint8_t *_src2, \ 1400 int width, uint32_t *unused) \ 1401{ \ 1402 const uint16_t *src1 = (const uint16_t *) _src1, \ 1403 *src2 = (const uint16_t *) _src2; \ 1404 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ 1405 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ 1406} \ 1407 \ 1408static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ 1409 const uint8_t *_src1, const uint8_t *_src2, \ 1410 int width, uint32_t *unused) \ 1411{ \ 1412 const uint16_t *src1 = (const uint16_t *) _src1, \ 1413 *src2 = (const uint16_t *) _src2; \ 1414 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ 1415 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ 1416} 1417 1418rgb48funcs(rgb, LE, PIX_FMT_RGB48LE) 1419rgb48funcs(rgb, BE, PIX_FMT_RGB48BE) 1420rgb48funcs(bgr, LE, PIX_FMT_BGR48LE) 1421rgb48funcs(bgr, BE, PIX_FMT_BGR48BE) 1422 1423#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \ 1424 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \ 1425 (isBE(origin) ? 
AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) 1426 1427static av_always_inline void 1428rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, 1429 int width, enum PixelFormat origin, 1430 int shr, int shg, int shb, int shp, 1431 int maskr, int maskg, int maskb, 1432 int rsh, int gsh, int bsh, int S) 1433{ 1434 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh; 1435 const unsigned rnd = 33u << (S - 1); 1436 int i; 1437 1438 for (i = 0; i < width; i++) { 1439 int px = input_pixel(i) >> shp; 1440 int b = (px & maskb) >> shb; 1441 int g = (px & maskg) >> shg; 1442 int r = (px & maskr) >> shr; 1443 1444 dst[i] = (ry * r + gy * g + by * b + rnd) >> S; 1445 } 1446} 1447 1448static av_always_inline void 1449rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, 1450 const uint8_t *src, int width, 1451 enum PixelFormat origin, 1452 int shr, int shg, int shb, int shp, 1453 int maskr, int maskg, int maskb, 1454 int rsh, int gsh, int bsh, int S) 1455{ 1456 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, 1457 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh; 1458 const unsigned rnd = 257u << (S - 1); 1459 int i; 1460 1461 for (i = 0; i < width; i++) { 1462 int px = input_pixel(i) >> shp; 1463 int b = (px & maskb) >> shb; 1464 int g = (px & maskg) >> shg; 1465 int r = (px & maskr) >> shr; 1466 1467 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S; 1468 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S; 1469 } 1470} 1471 1472static av_always_inline void 1473rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, 1474 const uint8_t *src, int width, 1475 enum PixelFormat origin, 1476 int shr, int shg, int shb, int shp, 1477 int maskr, int maskg, int maskb, 1478 int rsh, int gsh, int bsh, int S) 1479{ 1480 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, 1481 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, 1482 maskgx = ~(maskr | maskb); 1483 const unsigned rnd = 257u << S; 1484 int i; 1485 1486 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; 
1487 for (i = 0; i < width; i++) { 1488 int px0 = input_pixel(2 * i + 0) >> shp; 1489 int px1 = input_pixel(2 * i + 1) >> shp; 1490 int b, r, g = (px0 & maskgx) + (px1 & maskgx); 1491 int rb = px0 + px1 - g; 1492 1493 b = (rb & maskb) >> shb; 1494 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE || 1495 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) { 1496 g >>= shg; 1497 } else { 1498 g = (g & maskg) >> shg; 1499 } 1500 r = (rb & maskr) >> shr; 1501 1502 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1); 1503 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1); 1504 } 1505} 1506 1507#undef input_pixel 1508 1509#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ 1510 maskg, maskb, rsh, gsh, bsh, S) \ 1511static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ 1512 int width, uint32_t *unused) \ 1513{ \ 1514 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ 1515 maskr, maskg, maskb, rsh, gsh, bsh, S); \ 1516} \ 1517 \ 1518static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ 1519 const uint8_t *src, const uint8_t *dummy, \ 1520 int width, uint32_t *unused) \ 1521{ \ 1522 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ 1523 maskr, maskg, maskb, rsh, gsh, bsh, S); \ 1524} \ 1525 \ 1526static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ 1527 const uint8_t *src, const uint8_t *dummy, \ 1528 int width, uint32_t *unused) \ 1529{ \ 1530 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ 1531 maskr, maskg, maskb, rsh, gsh, bsh, S); \ 1532} 1533 1534rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8) 1535rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8) 1536rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8) 1537rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 
/** Extract the alpha plane from 4-byte pixels whose first byte is alpha. */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *alpha = src;      /* byte 0 of every 4-byte pixel */
    int i;

    for (i = 0; i < width; i++, alpha += 4)
        dst[i] = *alpha;
}

/** Extract the alpha plane from 4-byte pixels whose last byte is alpha. */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const uint8_t *alpha = src + 3;  /* byte 3 of every 4-byte pixel */
    int i;

    for (i = 0; i < width; i++, alpha += 4)
        dst[i] = *alpha;
}

/**
 * Look every source index up in the palette and store the low byte of the
 * palette entry as luma.
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++) {
        const int index = src[i];

        dst[i] = pal[index] & 0xFF;
    }
}

/**
 * Look every source index up in the palette and store bits 8-15 of the
 * entry as U and bits 16-23 as V. src2 must equal src1 (asserted).
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        int entry = pal[src1[i]];

        dstU[i] = entry >> 8;
        dstV[i] = entry >> 16;
    }
}
/**
 * Expand 1 bit-per-pixel data (most significant bit first) to one byte per
 * pixel: a set bit becomes 255, a clear bit 0.
 *
 * @param flip  XORed into every source byte before expansion
 *              (0xFF inverts the bits, 0 keeps them)
 *
 * Exactly width bytes of dst are written; when width is not a multiple of
 * 8 the remaining pixels are taken from the high bits of the next source
 * byte.
 */
static inline void mono_expand(uint8_t *dst, const uint8_t *src,
                               int width, int flip)
{
    const int full_bytes = width / 8;
    const int tail_bits  = width > 0 ? (width & 7) : 0;
    int i, j;

    for (i = 0; i < full_bytes; i++) {
        int d = src[i] ^ flip;
        for (j = 0; j < 8; j++)
            dst[8*i + j] = ((d >> (7 - j)) & 1) * 255;
    }
    if (tail_bits) {
        /* trailing partial byte: only the pixels inside the line */
        int d = src[full_bytes] ^ flip;
        for (j = 0; j < tail_bits; j++)
            dst[8*full_bytes + j] = ((d >> (7 - j)) & 1) * 255;
    }
}

/** 1 bpp input where a clear bit is white: clear bits become 255. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    mono_expand(dst, src, width, 0xFF);
}

/** 1 bpp input where a set bit is white: set bits become 255. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    mono_expand(dst, src, width, 0);
}

//FIXME yuy2* can read up to 7 samples too much

/** Copy every second byte (offset 0) of the source line to dst. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = src[2*i];
}

/**
 * From every group of 4 source bytes store byte 1 as U and byte 3 as V.
 * src2 must equal src1 (asserted); only src1 is read.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = src1[4*i + 1];
        dstV[i] = src1[4*i + 3];
    }
    assert(src1 == src2);
}
This is almost identical to the previous, end exists only because 1658 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ 1659static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, 1660 uint32_t *unused) 1661{ 1662 int i; 1663 for (i=0; i<width; i++) 1664 dst[i]= src[2*i+1]; 1665} 1666 1667static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, 1668 const uint8_t *src2, int width, uint32_t *unused) 1669{ 1670 int i; 1671 for (i=0; i<width; i++) { 1672 dstU[i]= src1[4*i + 0]; 1673 dstV[i]= src1[4*i + 2]; 1674 } 1675 assert(src1 == src2); 1676} 1677 1678static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, 1679 const uint8_t *src, int width) 1680{ 1681 int i; 1682 for (i = 0; i < width; i++) { 1683 dst1[i] = src[2*i+0]; 1684 dst2[i] = src[2*i+1]; 1685 } 1686} 1687 1688static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, 1689 const uint8_t *src1, const uint8_t *src2, 1690 int width, uint32_t *unused) 1691{ 1692 nvXXtoUV_c(dstU, dstV, src1, width); 1693} 1694 1695static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, 1696 const uint8_t *src1, const uint8_t *src2, 1697 int width, uint32_t *unused) 1698{ 1699 nvXXtoUV_c(dstV, dstU, src1, width); 1700} 1701 1702#define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) 1703 1704static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, 1705 int width, uint32_t *unused) 1706{ 1707 int i; 1708 for (i=0; i<width; i++) { 1709 int b= src[i*3+0]; 1710 int g= src[i*3+1]; 1711 int r= src[i*3+2]; 1712 1713 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); 1714 } 1715} 1716 1717static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, 1718 const uint8_t *src2, int width, uint32_t *unused) 1719{ 1720 int i; 1721 for (i=0; i<width; i++) { 1722 int b= src1[3*i + 0]; 1723 int g= src1[3*i + 1]; 1724 int r= src1[3*i + 2]; 1725 1726 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; 1727 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; 1728 } 1729 assert(src1 == src2); 1730} 1731 1732static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, 1733 const uint8_t *src2, int width, uint32_t *unused) 1734{ 1735 int i; 1736 for (i=0; i<width; i++) { 1737 int b= src1[6*i + 0] + src1[6*i + 3]; 1738 int g= src1[6*i + 1] + src1[6*i + 4]; 1739 int r= src1[6*i + 2] + src1[6*i + 5]; 1740 1741 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); 1742 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); 1743 } 1744 assert(src1 == src2); 1745} 1746 1747static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, 1748 uint32_t *unused) 1749{ 1750 int i; 1751 for (i=0; i<width; i++) { 1752 int r= src[i*3+0]; 1753 int g= src[i*3+1]; 1754 int b= src[i*3+2]; 1755 1756 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); 1757 } 1758} 1759 1760static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, 1761 const uint8_t *src2, int width, uint32_t *unused) 1762{ 1763 int i; 1764 assert(src1==src2); 1765 for (i=0; i<width; i++) { 1766 int r= src1[3*i + 0]; 1767 int g= src1[3*i + 1]; 1768 int b= src1[3*i + 2]; 1769 1770 dstU[i]= (RU*r + GU*g + BU*b + 
(257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; 1771 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; 1772 } 1773} 1774 1775static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, 1776 const uint8_t *src2, int width, uint32_t *unused) 1777{ 1778 int i; 1779 assert(src1==src2); 1780 for (i=0; i<width; i++) { 1781 int r= src1[6*i + 0] + src1[6*i + 3]; 1782 int g= src1[6*i + 1] + src1[6*i + 4]; 1783 int b= src1[6*i + 2] + src1[6*i + 5]; 1784 1785 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); 1786 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); 1787 } 1788} 1789 1790static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) 1791{ 1792 int i; 1793 for (i = 0; i < width; i++) { 1794 int g = src[0][i]; 1795 int b = src[1][i]; 1796 int r = src[2][i]; 1797 1798 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); 1799 } 1800} 1801 1802static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width) 1803{ 1804 int i; 1805 const uint16_t **src = (const uint16_t **) _src; 1806 uint16_t *dst = (uint16_t *) _dst; 1807 for (i = 0; i < width; i++) { 1808 int g = AV_RL16(src[0] + i); 1809 int b = AV_RL16(src[1] + i); 1810 int r = AV_RL16(src[2] + i); 1811 1812 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); 1813 } 1814} 1815 1816static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width) 1817{ 1818 int i; 1819 const uint16_t **src = (const uint16_t **) _src; 1820 uint16_t *dst = (uint16_t *) _dst; 1821 for (i = 0; i < width; i++) { 1822 int g = AV_RB16(src[0] + i); 1823 int b = AV_RB16(src[1] + i); 1824 int r = AV_RB16(src[2] + i); 1825 1826 dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); 1827 } 1828} 1829 1830static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width) 1831{ 1832 int i; 1833 for 
(i = 0; i < width; i++) { 1834 int g = src[0][i]; 1835 int b = src[1][i]; 1836 int r = src[2][i]; 1837 1838 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1839 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1840 } 1841} 1842 1843static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width) 1844{ 1845 int i; 1846 const uint16_t **src = (const uint16_t **) _src; 1847 uint16_t *dstU = (uint16_t *) _dstU; 1848 uint16_t *dstV = (uint16_t *) _dstV; 1849 for (i = 0; i < width; i++) { 1850 int g = AV_RL16(src[0] + i); 1851 int b = AV_RL16(src[1] + i); 1852 int r = AV_RL16(src[2] + i); 1853 1854 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1855 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1856 } 1857} 1858 1859static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width) 1860{ 1861 int i; 1862 const uint16_t **src = (const uint16_t **) _src; 1863 uint16_t *dstU = (uint16_t *) _dstU; 1864 uint16_t *dstV = (uint16_t *) _dstV; 1865 for (i = 0; i < width; i++) { 1866 int g = AV_RB16(src[0] + i); 1867 int b = AV_RB16(src[1] + i); 1868 int r = AV_RB16(src[2] + i); 1869 1870 dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1871 dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); 1872 } 1873} 1874 1875static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, 1876 const int16_t *filter, 1877 const int32_t *filterPos, int filterSize) 1878{ 1879 int i; 1880 int32_t *dst = (int32_t *) _dst; 1881 const uint16_t *src = (const uint16_t *) _src; 1882 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; 1883 int sh = bits - 4; 1884 1885 for (i = 0; i < dstW; i++) { 1886 int j; 1887 int srcPos = filterPos[i]; 1888 int val = 0; 1889 1890 for (j 
= 0; j < filterSize; j++) { 1891 val += src[srcPos + j] * filter[filterSize * i + j]; 1892 } 1893 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit 1894 dst[i] = FFMIN(val >> sh, (1 << 19) - 1); 1895 } 1896} 1897 1898static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, 1899 const int16_t *filter, 1900 const int32_t *filterPos, int filterSize) 1901{ 1902 int i; 1903 const uint16_t *src = (const uint16_t *) _src; 1904 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; 1905 1906 for (i = 0; i < dstW; i++) { 1907 int j; 1908 int srcPos = filterPos[i]; 1909 int val = 0; 1910 1911 for (j = 0; j < filterSize; j++) { 1912 val += src[srcPos + j] * filter[filterSize * i + j]; 1913 } 1914 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit 1915 dst[i] = FFMIN(val >> sh, (1 << 15) - 1); 1916 } 1917} 1918 1919// bilinear / bicubic scaling 1920static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, 1921 const int16_t *filter, const int32_t *filterPos, 1922 int filterSize) 1923{ 1924 int i; 1925 for (i=0; i<dstW; i++) { 1926 int j; 1927 int srcPos= filterPos[i]; 1928 int val=0; 1929 for (j=0; j<filterSize; j++) { 1930 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; 1931 } 1932 //filter += hFilterSize; 1933 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... 1934 //dst[i] = val>>7; 1935 } 1936} 1937 1938static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, 1939 const int16_t *filter, const int32_t *filterPos, 1940 int filterSize) 1941{ 1942 int i; 1943 int32_t *dst = (int32_t *) _dst; 1944 for (i=0; i<dstW; i++) { 1945 int j; 1946 int srcPos= filterPos[i]; 1947 int val=0; 1948 for (j=0; j<filterSize; j++) { 1949 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; 1950 } 1951 //filter += hFilterSize; 1952 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ... 
1953 //dst[i] = val>>7; 1954 } 1955} 1956 1957//FIXME all pal and rgb srcFormats could do this convertion as well 1958//FIXME all scalers more complex than bilinear could do half of this transform 1959static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) 1960{ 1961 int i; 1962 for (i = 0; i < width; i++) { 1963 dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264 1964 dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264 1965 } 1966} 1967static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) 1968{ 1969 int i; 1970 for (i = 0; i < width; i++) { 1971 dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469 1972 dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469 1973 } 1974} 1975static void lumRangeToJpeg_c(int16_t *dst, int width) 1976{ 1977 int i; 1978 for (i = 0; i < width; i++) 1979 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; 1980} 1981static void lumRangeFromJpeg_c(int16_t *dst, int width) 1982{ 1983 int i; 1984 for (i = 0; i < width; i++) 1985 dst[i] = (dst[i]*14071 + 33561947)>>14; 1986} 1987 1988static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) 1989{ 1990 int i; 1991 int32_t *dstU = (int32_t *) _dstU; 1992 int32_t *dstV = (int32_t *) _dstV; 1993 for (i = 0; i < width; i++) { 1994 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264 1995 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264 1996 } 1997} 1998static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) 1999{ 2000 int i; 2001 int32_t *dstU = (int32_t *) _dstU; 2002 int32_t *dstV = (int32_t *) _dstV; 2003 for (i = 0; i < width; i++) { 2004 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469 2005 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469 2006 } 2007} 2008static void lumRangeToJpeg16_c(int16_t *_dst, int width) 2009{ 2010 int i; 2011 int32_t *dst = (int32_t *) _dst; 2012 for (i = 0; i < width; i++) 2013 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12; 2014} 2015static void 
lumRangeFromJpeg16_c(int16_t *_dst, int width) 2016{ 2017 int i; 2018 int32_t *dst = (int32_t *) _dst; 2019 for (i = 0; i < width; i++) 2020 dst[i] = (dst[i]*14071 + (33561947<<4))>>14; 2021} 2022 2023static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, 2024 const uint8_t *src, int srcW, int xInc) 2025{ 2026 int i; 2027 unsigned int xpos=0; 2028 for (i=0;i<dstWidth;i++) { 2029 register unsigned int xx=xpos>>16; 2030 register unsigned int xalpha=(xpos&0xFFFF)>>9; 2031 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; 2032 xpos+=xInc; 2033 } 2034} 2035 2036// *** horizontal scale Y line to temp buffer 2037static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, 2038 const uint8_t *src_in[4], int srcW, int xInc, 2039 const int16_t *hLumFilter, 2040 const int32_t *hLumFilterPos, int hLumFilterSize, 2041 uint8_t *formatConvBuffer, 2042 uint32_t *pal, int isAlpha) 2043{ 2044 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; 2045 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; 2046 const uint8_t *src = src_in[isAlpha ? 
3 : 0]; 2047 2048 if (toYV12) { 2049 toYV12(formatConvBuffer, src, srcW, pal); 2050 src= formatConvBuffer; 2051 } else if (c->readLumPlanar && !isAlpha) { 2052 c->readLumPlanar(formatConvBuffer, src_in, srcW); 2053 src = formatConvBuffer; 2054 } 2055 2056 if (!c->hyscale_fast) { 2057 c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); 2058 } else { // fast bilinear upscale / crap downscale 2059 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); 2060 } 2061 2062 if (convertRange) 2063 convertRange(dst, dstWidth); 2064} 2065 2066static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, 2067 int dstWidth, const uint8_t *src1, 2068 const uint8_t *src2, int srcW, int xInc) 2069{ 2070 int i; 2071 unsigned int xpos=0; 2072 for (i=0;i<dstWidth;i++) { 2073 register unsigned int xx=xpos>>16; 2074 register unsigned int xalpha=(xpos&0xFFFF)>>9; 2075 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); 2076 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); 2077 xpos+=xInc; 2078 } 2079} 2080 2081static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, 2082 const uint8_t *src_in[4], 2083 int srcW, int xInc, const int16_t *hChrFilter, 2084 const int32_t *hChrFilterPos, int hChrFilterSize, 2085 uint8_t *formatConvBuffer, uint32_t *pal) 2086{ 2087 const uint8_t *src1 = src_in[1], *src2 = src_in[2]; 2088 if (c->chrToYV12) { 2089 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); 2090 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); 2091 src1= formatConvBuffer; 2092 src2= buf2; 2093 } else if (c->readChrPlanar) { 2094 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); 2095 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); 2096 src1= formatConvBuffer; 2097 src2= buf2; 2098 } 2099 2100 if (!c->hcscale_fast) { 2101 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); 2102 c->hcScale(c, dst2, dstWidth, 
/**
 * Select the C implementations of the vertical scaler / output writers for
 * c->dstFormat and store them through the given function-pointer arguments.
 *
 * Only the pointers relevant for the destination format are written; the
 * others are left untouched, so callers keep whatever value they passed in.
 *
 * @param c            context; dstFormat, flags and alpPixBuf are read
 * @param yuv2plane1   planar writer for a single input line
 * @param yuv2planeX   planar writer for an N-tap vertical filter
 * @param yuv2nv12cX   interleaved chroma writer (set for NV12/NV21 only)
 * @param yuv2packed1  packed writer, single luma line
 * @param yuv2packed2  packed writer, two-line blend
 * @param yuv2packedX  packed writer, N-tap vertical filter
 */
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
{
    enum PixelFormat dstFormat = c->dstFormat;

    /* planar writers, chosen by destination bit depth and endianness */
    if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        /* depth_minus1 == 8 means 9-bit samples, otherwise 10-bit */
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
        } else {
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
        }
    } else {
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;
    }

    /* packed RGB writers; with SWS_FULL_CHR_H_INT only the X variant is set */
    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            case PIX_FMT_RGBA:
#if CONFIG_SMALL
                *yuv2packedX = yuv2rgba32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
                if (c->alpPixBuf) {
                    *yuv2packedX = yuv2rgba32_full_X_c;
                } else
#endif /* CONFIG_SWSCALE_ALPHA */
                {
                    *yuv2packedX = yuv2rgbx32_full_X_c;
                }
#endif /* !CONFIG_SMALL */
                break;
            case PIX_FMT_ARGB:
#if CONFIG_SMALL
                *yuv2packedX = yuv2argb32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
                if (c->alpPixBuf) {
                    *yuv2packedX = yuv2argb32_full_X_c;
                } else
#endif /* CONFIG_SWSCALE_ALPHA */
                {
                    *yuv2packedX = yuv2xrgb32_full_X_c;
                }
#endif /* !CONFIG_SMALL */
                break;
            case PIX_FMT_BGRA:
#if CONFIG_SMALL
                *yuv2packedX = yuv2bgra32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
                if (c->alpPixBuf) {
                    *yuv2packedX = yuv2bgra32_full_X_c;
                } else
#endif /* CONFIG_SWSCALE_ALPHA */
                {
                    *yuv2packedX = yuv2bgrx32_full_X_c;
                }
#endif /* !CONFIG_SMALL */
                break;
            case PIX_FMT_ABGR:
#if CONFIG_SMALL
                *yuv2packedX = yuv2abgr32_full_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
                if (c->alpPixBuf) {
                    *yuv2packedX = yuv2abgr32_full_X_c;
                } else
#endif /* CONFIG_SWSCALE_ALPHA */
                {
                    *yuv2packedX = yuv2xbgr32_full_X_c;
                }
#endif /* !CONFIG_SMALL */
                break;
            case PIX_FMT_RGB24:
            *yuv2packedX = yuv2rgb24_full_X_c;
            break;
        case PIX_FMT_BGR24:
            *yuv2packedX = yuv2bgr24_full_X_c;
            break;
        }
    } else {
        switch (dstFormat) {
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
            *yuv2packedX = yuv2rgb48le_X_c;
            break;
        case PIX_FMT_RGB48BE:
            *yuv2packed1 = yuv2rgb48be_1_c;
            *yuv2packed2 = yuv2rgb48be_2_c;
            *yuv2packedX = yuv2rgb48be_X_c;
            break;
        case PIX_FMT_BGR48LE:
            *yuv2packed1 = yuv2bgr48le_1_c;
            *yuv2packed2 = yuv2bgr48le_2_c;
            *yuv2packedX = yuv2bgr48le_X_c;
            break;
        case PIX_FMT_BGR48BE:
            *yuv2packed1 = yuv2bgr48be_1_c;
            *yuv2packed2 = yuv2bgr48be_2_c;
            *yuv2packedX = yuv2bgr48be_X_c;
            break;
        case PIX_FMT_RGB32:
        case PIX_FMT_BGR32:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_c;
            *yuv2packed2 = yuv2rgb32_2_c;
            *yuv2packedX = yuv2rgb32_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packed1 = yuv2rgba32_1_c;
                *yuv2packed2 = yuv2rgba32_2_c;
                *yuv2packedX = yuv2rgba32_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_c;
                *yuv2packed2 = yuv2rgbx32_2_c;
                *yuv2packedX = yuv2rgbx32_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_RGB32_1:
        case PIX_FMT_BGR32_1:
#if CONFIG_SMALL
            *yuv2packed1 = yuv2rgb32_1_1_c;
            *yuv2packed2 = yuv2rgb32_1_2_c;
            *yuv2packedX = yuv2rgb32_1_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
            if (c->alpPixBuf) {
                *yuv2packed1 = yuv2rgba32_1_1_c;
                *yuv2packed2 = yuv2rgba32_1_2_c;
                *yuv2packedX = yuv2rgba32_1_X_c;
            } else
#endif /* CONFIG_SWSCALE_ALPHA */
            {
                *yuv2packed1 = yuv2rgbx32_1_1_c;
                *yuv2packed2 = yuv2rgbx32_1_2_c;
                *yuv2packedX = yuv2rgbx32_1_X_c;
            }
#endif /* !CONFIG_SMALL */
            break;
        case PIX_FMT_RGB24:
            *yuv2packed1 = yuv2rgb24_1_c;
            *yuv2packed2 = yuv2rgb24_2_c;
            *yuv2packedX = yuv2rgb24_X_c;
            break;
        case PIX_FMT_BGR24:
            *yuv2packed1 = yuv2bgr24_1_c;
            *yuv2packed2 = yuv2bgr24_2_c;
            *yuv2packedX = yuv2bgr24_X_c;
            break;
        case PIX_FMT_RGB565LE:
        case PIX_FMT_RGB565BE:
        case PIX_FMT_BGR565LE:
        case PIX_FMT_BGR565BE:
            *yuv2packed1 = yuv2rgb16_1_c;
            *yuv2packed2 = yuv2rgb16_2_c;
            *yuv2packedX = yuv2rgb16_X_c;
            break;
        case PIX_FMT_RGB555LE:
        case PIX_FMT_RGB555BE:
        case PIX_FMT_BGR555LE:
        case PIX_FMT_BGR555BE:
            *yuv2packed1 = yuv2rgb15_1_c;
            *yuv2packed2 = yuv2rgb15_2_c;
            *yuv2packedX = yuv2rgb15_X_c;
            break;
        case PIX_FMT_RGB444LE:
        case PIX_FMT_RGB444BE:
        case PIX_FMT_BGR444LE:
        case PIX_FMT_BGR444BE:
            *yuv2packed1 = yuv2rgb12_1_c;
            *yuv2packed2 = yuv2rgb12_2_c;
            *yuv2packedX = yuv2rgb12_X_c;
            break;
        case PIX_FMT_RGB8:
        case PIX_FMT_BGR8:
            *yuv2packed1 = yuv2rgb8_1_c;
            *yuv2packed2 = yuv2rgb8_2_c;
            *yuv2packedX = yuv2rgb8_X_c;
            break;
        case PIX_FMT_RGB4:
        case PIX_FMT_BGR4:
            *yuv2packed1 = yuv2rgb4_1_c;
            *yuv2packed2 = yuv2rgb4_2_c;
            *yuv2packedX = yuv2rgb4_X_c;
            break;
        case PIX_FMT_RGB4_BYTE:
        case PIX_FMT_BGR4_BYTE:
            *yuv2packed1 = yuv2rgb4b_1_c;
            *yuv2packed2 = yuv2rgb4b_2_c;
            *yuv2packedX = yuv2rgb4b_X_c;
            break;
        }
    }
    /* non-RGB packed formats; evaluated regardless of SWS_FULL_CHR_H_INT */
    switch (dstFormat) {
    case PIX_FMT_GRAY16BE:
        *yuv2packed1 = yuv2gray16BE_1_c;
        *yuv2packed2 = yuv2gray16BE_2_c;
        *yuv2packedX = yuv2gray16BE_X_c;
        break;
    case PIX_FMT_GRAY16LE:
        *yuv2packed1 = yuv2gray16LE_1_c;
        *yuv2packed2 = yuv2gray16LE_2_c;
        *yuv2packedX = yuv2gray16LE_X_c;
        break;
    case PIX_FMT_MONOWHITE:
        *yuv2packed1 = yuv2monowhite_1_c;
        *yuv2packed2 = yuv2monowhite_2_c;
        *yuv2packedX = yuv2monowhite_X_c;
        break;
    case PIX_FMT_MONOBLACK:
        *yuv2packed1 = yuv2monoblack_1_c;
        *yuv2packed2 = yuv2monoblack_2_c;
        *yuv2packedX = yuv2monoblack_X_c;
        break;
    case PIX_FMT_YUYV422:
        *yuv2packed1 = yuv2yuyv422_1_c;
        *yuv2packed2 = yuv2yuyv422_2_c;
        *yuv2packedX = yuv2yuyv422_X_c;
        break;
    case PIX_FMT_UYVY422:
        *yuv2packed1 = yuv2uyvy422_1_c;
        *yuv2packed2 = yuv2uyvy422_2_c;
        *yuv2packedX = yuv2uyvy422_X_c;
        break;
    }
}
c->yuv2plane1; 2399 yuv2planarX_fn yuv2planeX = c->yuv2planeX; 2400 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; 2401 yuv2packed1_fn yuv2packed1 = c->yuv2packed1; 2402 yuv2packed2_fn yuv2packed2 = c->yuv2packed2; 2403 yuv2packedX_fn yuv2packedX = c->yuv2packedX; 2404 int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); 2405 2406 /* vars which will change and which we need to store back in the context */ 2407 int dstY= c->dstY; 2408 int lumBufIndex= c->lumBufIndex; 2409 int chrBufIndex= c->chrBufIndex; 2410 int lastInLumBuf= c->lastInLumBuf; 2411 int lastInChrBuf= c->lastInChrBuf; 2412 2413 if (isPacked(c->srcFormat)) { 2414 src[0]= 2415 src[1]= 2416 src[2]= 2417 src[3]= src[0]; 2418 srcStride[0]= 2419 srcStride[1]= 2420 srcStride[2]= 2421 srcStride[3]= srcStride[0]; 2422 } 2423 srcStride[1]<<= c->vChrDrop; 2424 srcStride[2]<<= c->vChrDrop; 2425 2426 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", 2427 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], 2428 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); 2429 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", 2430 srcSliceY, srcSliceH, dstY, dstH); 2431 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", 2432 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); 2433 2434 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { 2435 static int warnedAlready=0; //FIXME move this into the context perhaps 2436 if (flags & SWS_PRINT_INFO && !warnedAlready) { 2437 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" 2438 " ->cannot do aligned memory accesses anymore\n"); 2439 warnedAlready=1; 2440 } 2441 } 2442 2443 /* Note the user might start scaling the picture in the middle so this 2444 will not get executed. 
This is not really intended but works 2445 currently, so people might do it. */ 2446 if (srcSliceY ==0) { 2447 lumBufIndex=-1; 2448 chrBufIndex=-1; 2449 dstY=0; 2450 lastInLumBuf= -1; 2451 lastInChrBuf= -1; 2452 } 2453 2454 if (!should_dither) { 2455 c->chrDither8 = c->lumDither8 = ff_sws_pb_64; 2456 } 2457 lastDstY= dstY; 2458 2459 for (;dstY < dstH; dstY++) { 2460 const int chrDstY= dstY>>c->chrDstVSubSample; 2461 uint8_t *dest[4] = { 2462 dst[0] + dstStride[0] * dstY, 2463 dst[1] + dstStride[1] * chrDstY, 2464 dst[2] + dstStride[2] * chrDstY, 2465 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, 2466 }; 2467 2468 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input 2469 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; 2470 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input 2471 2472 // Last line needed as input 2473 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; 2474 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; 2475 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; 2476 int enough_lines; 2477 2478 //handle holes (FAST_BILINEAR & weird filters) 2479 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; 2480 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; 2481 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); 2482 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); 2483 2484 DEBUG_BUFFERS("dstY: %d\n", dstY); 2485 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", 2486 firstLumSrcY, lastLumSrcY, lastInLumBuf); 2487 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n", 2488 firstChrSrcY, lastChrSrcY, lastInChrBuf); 2489 2490 // Do we have enough lines in this slice to output the dstY line 2491 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); 
2492 2493 if (!enough_lines) { 2494 lastLumSrcY = srcSliceY + srcSliceH - 1; 2495 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; 2496 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n", 2497 lastLumSrcY, lastChrSrcY); 2498 } 2499 2500 //Do horizontal scaling 2501 while(lastInLumBuf < lastLumSrcY) { 2502 const uint8_t *src1[4] = { 2503 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0], 2504 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1], 2505 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2], 2506 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3], 2507 }; 2508 lumBufIndex++; 2509 assert(lumBufIndex < 2*vLumBufSize); 2510 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); 2511 assert(lastInLumBuf + 1 - srcSliceY >= 0); 2512 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, 2513 hLumFilter, hLumFilterPos, hLumFilterSize, 2514 formatConvBuffer, 2515 pal, 0); 2516 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) 2517 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW, 2518 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, 2519 formatConvBuffer, 2520 pal, 1); 2521 lastInLumBuf++; 2522 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", 2523 lumBufIndex, lastInLumBuf); 2524 } 2525 while(lastInChrBuf < lastChrSrcY) { 2526 const uint8_t *src1[4] = { 2527 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0], 2528 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1], 2529 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2], 2530 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3], 2531 }; 2532 chrBufIndex++; 2533 assert(chrBufIndex < 2*vChrBufSize); 2534 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); 2535 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); 2536 //FIXME replace parameters through context struct (some at least) 2537 2538 if (c->needs_hcscale) 2539 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], 2540 chrDstW, src1, chrSrcW, chrXInc, 2541 hChrFilter, hChrFilterPos, 
hChrFilterSize, 2542 formatConvBuffer, pal); 2543 lastInChrBuf++; 2544 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", 2545 chrBufIndex, lastInChrBuf); 2546 } 2547 //wrap buf index around to stay inside the ring buffer 2548 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; 2549 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; 2550 if (!enough_lines) 2551 break; //we can't output a dstY line so let's try with the next slice 2552 2553#if HAVE_MMX 2554 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); 2555#endif 2556 if (should_dither) { 2557 c->chrDither8 = dither_8x8_128[chrDstY & 7]; 2558 c->lumDither8 = dither_8x8_128[dstY & 7]; 2559 } 2560 if (dstY >= dstH-2) { 2561 // hmm looks like we can't use MMX here without overwriting this array's tail 2562 find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, 2563 &yuv2packed1, &yuv2packed2, &yuv2packedX); 2564 } 2565 2566 { 2567 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; 2568 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; 2569 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; 2570 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? 
(const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; 2571 2572 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { 2573 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; 2574 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); 2575 for (i = 0; i < neg; i++) 2576 tmpY[i] = lumSrcPtr[neg]; 2577 for ( ; i < end; i++) 2578 tmpY[i] = lumSrcPtr[i]; 2579 for ( ; i < vLumFilterSize; i++) 2580 tmpY[i] = tmpY[i-1]; 2581 lumSrcPtr = tmpY; 2582 2583 if (alpSrcPtr) { 2584 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; 2585 for (i = 0; i < neg; i++) 2586 tmpA[i] = alpSrcPtr[neg]; 2587 for ( ; i < end; i++) 2588 tmpA[i] = alpSrcPtr[i]; 2589 for ( ; i < vLumFilterSize; i++) 2590 tmpA[i] = tmpA[i - 1]; 2591 alpSrcPtr = tmpA; 2592 } 2593 } 2594 if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { 2595 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize, 2596 **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize; 2597 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); 2598 for (i = 0; i < neg; i++) { 2599 tmpU[i] = chrUSrcPtr[neg]; 2600 tmpV[i] = chrVSrcPtr[neg]; 2601 } 2602 for ( ; i < end; i++) { 2603 tmpU[i] = chrUSrcPtr[i]; 2604 tmpV[i] = chrVSrcPtr[i]; 2605 } 2606 for ( ; i < vChrFilterSize; i++) { 2607 tmpU[i] = tmpU[i - 1]; 2608 tmpV[i] = tmpV[i - 1]; 2609 } 2610 chrUSrcPtr = tmpU; 2611 chrVSrcPtr = tmpV; 2612 } 2613 2614 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like 2615 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; 2616 2617 if (vLumFilterSize == 1) { 2618 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); 2619 } else { 2620 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, 2621 lumSrcPtr, dest[0], dstW, c->lumDither8, 0); 2622 } 2623 2624 if (!((dstY&chrSkipMask) || isGray(dstFormat))) { 2625 if (yuv2nv12cX) { 2626 
yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); 2627 } else if (vChrFilterSize == 1) { 2628 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); 2629 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); 2630 } else { 2631 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, 2632 chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); 2633 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, 2634 chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3); 2635 } 2636 } 2637 2638 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ 2639 if (vLumFilterSize == 1) { 2640 yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); 2641 } else { 2642 yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, 2643 alpSrcPtr, dest[3], dstW, c->lumDither8, 0); 2644 } 2645 } 2646 } else { 2647 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); 2648 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); 2649 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB 2650 int chrAlpha = vChrFilter[2 * dstY + 1]; 2651 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr, 2652 alpPixBuf ? *alpSrcPtr : NULL, 2653 dest[0], dstW, chrAlpha, dstY); 2654 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB 2655 int lumAlpha = vLumFilter[2 * dstY + 1]; 2656 int chrAlpha = vChrFilter[2 * dstY + 1]; 2657 lumMmxFilter[2] = 2658 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001; 2659 chrMmxFilter[2] = 2660 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001; 2661 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr, 2662 alpPixBuf ? 
alpSrcPtr : NULL, 2663 dest[0], dstW, lumAlpha, chrAlpha, dstY); 2664 } else { //general RGB 2665 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize, 2666 lumSrcPtr, vLumFilterSize, 2667 vChrFilter + dstY * vChrFilterSize, 2668 chrUSrcPtr, chrVSrcPtr, vChrFilterSize, 2669 alpSrcPtr, dest[0], dstW, dstY); 2670 } 2671 } 2672 } 2673 } 2674 2675 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) 2676 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); 2677 2678#if HAVE_MMX2 2679 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) 2680 __asm__ volatile("sfence":::"memory"); 2681#endif 2682 emms_c(); 2683 2684 /* store changed local vars back in the context */ 2685 c->dstY= dstY; 2686 c->lumBufIndex= lumBufIndex; 2687 c->chrBufIndex= chrBufIndex; 2688 c->lastInLumBuf= lastInLumBuf; 2689 c->lastInChrBuf= lastInChrBuf; 2690 2691 return dstY - lastDstY; 2692} 2693 2694static av_cold void sws_init_swScale_c(SwsContext *c) 2695{ 2696 enum PixelFormat srcFormat = c->srcFormat; 2697 2698 find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX, 2699 &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2, 2700 &c->yuv2packedX); 2701 2702 c->chrToYV12 = NULL; 2703 switch(srcFormat) { 2704 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break; 2705 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break; 2706 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break; 2707 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break; 2708 case PIX_FMT_RGB8 : 2709 case PIX_FMT_BGR8 : 2710 case PIX_FMT_PAL8 : 2711 case PIX_FMT_BGR4_BYTE: 2712 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; 2713 case PIX_FMT_GBRP9LE: 2714 case PIX_FMT_GBRP10LE: 2715 case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break; 2716 case PIX_FMT_GBRP9BE: 2717 case PIX_FMT_GBRP10BE: 2718 case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break; 2719 case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break; 2720#if HAVE_BIGENDIAN 2721 case PIX_FMT_YUV444P9LE: 2722 case 
PIX_FMT_YUV422P9LE: 2723 case PIX_FMT_YUV420P9LE: 2724 case PIX_FMT_YUV422P10LE: 2725 case PIX_FMT_YUV444P10LE: 2726 case PIX_FMT_YUV420P10LE: 2727 case PIX_FMT_YUV420P16LE: 2728 case PIX_FMT_YUV422P16LE: 2729 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; 2730#else 2731 case PIX_FMT_YUV444P9BE: 2732 case PIX_FMT_YUV422P9BE: 2733 case PIX_FMT_YUV420P9BE: 2734 case PIX_FMT_YUV444P10BE: 2735 case PIX_FMT_YUV422P10BE: 2736 case PIX_FMT_YUV420P10BE: 2737 case PIX_FMT_YUV420P16BE: 2738 case PIX_FMT_YUV422P16BE: 2739 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break; 2740#endif 2741 } 2742 if (c->chrSrcHSubSample) { 2743 switch(srcFormat) { 2744 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break; 2745 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break; 2746 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break; 2747 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break; 2748 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break; 2749 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break; 2750 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; 2751 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break; 2752 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break; 2753 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break; 2754 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; 2755 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break; 2756 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break; 2757 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; 2758 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break; 2759 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; 2760 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break; 2761 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; 2762 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; 2763 case 
PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; 2764 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break; 2765 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break; 2766 } 2767 } else { 2768 switch(srcFormat) { 2769 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break; 2770 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break; 2771 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break; 2772 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break; 2773 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break; 2774 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break; 2775 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; 2776 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break; 2777 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break; 2778 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break; 2779 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break; 2780 case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break; 2781 case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break; 2782 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; 2783 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break; 2784 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; 2785 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break; 2786 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break; 2787 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break; 2788 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break; 2789 case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break; 2790 case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break; 2791 } 2792 } 2793 2794 c->lumToYV12 = NULL; 2795 c->alpToYV12 = NULL; 2796 switch (srcFormat) { 2797 case PIX_FMT_GBRP9LE: 2798 case PIX_FMT_GBRP10LE: 2799 case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break; 2800 case PIX_FMT_GBRP9BE: 2801 case PIX_FMT_GBRP10BE: 2802 case PIX_FMT_GBRP16BE: c->readLumPlanar = 
planar_rgb16be_to_y; break; 2803 case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break; 2804#if HAVE_BIGENDIAN 2805 case PIX_FMT_YUV444P9LE: 2806 case PIX_FMT_YUV422P9LE: 2807 case PIX_FMT_YUV420P9LE: 2808 case PIX_FMT_YUV444P10LE: 2809 case PIX_FMT_YUV422P10LE: 2810 case PIX_FMT_YUV420P10LE: 2811 case PIX_FMT_YUV420P16LE: 2812 case PIX_FMT_YUV422P16LE: 2813 case PIX_FMT_YUV444P16LE: 2814 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; 2815#else 2816 case PIX_FMT_YUV444P9BE: 2817 case PIX_FMT_YUV422P9BE: 2818 case PIX_FMT_YUV420P9BE: 2819 case PIX_FMT_YUV444P10BE: 2820 case PIX_FMT_YUV422P10BE: 2821 case PIX_FMT_YUV420P10BE: 2822 case PIX_FMT_YUV420P16BE: 2823 case PIX_FMT_YUV422P16BE: 2824 case PIX_FMT_YUV444P16BE: 2825 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break; 2826#endif 2827 case PIX_FMT_YUYV422 : 2828 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break; 2829 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break; 2830 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; 2831 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break; 2832 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; 2833 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break; 2834 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break; 2835 case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break; 2836 case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break; 2837 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; 2838 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break; 2839 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break; 2840 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break; 2841 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break; 2842 case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break; 2843 case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break; 2844 case PIX_FMT_RGB8 : 2845 case PIX_FMT_BGR8 : 2846 case PIX_FMT_PAL8 : 2847 case PIX_FMT_BGR4_BYTE: 2848 case PIX_FMT_RGB4_BYTE: 
c->lumToYV12 = palToY_c; break; 2849 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break; 2850 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break; 2851 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break; 2852 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break; 2853 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break; 2854 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break; 2855 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break; 2856 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break; 2857 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break; 2858 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break; 2859 } 2860 if (c->alpPixBuf) { 2861 switch (srcFormat) { 2862 case PIX_FMT_BGRA: 2863 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break; 2864 case PIX_FMT_ABGR: 2865 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; 2866 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; 2867 } 2868 } 2869 2870 if (c->srcBpc == 8) { 2871 if (c->dstBpc <= 10) { 2872 c->hyScale = c->hcScale = hScale8To15_c; 2873 if (c->flags & SWS_FAST_BILINEAR) { 2874 c->hyscale_fast = hyscale_fast_c; 2875 c->hcscale_fast = hcscale_fast_c; 2876 } 2877 } else { 2878 c->hyScale = c->hcScale = hScale8To19_c; 2879 } 2880 } else { 2881 c->hyScale = c->hcScale = c->dstBpc > 10 ? 
hScale16To19_c : hScale16To15_c; 2882 } 2883 2884 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { 2885 if (c->dstBpc <= 10) { 2886 if (c->srcRange) { 2887 c->lumConvertRange = lumRangeFromJpeg_c; 2888 c->chrConvertRange = chrRangeFromJpeg_c; 2889 } else { 2890 c->lumConvertRange = lumRangeToJpeg_c; 2891 c->chrConvertRange = chrRangeToJpeg_c; 2892 } 2893 } else { 2894 if (c->srcRange) { 2895 c->lumConvertRange = lumRangeFromJpeg16_c; 2896 c->chrConvertRange = chrRangeFromJpeg16_c; 2897 } else { 2898 c->lumConvertRange = lumRangeToJpeg16_c; 2899 c->chrConvertRange = chrRangeToJpeg16_c; 2900 } 2901 } 2902 } 2903 2904 if (!(isGray(srcFormat) || isGray(c->dstFormat) || 2905 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE)) 2906 c->needs_hcscale = 1; 2907} 2908 /** Initialize the function pointers of a scaler context and return the scaling function. Runs sws_init_swScale_c(c) first, then ff_sws_init_swScale_mmx(c) when HAVE_MMX is non-zero and ff_sws_init_swScale_altivec(c) when HAVE_ALTIVEC is non-zero. @param c context handed unchanged to every initializer. @return swScale, in every configuration. NOTE(review): the MMX/AltiVec initializers presumably replace some of the C function pointers with optimized versions -- confirm in their definitions. */ 2909SwsFunc ff_getSwsFunc(SwsContext *c) 2910{ 2911 /* The C initialization always runs, and runs before any arch-specific one. */ sws_init_swScale_c(c); 2912 /* HAVE_MMX / HAVE_ALTIVEC are tested with a plain if, not #if (presumably build-time constants from config.h -- confirm). */ 2913 if (HAVE_MMX) 2914 ff_sws_init_swScale_mmx(c); 2915 if (HAVE_ALTIVEC) 2916 ff_sws_init_swScale_altivec(c); 2917 /* The returned function is the same whichever initializers ran. */ 2918 return swScale; 2919} 2920