1/* 2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#ifndef SWSCALE_SWSCALE_INTERNAL_H 22#define SWSCALE_SWSCALE_INTERNAL_H 23 24#include "config.h" 25 26#if HAVE_ALTIVEC_H 27#include <altivec.h> 28#endif 29 30#include "libavutil/avutil.h" 31 32#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long 33 34#define MAX_FILTER_SIZE 256 35 36#if ARCH_X86 37#define VOFW 5120 38#else 39#define VOFW 2048 // faster on PPC and not tested on others 40#endif 41 42#define VOF (VOFW*2) 43 44#if HAVE_BIGENDIAN 45#define ALT32_CORR (-1) 46#else 47#define ALT32_CORR 1 48#endif 49 50#if ARCH_X86_64 51# define APCK_PTR2 8 52# define APCK_COEF 16 53# define APCK_SIZE 24 54#else 55# define APCK_PTR2 4 56# define APCK_COEF 8 57# define APCK_SIZE 16 58#endif 59 60struct SwsContext; 61 62typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], 63 int srcStride[], int srcSliceY, int srcSliceH, 64 uint8_t* dst[], int dstStride[]); 65 66/* This struct should be aligned on at least a 32-byte boundary. */ 67typedef struct SwsContext { 68 /** 69 * info on struct for av_log 70 */ 71 const AVClass *av_class; 72 73 /** 74 * Note that src, dst, srcStride, dstStride will be copied in the 75 * sws_scale() wrapper so they can be freely modified here. 76 */ 77 SwsFunc swScale; 78 int srcW; ///< Width of source luma/alpha planes. 79 int srcH; ///< Height of source luma/alpha planes. 80 int dstH; ///< Height of destination luma/alpha planes. 81 int chrSrcW; ///< Width of source chroma planes. 82 int chrSrcH; ///< Height of source chroma planes. 83 int chrDstW; ///< Width of destination chroma planes. 84 int chrDstH; ///< Height of destination chroma planes. 85 int lumXInc, chrXInc; 86 int lumYInc, chrYInc; 87 enum PixelFormat dstFormat; ///< Destination pixel format. 88 enum PixelFormat srcFormat; ///< Source pixel format. 89 int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format. 90 int srcFormatBpp; ///< Number of bits per pixel of the source pixel format. 91 int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image. 92 int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image. 93 int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. 94 int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image. 95 int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. 96 int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). 97 double param[2]; ///< Input parameters for scaling algorithms that need them. 98 99 uint32_t pal_yuv[256]; 100 uint32_t pal_rgb[256]; 101 102 /** 103 * @name Scaled horizontal lines ring buffer. 104 * The horizontal scaler keeps just enough scaled lines in a ring buffer 105 * so they may be passed to the vertical scaler. The pointers to the 106 * allocated buffers for each line are duplicated in sequence in the ring 107 * buffer to simplify indexing and avoid wrapping around between lines 108 * inside the vertical scaler code. The wrapping is done before the 109 * vertical scaler is called. 110 */ 111 //@{ 112 int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler. 113 int16_t **chrPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. 114 int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. 115 int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. 116 int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. 117 int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. 118 int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. 119 int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. 120 int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. 121 //@} 122 123 uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful 124 125 /** 126 * @name Horizontal and vertical filters. 127 * To better understand the following fields, here is a pseudo-code of 128 * their usage in filtering a horizontal line: 129 * @code 130 * for (i = 0; i < width; i++) { 131 * dst[i] = 0; 132 * for (j = 0; j < filterSize; j++) 133 * dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ]; 134 * dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point. 135 * } 136 * @endcode 137 */ 138 //@{ 139 int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes. 140 int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. 141 int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. 142 int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. 143 int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. 144 int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. 145 int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. 146 int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. 147 int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. 148 int hChrFilterSize; ///< Horizontal filter size for chroma pixels. 149 int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. 150 int vChrFilterSize; ///< Vertical filter size for chroma pixels. 151 //@} 152 153 int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes. 154 int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes. 155 uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes. 156 uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes. 157 158 int canMMX2BeUsed; 159 160 int dstY; ///< Last destination vertical line output from last slice. 161 int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... 162 void * yuvTable; // pointer to the yuv->rgb table start so it can be freed() 163 uint8_t * table_rV[256]; 164 uint8_t * table_gU[256]; 165 int table_gV[256]; 166 uint8_t * table_bU[256]; 167 168 //Colorspace stuff 169 int contrast, brightness, saturation; // for sws_getColorspaceDetails 170 int srcColorspaceTable[4]; 171 int dstColorspaceTable[4]; 172 int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image). 173 int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). 174 int yuv2rgb_y_offset; 175 int yuv2rgb_y_coeff; 176 int yuv2rgb_v2r_coeff; 177 int yuv2rgb_v2g_coeff; 178 int yuv2rgb_u2g_coeff; 179 int yuv2rgb_u2b_coeff; 180 181#define RED_DITHER "0*8" 182#define GREEN_DITHER "1*8" 183#define BLUE_DITHER "2*8" 184#define Y_COEFF "3*8" 185#define VR_COEFF "4*8" 186#define UB_COEFF "5*8" 187#define VG_COEFF "6*8" 188#define UG_COEFF "7*8" 189#define Y_OFFSET "8*8" 190#define U_OFFSET "9*8" 191#define V_OFFSET "10*8" 192#define LUM_MMX_FILTER_OFFSET "11*8" 193#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256" 194#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM 195#define ESP_OFFSET "11*8+4*4*256*2+8" 196#define VROUNDER_OFFSET "11*8+4*4*256*2+16" 197#define U_TEMP "11*8+4*4*256*2+24" 198#define V_TEMP "11*8+4*4*256*2+32" 199#define Y_TEMP "11*8+4*4*256*2+40" 200#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" 201 202 DECLARE_ALIGNED(8, uint64_t, redDither); 203 DECLARE_ALIGNED(8, uint64_t, greenDither); 204 DECLARE_ALIGNED(8, uint64_t, blueDither); 205 206 DECLARE_ALIGNED(8, uint64_t, yCoeff); 207 DECLARE_ALIGNED(8, uint64_t, vrCoeff); 208 DECLARE_ALIGNED(8, uint64_t, ubCoeff); 209 DECLARE_ALIGNED(8, uint64_t, vgCoeff); 210 DECLARE_ALIGNED(8, uint64_t, ugCoeff); 211 DECLARE_ALIGNED(8, uint64_t, yOffset); 212 DECLARE_ALIGNED(8, uint64_t, uOffset); 213 DECLARE_ALIGNED(8, uint64_t, vOffset); 214 int32_t lumMmxFilter[4*MAX_FILTER_SIZE]; 215 int32_t chrMmxFilter[4*MAX_FILTER_SIZE]; 216 int dstW; ///< Width of destination luma/alpha planes. 217 DECLARE_ALIGNED(8, uint64_t, esp); 218 DECLARE_ALIGNED(8, uint64_t, vRounder); 219 DECLARE_ALIGNED(8, uint64_t, u_temp); 220 DECLARE_ALIGNED(8, uint64_t, v_temp); 221 DECLARE_ALIGNED(8, uint64_t, y_temp); 222 int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; 223 224#if HAVE_ALTIVEC 225 vector signed short CY; 226 vector signed short CRV; 227 vector signed short CBU; 228 vector signed short CGU; 229 vector signed short CGV; 230 vector signed short OY; 231 vector unsigned short CSHIFT; 232 vector signed short *vYCoeffsBank, *vCCoeffsBank; 233#endif 234 235#if ARCH_BFIN 236 DECLARE_ALIGNED(4, uint32_t, oy); 237 DECLARE_ALIGNED(4, uint32_t, oc); 238 DECLARE_ALIGNED(4, uint32_t, zero); 239 DECLARE_ALIGNED(4, uint32_t, cy); 240 DECLARE_ALIGNED(4, uint32_t, crv); 241 DECLARE_ALIGNED(4, uint32_t, rmask); 242 DECLARE_ALIGNED(4, uint32_t, cbu); 243 DECLARE_ALIGNED(4, uint32_t, bmask); 244 DECLARE_ALIGNED(4, uint32_t, cgu); 245 DECLARE_ALIGNED(4, uint32_t, cgv); 246 DECLARE_ALIGNED(4, uint32_t, gmask); 247#endif 248 249#if HAVE_VIS 250 DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; 251#endif 252 253 /* function pointers for swScale() */ 254 void (*yuv2nv12X )(struct SwsContext *c, 255 const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, 256 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, 257 uint8_t *dest, uint8_t *uDest, 258 int dstW, int chrDstW, int dstFormat); 259 void (*yuv2yuv1 )(struct SwsContext *c, 260 const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, 261 uint8_t *dest, 262 uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, 263 long dstW, long chrDstW); 264 void (*yuv2yuvX )(struct SwsContext *c, 265 const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, 266 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, 267 const int16_t **alpSrc, 268 uint8_t *dest, 269 uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, 270 long dstW, long chrDstW); 271 void (*yuv2packed1)(struct SwsContext *c, 272 const uint16_t *buf0, 273 const uint16_t *uvbuf0, const uint16_t *uvbuf1, 274 const uint16_t *abuf0, 275 uint8_t *dest, 276 int dstW, int uvalpha, int dstFormat, int flags, int y); 277 void (*yuv2packed2)(struct SwsContext *c, 278 const uint16_t *buf0, const uint16_t *buf1, 279 const uint16_t *uvbuf0, const uint16_t *uvbuf1, 280 const uint16_t *abuf0, const uint16_t *abuf1, 281 uint8_t *dest, 282 int dstW, int yalpha, int uvalpha, int y); 283 void (*yuv2packedX)(struct SwsContext *c, 284 const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, 285 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, 286 const int16_t **alpSrc, uint8_t *dest, 287 long dstW, long dstY); 288 289 void (*lumToYV12)(uint8_t *dst, const uint8_t *src, 290 long width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. 291 void (*alpToYV12)(uint8_t *dst, const uint8_t *src, 292 long width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. 293 void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, 294 const uint8_t *src1, const uint8_t *src2, 295 long width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. 296 void (*hyscale_fast)(struct SwsContext *c, 297 int16_t *dst, long dstWidth, 298 const uint8_t *src, int srcW, int xInc); 299 void (*hcscale_fast)(struct SwsContext *c, 300 int16_t *dst, long dstWidth, 301 const uint8_t *src1, const uint8_t *src2, 302 int srcW, int xInc); 303 304 void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, 305 int xInc, const int16_t *filter, const int16_t *filterPos, 306 long filterSize); 307 308 void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed. 309 void (*chrConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for chroma planes if needed. 310 311 int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions. 312 int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions. 313 int alpSrcOffset; ///< Offset given to alpha src pointers passed to horizontal input functions. 314 315 int needs_hcscale; ///< Set if there are chroma planes to be converted. 316 317} SwsContext; 318//FIXME check init (where 0) 319 320SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c); 321int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], 322 int fullRange, int brightness, 323 int contrast, int saturation); 324 325void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], 326 int brightness, int contrast, int saturation); 327SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c); 328SwsFunc ff_yuv2rgb_init_vis(SwsContext *c); 329SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c); 330SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); 331SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); 332void ff_bfin_get_unscaled_swscale(SwsContext *c); 333void ff_yuv2packedX_altivec(SwsContext *c, 334 const int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 335 const int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 336 uint8_t *dest, int dstW, int dstY); 337 338const char *sws_format_name(enum PixelFormat format); 339 340//FIXME replace this with something faster 341#define is16BPS(x) ( \ 342 (x)==PIX_FMT_GRAY16BE \ 343 || (x)==PIX_FMT_GRAY16LE \ 344 || (x)==PIX_FMT_RGB48BE \ 345 || (x)==PIX_FMT_RGB48LE \ 346 || (x)==PIX_FMT_YUV420P16LE \ 347 || (x)==PIX_FMT_YUV422P16LE \ 348 || (x)==PIX_FMT_YUV444P16LE \ 349 || (x)==PIX_FMT_YUV420P16BE \ 350 || (x)==PIX_FMT_YUV422P16BE \ 351 || (x)==PIX_FMT_YUV444P16BE \ 352 ) 353#define isBE(x) ((x)&1) 354#define isPlanar8YUV(x) ( \ 355 (x)==PIX_FMT_YUV410P \ 356 || (x)==PIX_FMT_YUV420P \ 357 || (x)==PIX_FMT_YUVA420P \ 358 || (x)==PIX_FMT_YUV411P \ 359 || (x)==PIX_FMT_YUV422P \ 360 || (x)==PIX_FMT_YUV444P \ 361 || (x)==PIX_FMT_YUV440P \ 362 || (x)==PIX_FMT_NV12 \ 363 || (x)==PIX_FMT_NV21 \ 364 ) 365#define isPlanarYUV(x) ( \ 366 isPlanar8YUV(x) \ 367 || (x)==PIX_FMT_YUV420P16LE \ 368 || (x)==PIX_FMT_YUV422P16LE \ 369 || (x)==PIX_FMT_YUV444P16LE \ 370 || (x)==PIX_FMT_YUV420P16BE \ 371 || (x)==PIX_FMT_YUV422P16BE \ 372 || (x)==PIX_FMT_YUV444P16BE \ 373 ) 374#define isYUV(x) ( \ 375 (x)==PIX_FMT_UYVY422 \ 376 || (x)==PIX_FMT_YUYV422 \ 377 || isPlanarYUV(x) \ 378 ) 379#define isGray(x) ( \ 380 (x)==PIX_FMT_GRAY8 \ 381 || (x)==PIX_FMT_GRAY16BE \ 382 || (x)==PIX_FMT_GRAY16LE \ 383 ) 384#define isGray16(x) ( \ 385 (x)==PIX_FMT_GRAY16BE \ 386 || (x)==PIX_FMT_GRAY16LE \ 387 ) 388#define isRGBinInt(x) ( \ 389 (x)==PIX_FMT_RGB48BE \ 390 || (x)==PIX_FMT_RGB48LE \ 391 || (x)==PIX_FMT_RGB32 \ 392 || (x)==PIX_FMT_RGB32_1 \ 393 || (x)==PIX_FMT_RGB24 \ 394 || (x)==PIX_FMT_RGB565BE \ 395 || (x)==PIX_FMT_RGB565LE \ 396 || (x)==PIX_FMT_RGB555BE \ 397 || (x)==PIX_FMT_RGB555LE \ 398 || (x)==PIX_FMT_RGB444BE \ 399 || (x)==PIX_FMT_RGB444LE \ 400 || (x)==PIX_FMT_RGB8 \ 401 || (x)==PIX_FMT_RGB4 \ 402 || (x)==PIX_FMT_RGB4_BYTE \ 403 || (x)==PIX_FMT_MONOBLACK \ 404 || (x)==PIX_FMT_MONOWHITE \ 405 ) 406#define isBGRinInt(x) ( \ 407 (x)==PIX_FMT_BGR32 \ 408 || (x)==PIX_FMT_BGR32_1 \ 409 || (x)==PIX_FMT_BGR24 \ 410 || (x)==PIX_FMT_BGR565BE \ 411 || (x)==PIX_FMT_BGR565LE \ 412 || (x)==PIX_FMT_BGR555BE \ 413 || (x)==PIX_FMT_BGR555LE \ 414 || (x)==PIX_FMT_BGR444BE \ 415 || (x)==PIX_FMT_BGR444LE \ 416 || (x)==PIX_FMT_BGR8 \ 417 || (x)==PIX_FMT_BGR4 \ 418 || (x)==PIX_FMT_BGR4_BYTE \ 419 || (x)==PIX_FMT_MONOBLACK \ 420 || (x)==PIX_FMT_MONOWHITE \ 421 ) 422#define isRGBinBytes(x) ( \ 423 (x)==PIX_FMT_RGB48BE \ 424 || (x)==PIX_FMT_RGB48LE \ 425 || (x)==PIX_FMT_RGBA \ 426 || (x)==PIX_FMT_ARGB \ 427 || (x)==PIX_FMT_RGB24 \ 428 ) 429#define isBGRinBytes(x) ( \ 430 (x)==PIX_FMT_BGRA \ 431 || (x)==PIX_FMT_ABGR \ 432 || (x)==PIX_FMT_BGR24 \ 433 ) 434#define isAnyRGB(x) ( \ 435 isRGBinInt(x) \ 436 || isBGRinInt(x) \ 437 ) 438#define isALPHA(x) ( \ 439 (x)==PIX_FMT_BGR32 \ 440 || (x)==PIX_FMT_BGR32_1 \ 441 || (x)==PIX_FMT_RGB32 \ 442 || (x)==PIX_FMT_RGB32_1 \ 443 || (x)==PIX_FMT_YUVA420P \ 444 ) 445#define usePal(x) (av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) 446 447extern const uint64_t ff_dither4[2]; 448extern const uint64_t ff_dither8[2]; 449 450extern const AVClass sws_context_class; 451 452/** 453 * Sets c->swScale to an unscaled converter if one exists for the specific 454 * source and destination formats, bit depths, flags, etc. 455 */ 456void ff_get_unscaled_swscale(SwsContext *c); 457 458/** 459 * Returns the SWS_CPU_CAPS for the optimized code compiled into swscale. 460 */ 461int ff_hardcodedcpuflags(void); 462 463/** 464 * Returns function pointer to fastest main scaler path function depending 465 * on architecture and available optimizations. 466 */ 467SwsFunc ff_getSwsFunc(SwsContext *c); 468 469#endif /* SWSCALE_SWSCALE_INTERNAL_H */ 470