1/* 2 * Sun mediaLib optimized DSP utils 3 * Copyright (c) 2001 Fabrice Bellard 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include "libavcodec/dsputil.h" 23#include "libavcodec/mpegvideo.h" 24 25#include <mlib_types.h> 26#include <mlib_status.h> 27#include <mlib_sys.h> 28#include <mlib_algebra.h> 29#include <mlib_video.h> 30 31/* misc */ 32 33static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 34{ 35 int i; 36 37 for (i=0;i<8;i++) { 38 mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8); 39 40 pixels += line_size; 41 block += 8; 42 } 43} 44 45static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size) 46{ 47 int i; 48 49 for (i=0;i<8;i++) { 50 mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8); 51 52 s1 += line_size; 53 s2 += line_size; 54 block += 8; 55 } 56} 57 58static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size) 59{ 60 mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size); 61} 62 63/* put block, width 16 pixel, height 8/16 */ 64 65static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, 66 int stride, int height) 67{ 68 switch (height) { 69 case 8: 70 mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride); 71 break; 72 73 case 16: 74 mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride); 75 break; 76 77 default: 78 assert(0); 79 } 80} 81 82static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, 83 int stride, int height) 84{ 85 switch (height) { 86 case 8: 87 mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 88 break; 89 90 case 16: 91 mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 92 break; 93 94 default: 95 assert(0); 96 } 97} 98 99static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, 100 int stride, int height) 101{ 102 switch (height) { 103 case 8: 104 mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 105 break; 106 107 case 16: 108 mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 109 break; 110 111 default: 112 assert(0); 113 } 114} 115 116static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, 117 int stride, int height) 118{ 119 switch (height) { 120 case 8: 121 mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 122 break; 123 124 case 16: 125 mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 126 break; 127 128 default: 129 assert(0); 130 } 131} 132 133/* put block, width 8 pixel, height 4/8/16 */ 134 135static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, 136 int stride, int height) 137{ 138 switch (height) { 139 case 4: 140 mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride); 141 break; 142 143 case 8: 144 mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride); 145 break; 146 147 case 16: 148 mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride); 149 break; 150 151 default: 152 assert(0); 153 } 154} 155 156static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, 157 int stride, int height) 158{ 159 switch (height) { 160 case 4: 161 mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 162 break; 163 164 case 8: 165 mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 166 break; 167 168 case 16: 169 mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 170 break; 171 172 default: 173 assert(0); 174 } 175} 176 177static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, 178 int stride, int height) 179{ 180 switch (height) { 181 case 4: 182 mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 183 break; 184 185 case 8: 186 mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 187 break; 188 189 case 16: 190 mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 191 break; 192 193 default: 194 assert(0); 195 } 196} 197 198static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, 199 int stride, int height) 200{ 201 switch (height) { 202 case 4: 203 mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 204 break; 205 206 case 8: 207 mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 208 break; 209 210 case 16: 211 mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 212 break; 213 214 default: 215 assert(0); 216 } 217} 218 219/* average block, width 16 pixel, height 8/16 */ 220 221static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, 222 int stride, int height) 223{ 224 switch (height) { 225 case 8: 226 mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride); 227 break; 228 229 case 16: 230 mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride); 231 break; 232 233 default: 234 assert(0); 235 } 236} 237 238static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, 239 int stride, int height) 240{ 241 switch (height) { 242 case 8: 243 mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 244 break; 245 246 case 16: 247 mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 248 break; 249 250 default: 251 assert(0); 252 } 253} 254 255static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, 256 int stride, int height) 257{ 258 switch (height) { 259 case 8: 260 mlib_VideoInterpAveY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 261 break; 262 263 case 16: 264 mlib_VideoInterpAveY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 265 break; 266 267 default: 268 assert(0); 269 } 270} 271 272static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, 273 int stride, int height) 274{ 275 switch (height) { 276 case 8: 277 mlib_VideoInterpAveXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride); 278 break; 279 280 case 16: 281 mlib_VideoInterpAveXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride); 282 break; 283 284 default: 285 assert(0); 286 } 287} 288 289/* average block, width 8 pixel, height 4/8/16 */ 290 291static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, 292 int stride, int height) 293{ 294 switch (height) { 295 case 4: 296 mlib_VideoCopyRefAve_U8_U8_8x4(dest, (uint8_t *)ref, stride); 297 break; 298 299 case 8: 300 mlib_VideoCopyRefAve_U8_U8_8x8(dest, (uint8_t *)ref, stride); 301 break; 302 303 case 16: 304 mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride); 305 break; 306 307 default: 308 assert(0); 309 } 310} 311 312static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, 313 int stride, int height) 314{ 315 switch (height) { 316 case 4: 317 mlib_VideoInterpAveX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 318 break; 319 320 case 8: 321 mlib_VideoInterpAveX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 322 break; 323 324 case 16: 325 mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 326 break; 327 328 default: 329 assert(0); 330 } 331} 332 333static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, 334 int stride, int height) 335{ 336 switch (height) { 337 case 4: 338 mlib_VideoInterpAveY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 339 break; 340 341 case 8: 342 mlib_VideoInterpAveY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 343 break; 344 345 case 16: 346 mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 347 break; 348 349 default: 350 assert(0); 351 } 352} 353 354static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, 355 int stride, int height) 356{ 357 switch (height) { 358 case 4: 359 mlib_VideoInterpAveXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride); 360 break; 361 362 case 8: 363 mlib_VideoInterpAveXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride); 364 break; 365 366 case 16: 367 mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride); 368 break; 369 370 default: 371 assert(0); 372 } 373} 374 375/* swap byte order of a buffer */ 376 377static void bswap_buf_mlib(uint32_t *dst, const uint32_t *src, int w) 378{ 379 mlib_VectorReverseByteOrder_U32_U32(dst, src, w); 380} 381 382/* transformations */ 383 384static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data) 385{ 386 int i; 387 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 388 389 mlib_VideoIDCT8x8_S16_S16 (data, data); 390 391 for(i=0;i<8;i++) { 392 dest[0] = cm[data[0]]; 393 dest[1] = cm[data[1]]; 394 dest[2] = cm[data[2]]; 395 dest[3] = cm[data[3]]; 396 dest[4] = cm[data[4]]; 397 dest[5] = cm[data[5]]; 398 dest[6] = cm[data[6]]; 399 dest[7] = cm[data[7]]; 400 401 dest += line_size; 402 data += 8; 403 } 404} 405 406static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data) 407{ 408 mlib_VideoIDCT8x8_S16_S16 (data, data); 409 mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size); 410} 411 412static void ff_idct_mlib(DCTELEM *data) 413{ 414 mlib_VideoIDCT8x8_S16_S16 (data, data); 415} 416 417static void ff_fdct_mlib(DCTELEM *data) 418{ 419 mlib_VideoDCT8x8_S16_S16 (data, data); 420} 421 422void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) 423{ 424 c->get_pixels = get_pixels_mlib; 425 c->diff_pixels = diff_pixels_mlib; 426 c->add_pixels_clamped = add_pixels_clamped_mlib; 427 428 c->put_pixels_tab[0][0] = put_pixels16_mlib; 429 c->put_pixels_tab[0][1] = put_pixels16_x2_mlib; 430 c->put_pixels_tab[0][2] = put_pixels16_y2_mlib; 431 c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib; 432 c->put_pixels_tab[1][0] = put_pixels8_mlib; 433 c->put_pixels_tab[1][1] = put_pixels8_x2_mlib; 434 c->put_pixels_tab[1][2] = put_pixels8_y2_mlib; 435 c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib; 436 437 c->avg_pixels_tab[0][0] = avg_pixels16_mlib; 438 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib; 439 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib; 440 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib; 441 c->avg_pixels_tab[1][0] = avg_pixels8_mlib; 442 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib; 443 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib; 444 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib; 445 446 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib; 447 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib; 448 449 c->bswap_buf = bswap_buf_mlib; 450} 451 452void MPV_common_init_mlib(MpegEncContext *s) 453{ 454 if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ 455 s->dsp.fdct = ff_fdct_mlib; 456 } 457 458 if(s->avctx->idct_algo==FF_IDCT_MLIB){ 459 s->dsp.idct_put= ff_idct_put_mlib; 460 s->dsp.idct_add= ff_idct_add_mlib; 461 s->dsp.idct = ff_idct_mlib; 462 s->dsp.idct_permutation_type= FF_NO_IDCT_PERM; 463 } 464} 465