1/* 2 * This file is part of MPlayer. 3 * 4 * MPlayer is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * MPlayer is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License along 15 * with MPlayer; if not, write to the Free Software Foundation, Inc., 16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 17 */ 18 19#include <stdio.h> 20#include <stdlib.h> 21#include <string.h> 22#include <inttypes.h> 23 24#include "config.h" 25#include "mp_msg.h" 26#include "cpudetect.h" 27 28#include "img_format.h" 29#include "mp_image.h" 30#include "vf.h" 31#include "libavutil/attributes.h" 32#include "libavutil/x86/asm.h" 33 34typedef void (pack_func_t)(unsigned char *dst, unsigned char *y, 35 unsigned char *u, unsigned char *v, int w, int us, int vs); 36 37struct vf_priv_s { 38 int mode; 39 pack_func_t *pack[2]; 40}; 41 42static void pack_nn_C(unsigned char *dst, unsigned char *y, 43 unsigned char *u, unsigned char *v, int w, 44 int av_unused us, int av_unused vs) 45{ 46 int j; 47 for (j = w/2; j; j--) { 48 *dst++ = *y++; 49 *dst++ = *u++; 50 *dst++ = *y++; 51 *dst++ = *v++; 52 } 53} 54 55static void pack_li_0_C(unsigned char *dst, unsigned char *y, 56 unsigned char *u, unsigned char *v, int w, int us, int vs) 57{ 58 int j; 59 for (j = w/2; j; j--) { 60 *dst++ = *y++; 61 *dst++ = (u[us+us] + 7*u[0])>>3; 62 *dst++ = *y++; 63 *dst++ = (v[vs+vs] + 7*v[0])>>3; 64 u++; v++; 65 } 66} 67 68static void pack_li_1_C(unsigned char *dst, unsigned char *y, 69 unsigned char *u, unsigned char *v, int w, int us, int vs) 70{ 71 int j; 72 for (j = w/2; j; j--) { 73 *dst++ = *y++; 74 *dst++ = (3*u[us+us] + 5*u[0])>>3; 75 *dst++ = *y++; 76 *dst++ = (3*v[vs+vs] + 5*v[0])>>3; 77 u++; v++; 78 } 79} 80 81#if HAVE_MMX 82static void pack_nn_MMX(unsigned char *dst, unsigned char *y, 83 unsigned char *u, unsigned char *v, int w, 84 int av_unused us, int av_unused vs) 85{ 86 __asm__ volatile ("" 87 ASMALIGN(4) 88 "1: \n\t" 89 "movq (%0), %%mm1 \n\t" 90 "movq (%0), %%mm2 \n\t" 91 "movq (%1), %%mm4 \n\t" 92 "movq (%2), %%mm6 \n\t" 93 "punpcklbw %%mm6, %%mm4 \n\t" 94 "punpcklbw %%mm4, %%mm1 \n\t" 95 "punpckhbw %%mm4, %%mm2 \n\t" 96 97 "add $8, %0 \n\t" 98 "add $4, %1 \n\t" 99 "add $4, %2 \n\t" 100 "movq %%mm1, (%3) \n\t" 101 "movq %%mm2, 8(%3) \n\t" 102 "add $16, %3 \n\t" 103 "decl %4 \n\t" 104 "jnz 1b \n\t" 105 "emms \n\t" 106 : 107 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) 108 : "memory" 109 ); 110 pack_nn_C(dst, y, u, v, (w&7), 0, 0); 111} 112 113#if HAVE_EBX_AVAILABLE 114static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, 115 unsigned char *u, unsigned char *v, int w, int us, int vs) 116{ 117 __asm__ volatile ("" 118 "push %%"REG_BP" \n\t" 119#if ARCH_X86_64 120 "mov %6, %%"REG_BP" \n\t" 121#else 122 "movl 4(%%"REG_d"), %%"REG_BP" \n\t" 123 "movl (%%"REG_d"), %%"REG_d" \n\t" 124#endif 125 "pxor %%mm0, %%mm0 \n\t" 126 127 ASMALIGN(4) 128 "2: \n\t" 129 "movq (%%"REG_S"), %%mm1 \n\t" 130 "movq (%%"REG_S"), %%mm2 \n\t" 131 132 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" 133 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" 134 "punpcklbw %%mm0, %%mm4 \n\t" 135 "punpcklbw %%mm0, %%mm6 \n\t" 136 "movq (%%"REG_a"), %%mm3 \n\t" 137 "movq (%%"REG_b"), %%mm5 \n\t" 138 "punpcklbw %%mm0, %%mm3 \n\t" 139 "punpcklbw %%mm0, %%mm5 \n\t" 140 "paddw %%mm3, %%mm4 \n\t" 141 "paddw %%mm5, %%mm6 \n\t" 142 "paddw %%mm3, %%mm4 \n\t" 143 "paddw %%mm5, %%mm6 \n\t" 144 "paddw %%mm3, %%mm4 \n\t" 145 "paddw %%mm5, %%mm6 \n\t" 146 "paddw %%mm3, %%mm4 \n\t" 147 "paddw %%mm5, %%mm6 \n\t" 148 "paddw %%mm3, %%mm4 \n\t" 149 "paddw %%mm5, %%mm6 \n\t" 150 "paddw %%mm3, %%mm4 \n\t" 151 "paddw %%mm5, %%mm6 \n\t" 152 "paddw %%mm3, %%mm4 \n\t" 153 "paddw %%mm5, %%mm6 \n\t" 154 "psrlw $3, %%mm4 \n\t" 155 "psrlw $3, %%mm6 \n\t" 156 "packuswb %%mm4, %%mm4 \n\t" 157 "packuswb %%mm6, %%mm6 \n\t" 158 "punpcklbw %%mm6, %%mm4 \n\t" 159 "punpcklbw %%mm4, %%mm1 \n\t" 160 "punpckhbw %%mm4, %%mm2 \n\t" 161 162 "movq %%mm1, (%%"REG_D") \n\t" 163 "movq %%mm2, 8(%%"REG_D") \n\t" 164 165 "movq 8(%%"REG_S"), %%mm1 \n\t" 166 "movq 8(%%"REG_S"), %%mm2 \n\t" 167 168 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" 169 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" 170 "punpckhbw %%mm0, %%mm4 \n\t" 171 "punpckhbw %%mm0, %%mm6 \n\t" 172 "movq (%%"REG_a"), %%mm3 \n\t" 173 "movq (%%"REG_b"), %%mm5 \n\t" 174 "punpckhbw %%mm0, %%mm3 \n\t" 175 "punpckhbw %%mm0, %%mm5 \n\t" 176 "paddw %%mm3, %%mm4 \n\t" 177 "paddw %%mm5, %%mm6 \n\t" 178 "paddw %%mm3, %%mm4 \n\t" 179 "paddw %%mm5, %%mm6 \n\t" 180 "paddw %%mm3, %%mm4 \n\t" 181 "paddw %%mm5, %%mm6 \n\t" 182 "paddw %%mm3, %%mm4 \n\t" 183 "paddw %%mm5, %%mm6 \n\t" 184 "paddw %%mm3, %%mm4 \n\t" 185 "paddw %%mm5, %%mm6 \n\t" 186 "paddw %%mm3, %%mm4 \n\t" 187 "paddw %%mm5, %%mm6 \n\t" 188 "paddw %%mm3, %%mm4 \n\t" 189 "paddw %%mm5, %%mm6 \n\t" 190 "psrlw $3, %%mm4 \n\t" 191 "psrlw $3, %%mm6 \n\t" 192 "packuswb %%mm4, %%mm4 \n\t" 193 "packuswb %%mm6, %%mm6 \n\t" 194 "punpcklbw %%mm6, %%mm4 \n\t" 195 "punpcklbw %%mm4, %%mm1 \n\t" 196 "punpckhbw %%mm4, %%mm2 \n\t" 197 198 "add $16, %%"REG_S" \n\t" 199 "add $8, %%"REG_a" \n\t" 200 "add $8, %%"REG_b" \n\t" 201 202 "movq %%mm1, 16(%%"REG_D") \n\t" 203 "movq %%mm2, 24(%%"REG_D") \n\t" 204 "add $32, %%"REG_D" \n\t" 205 206 "decl %%ecx \n\t" 207 "jnz 2b \n\t" 208 "emms \n\t" 209 "pop %%"REG_BP" \n\t" 210 : 211 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), 212#if ARCH_X86_64 213 "d" ((x86_reg)us), "r" ((x86_reg)vs) 214#else 215 "d" (&us) 216#endif 217 : "memory" 218 ); 219 pack_li_0_C(dst, y, u, v, (w&15), us, vs); 220} 221 222static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, 223 unsigned char *u, unsigned char *v, int w, int us, int vs) 224{ 225 __asm__ volatile ("" 226 "push %%"REG_BP" \n\t" 227#if ARCH_X86_64 228 "mov %6, %%"REG_BP" \n\t" 229#else 230 "movl 4(%%"REG_d"), %%"REG_BP" \n\t" 231 "movl (%%"REG_d"), %%"REG_d" \n\t" 232#endif 233 "pxor %%mm0, %%mm0 \n\t" 234 235 ASMALIGN(4) 236 "3: \n\t" 237 "movq (%%"REG_S"), %%mm1 \n\t" 238 "movq (%%"REG_S"), %%mm2 \n\t" 239 240 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" 241 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" 242 "punpcklbw %%mm0, %%mm4 \n\t" 243 "punpcklbw %%mm0, %%mm6 \n\t" 244 "movq (%%"REG_a"), %%mm3 \n\t" 245 "movq (%%"REG_b"), %%mm5 \n\t" 246 "punpcklbw %%mm0, %%mm3 \n\t" 247 "punpcklbw %%mm0, %%mm5 \n\t" 248 "movq %%mm4, %%mm7 \n\t" 249 "paddw %%mm4, %%mm4 \n\t" 250 "paddw %%mm7, %%mm4 \n\t" 251 "movq %%mm6, %%mm7 \n\t" 252 "paddw %%mm6, %%mm6 \n\t" 253 "paddw %%mm7, %%mm6 \n\t" 254 "paddw %%mm3, %%mm4 \n\t" 255 "paddw %%mm5, %%mm6 \n\t" 256 "paddw %%mm3, %%mm4 \n\t" 257 "paddw %%mm5, %%mm6 \n\t" 258 "paddw %%mm3, %%mm4 \n\t" 259 "paddw %%mm5, %%mm6 \n\t" 260 "paddw %%mm3, %%mm4 \n\t" 261 "paddw %%mm5, %%mm6 \n\t" 262 "paddw %%mm3, %%mm4 \n\t" 263 "paddw %%mm5, %%mm6 \n\t" 264 "psrlw $3, %%mm4 \n\t" 265 "psrlw $3, %%mm6 \n\t" 266 "packuswb %%mm4, %%mm4 \n\t" 267 "packuswb %%mm6, %%mm6 \n\t" 268 "punpcklbw %%mm6, %%mm4 \n\t" 269 "punpcklbw %%mm4, %%mm1 \n\t" 270 "punpckhbw %%mm4, %%mm2 \n\t" 271 272 "movq %%mm1, (%%"REG_D") \n\t" 273 "movq %%mm2, 8(%%"REG_D") \n\t" 274 275 "movq 8(%%"REG_S"), %%mm1 \n\t" 276 "movq 8(%%"REG_S"), %%mm2 \n\t" 277 278 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" 279 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" 280 "punpckhbw %%mm0, %%mm4 \n\t" 281 "punpckhbw %%mm0, %%mm6 \n\t" 282 "movq (%%"REG_a"), %%mm3 \n\t" 283 "movq (%%"REG_b"), %%mm5 \n\t" 284 "punpckhbw %%mm0, %%mm3 \n\t" 285 "punpckhbw %%mm0, %%mm5 \n\t" 286 "movq %%mm4, %%mm7 \n\t" 287 "paddw %%mm4, %%mm4 \n\t" 288 "paddw %%mm7, %%mm4 \n\t" 289 "movq %%mm6, %%mm7 \n\t" 290 "paddw %%mm6, %%mm6 \n\t" 291 "paddw %%mm7, %%mm6 \n\t" 292 "paddw %%mm3, %%mm4 \n\t" 293 "paddw %%mm5, %%mm6 \n\t" 294 "paddw %%mm3, %%mm4 \n\t" 295 "paddw %%mm5, %%mm6 \n\t" 296 "paddw %%mm3, %%mm4 \n\t" 297 "paddw %%mm5, %%mm6 \n\t" 298 "paddw %%mm3, %%mm4 \n\t" 299 "paddw %%mm5, %%mm6 \n\t" 300 "paddw %%mm3, %%mm4 \n\t" 301 "paddw %%mm5, %%mm6 \n\t" 302 "psrlw $3, %%mm4 \n\t" 303 "psrlw $3, %%mm6 \n\t" 304 "packuswb %%mm4, %%mm4 \n\t" 305 "packuswb %%mm6, %%mm6 \n\t" 306 "punpcklbw %%mm6, %%mm4 \n\t" 307 "punpcklbw %%mm4, %%mm1 \n\t" 308 "punpckhbw %%mm4, %%mm2 \n\t" 309 310 "add $16, %%"REG_S" \n\t" 311 "add $8, %%"REG_a" \n\t" 312 "add $8, %%"REG_b" \n\t" 313 314 "movq %%mm1, 16(%%"REG_D") \n\t" 315 "movq %%mm2, 24(%%"REG_D") \n\t" 316 "add $32, %%"REG_D" \n\t" 317 318 "decl %%ecx \n\t" 319 "jnz 3b \n\t" 320 "emms \n\t" 321 "pop %%"REG_BP" \n\t" 322 : 323 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), 324#if ARCH_X86_64 325 "d" ((x86_reg)us), "r" ((x86_reg)vs) 326#else 327 "d" (&us) 328#endif 329 : "memory" 330 ); 331 pack_li_1_C(dst, y, u, v, (w&15), us, vs); 332} 333#endif /* HAVE_EBX_AVAILABLE */ 334#endif 335 336static pack_func_t *pack_nn; 337static pack_func_t *pack_li_0; 338static pack_func_t *pack_li_1; 339 340static void ilpack(unsigned char *dst, unsigned char *src[3], 341 int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2]) 342{ 343 int i; 344 unsigned char *y, *u, *v; 345 int ys = srcstride[0], us = srcstride[1], vs = srcstride[2]; 346 int a, b; 347 348 y = src[0]; 349 u = src[1]; 350 v = src[2]; 351 352 pack_nn(dst, y, u, v, w, 0, 0); 353 y += ys; dst += dststride; 354 pack_nn(dst, y, u+us, v+vs, w, 0, 0); 355 y += ys; dst += dststride; 356 for (i=2; i<h-2; i++) { 357 a = (i&2) ? 1 : -1; 358 b = (i&1) ^ ((i&2)>>1); 359 pack[b](dst, y, u, v, w, us*a, vs*a); 360 y += ys; 361 if ((i&3) == 1) { 362 u -= us; 363 v -= vs; 364 } else { 365 u += us; 366 v += vs; 367 } 368 dst += dststride; 369 } 370 pack_nn(dst, y, u, v, w, 0, 0); 371 y += ys; dst += dststride; u += us; v += vs; 372 pack_nn(dst, y, u, v, w, 0, 0); 373} 374 375 376static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) 377{ 378 mp_image_t *dmpi; 379 380 // hope we'll get DR buffer: 381 dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2, 382 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, 383 mpi->w, mpi->h); 384 385 ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack); 386 387 return ff_vf_next_put_image(vf,dmpi, pts); 388} 389 390static int config(struct vf_instance *vf, 391 int width, int height, int d_width, int d_height, 392 unsigned int flags, unsigned int outfmt) 393{ 394 /* FIXME - also support UYVY output? */ 395 return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2); 396} 397 398 399static int query_format(struct vf_instance *vf, unsigned int fmt) 400{ 401 /* FIXME - really any YUV 4:2:0 input format should work */ 402 switch (fmt) { 403 case IMGFMT_YV12: 404 case IMGFMT_IYUV: 405 case IMGFMT_I420: 406 return ff_vf_next_query_format(vf,IMGFMT_YUY2); 407 } 408 return 0; 409} 410 411static int vf_open(vf_instance_t *vf, char *args) 412{ 413 vf->config=config; 414 vf->query_format=query_format; 415 vf->put_image=put_image; 416 vf->priv = calloc(1, sizeof(struct vf_priv_s)); 417 vf->priv->mode = 1; 418 if (args) sscanf(args, "%d", &vf->priv->mode); 419 420 pack_nn = pack_nn_C; 421 pack_li_0 = pack_li_0_C; 422 pack_li_1 = pack_li_1_C; 423#if HAVE_MMX 424 if(ff_gCpuCaps.hasMMX) { 425 pack_nn = pack_nn_MMX; 426#if HAVE_EBX_AVAILABLE 427 pack_li_0 = pack_li_0_MMX; 428 pack_li_1 = pack_li_1_MMX; 429#endif 430 } 431#endif 432 433 switch(vf->priv->mode) { 434 case 0: 435 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; 436 break; 437 default: 438 ff_mp_msg(MSGT_VFILTER, MSGL_WARN, 439 "ilpack: unknown mode %d (fallback to linear)\n", 440 vf->priv->mode); 441 /* Fallthrough */ 442 case 1: 443 vf->priv->pack[0] = pack_li_0; 444 vf->priv->pack[1] = pack_li_1; 445 break; 446 } 447 448 return 1; 449} 450 451const vf_info_t ff_vf_info_ilpack = { 452 "4:2:0 planar -> 4:2:2 packed reinterlacer", 453 "ilpack", 454 "Richard Felker", 455 "", 456 vf_open, 457 NULL 458}; 459