/*
 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
 *                    April 20, 2007
 *
 * Blackfin video color space converter operations
 * convert I420 YV12 to RGB in various formats
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */


/*
YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits, packed into shorts.
The 555 and 24-bit routines below share the same arithmetic and differ only
in how the three clipped components are packed and stored.


The following calculation is used for the conversion:

  r = clipz((y - oy) * cy  + crv * (v - 128))
  g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))
  b = clipz((y - oy) * cy  + cbu * (u - 128))

y,u,v are prescaled by a factor of 4, i.e. left-shifted by 2 to gain
precision.


New factorization to eliminate the truncation error which was
occurring due to the byteop3p.


1) Use byteop16m to subtract the offsets from four bytes at a time. The
   subtraction is done on unsigned 8-bit data, so the offsets need to be
   renormalized to 8 bits.

2) Scale operands up by a factor of 4, not 8, because Blackfin
   multiplies include a shift.

3) Compute into the accumulators cy*yx0, cy*yx1.

4) Compute each of the linear equations:
     r = clipz((y - oy) * cy  + crv * (v - 128))

     g = clipz((y - oy) * cy  + cgv * (v - 128) + cgu * (u - 128))

     b = clipz((y - oy) * cy  + cbu * (u - 128))

   Reuse of the accumulators requires that we actually multiply
   twice: once with addition and the second time with a subtraction,
   which restores the accumulators to cy*y for the next component.

   Because of this we need to compute the equations in the order R, B,
   then G, saving the writes for B in the case of 24/32 bit color
   formats.

   API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
                      int dW, uint32_t *coeffs);

       A          B
       ---        ---
       i2 = cb    i3 = cr
       i1 = coeff i0 = y

Where coeffs have the following layout in memory (11 words, COEFF_LEN bytes):

uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask;

coeffs is a pointer to oy.

The table is walked through I1 as a circular buffer (B1 = base, L1 =
COEFF_LEN). Each group of four pixels reads oy..gmask once for the first
pixel pair, steps from gmask back to cy with a post-modify of
COEFF_REL_CY_OFF, reads cy..gmask again for the second pixel pair, and
then wraps around to oy.

The {rgb} masks are only utilized by the 565 and 555 packing algorithms; the
24-bit routine loads them but does not use them. Note the data replication
is used to simplify the internal algorithms for the dual MAC architecture
of Blackfin.

All routines are exported with _ff_bfin_ as a symbol prefix.

Rough performance gain compared against -O3:

2779809/1484290 187.28%

which translates to ~33c/pel to ~57c/pel for the reference vs 17.5
c/pel for the optimized implementations. Not sure why there is such a
huge variation on the reference codes on Blackfin; I guess it must have
to do with the memory system.
*/

/* Section selection: L1 instruction memory for FDPIC builds, .text otherwise. */
#define mL3 .text
#ifdef __FDPIC__
#define mL1 .l1.text
#else
#define mL1 mL3
#endif
#define MEM mL1

/*
 * DEFUN opens the definition of the global function _ff_bfin_<fname> in
 * section <where>. The third argument is the C prototype; it is not used in
 * the expansion and serves as documentation only. The invocation must be
 * followed by ':' to complete the label.
 */
#define DEFUN(fname,where,interface) \
        .section where;              \
        .global _ff_bfin_ ## fname;  \
        .type _ff_bfin_ ## fname, STT_FUNC; \
        .align 8;                    \
        _ff_bfin_ ## fname

/* DEFUN_END records the size of _ff_bfin_<fname> for the symbol table. */
#define DEFUN_END(fname) \
        .size _ff_bfin_ ## fname, . - _ff_bfin_ ## fname


.text

/* Size in bytes of the coefficient table (11 words, oy .. gmask). */
#define COEFF_LEN        11*4
/* Post-modify applied after reading gmask so that I1 wraps around to cy. */
#define COEFF_REL_CY_OFF 4*4

/*
 * Byte offsets from FP (after "link 0") of the arguments that do not arrive
 * in r0-r2: out, dW and coeffs.
 */
#define ARG_OUT   20
#define ARG_W     24
#define ARG_COEFF 28

/*
 * yuv2rgb565_line: convert one line of planar YUV to 16-bit pixels.
 *
 *   r0 = Y, r1 = U, r2 = V        (source planes, moved to I0, I2, I3)
 *   [fp+ARG_OUT]   = out          (destination, one 32-bit store per 2 pixels)
 *   [fp+ARG_W]     = dW           (width in pixels; dW >> 2 groups are done)
 *   [fp+ARG_COEFF] = coeffs       (11-word table described above)
 *
 * Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 4 pixels.
 * The loads for the next group are issued at the bottom of the loop, so the
 * last iteration reads 4 bytes past the consumed Y data and 2 bytes past
 * the consumed U and V data.
 *
 * NOTE(review): the component computed with crv/rmask is shifted right by 3
 * and therefore lands in the low 5 bits, which the layout diagram below
 * labels "b". Presumably the caller orders coefficients and masks to match;
 * confirm against the code that builds the coefficient table.
 * NOTE(review): dW < 4 gives a loop count of zero; confirm how LSETUP
 * behaves for a zero count before relying on that case.
 */
DEFUN(yuv2rgb565_line,MEM,
   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
        link 0;
        [--sp] = (r7:4);
        p1 = [fp+ARG_OUT];
        r3 = [fp+ARG_W];

        i0 = r0;
        i2 = r1;
        i3 = r2;

        /* Set up I1 as a circular buffer over the coefficient table. */
        r0 = [fp+ARG_COEFF];
        i1 = r0;
        b1 = i1;
        l1 = COEFF_LEN;
        m0 = COEFF_REL_CY_OFF;
        p0 = r3;

        /* Prime the pipeline with the first group of samples. */
        r0   = [i0++];         // 4Y
        r1.l = w[i2++];        // 2u
        r1.h = w[i3++];        // 2v
        p0 = p0>>2;            // one iteration per 4 pixels

        lsetup (.L0565, .L1565) lc0 = p0;

        /*
           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
           r0 -- used to load 4ys
           r1 -- used to load 2us,2vs
           r4 -- y3,y2
           r5 -- y1,y0
           r6 -- u1,u0
           r7 -- v1,v0
        */
                                                              r2=[i1++]; // oy
.L0565:
        /*
           rrrrrrrr gggggggg bbbbbbbb
            5432109876543210
                        bbbbb >>3
                  gggggggg    <<3
             rrrrrrrr         <<8
            rrrrrggggggbbbbb
        */
        /* Remove the offsets, then prescale every sample by 4. */
        (r4,r5) = byteop16m (r1:0, r3:2)   || r3=[i1++];   // oc
        (r7,r6) = byteop16m (r1:0, r3:2) (r);
        r5 = r5 << 2 (v);                                  // y1,y0
        r4 = r4 << 2 (v);                                  // y3,y2
        r6 = r6 << 2 (v)                   || r0=[i1++];   // u1,u0, r0=zero
        r7 = r7 << 2 (v)                   || r1=[i1++];   // v1,v0  r1=cy
        /* Y' = y*cy   (pixels 0 and 1, which share u0/v0) */
        a1 = r1.h*r5.h, a0 = r1.l*r5.l     || r1=[i1++];   // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2 = r2 >> 3 (v);
        r3 = r2 & r5;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
        r2 = r2 << 8 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, I1 wraps to cy
        r2 = r2 << 3 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                          || r1=[i1++]; // cy

        /* Y' = y*cy   (pixels 2 and 3, which share u1/v1) */

        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2 = r2 >> 3 (v);
        r3 = r2 & r5;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
        r2 = r2 << 8 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
        r2 = byteop3p(r3:2, r1:0)(LO)            || r0 = [i0++];        // 4Y (next group)
        r2 = r2 << 3 (v)                         || r1.l = w[i2++];     // 2u
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                || r1.h = w[i3++];     // 2v
.L1565:                                                       r2=[i1++]; // oy

        l1 = 0;                // leave circular addressing disabled for I1

        (r7:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuv2rgb565_line)

/*
 * yuv2rgb555_line: same interface and arithmetic as yuv2rgb565_line, but the
 * three components are placed at bit offsets 0, 5 and 10 (shifts of >>3, <<2
 * and <<7) before masking, giving 5 bits per component.
 *
 * Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 4 pixels;
 * the loads for the next group are issued at the bottom of the loop, so the
 * last iteration reads past the consumed input (4 Y bytes, 2 U, 2 V).
 */
DEFUN(yuv2rgb555_line,MEM,
   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
        link 0;
        [--sp] = (r7:4);
        p1 = [fp+ARG_OUT];
        r3 = [fp+ARG_W];

        i0 = r0;
        i2 = r1;
        i3 = r2;

        /* Set up I1 as a circular buffer over the coefficient table. */
        r0 = [fp+ARG_COEFF];
        i1 = r0;
        b1 = i1;
        l1 = COEFF_LEN;
        m0 = COEFF_REL_CY_OFF;
        p0 = r3;

        /* Prime the pipeline with the first group of samples. */
        r0   = [i0++];         // 4Y
        r1.l = w[i2++];        // 2u
        r1.h = w[i3++];        // 2v
        p0 = p0>>2;            // one iteration per 4 pixels

        lsetup (.L0555, .L1555) lc0 = p0;

        /*
           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
           r0 -- used to load 4ys
           r1 -- used to load 2us,2vs
           r4 -- y3,y2
           r5 -- y1,y0
           r6 -- u1,u0
           r7 -- v1,v0
        */
                                                              r2=[i1++]; // oy
.L0555:
        /*
           rrrrrrrr gggggggg bbbbbbbb
            5432109876543210
                        bbbbb >>3
                   gggggggg   <<2
              rrrrrrrr        <<7
            xrrrrrgggggbbbbb
        */

        /* Remove the offsets, then prescale every sample by 4. */
        (r4,r5) = byteop16m (r1:0, r3:2)   || r3=[i1++];   // oc
        (r7,r6) = byteop16m (r1:0, r3:2) (r);
        r5 = r5 << 2 (v);                                  // y1,y0
        r4 = r4 << 2 (v);                                  // y3,y2
        r6 = r6 << 2 (v)                   || r0=[i1++];   // u1,u0, r0=zero
        r7 = r7 << 2 (v)                   || r1=[i1++];   // v1,v0  r1=cy
        /* Y' = y*cy   (pixels 0 and 1, which share u0/v0) */
        a1 = r1.h*r5.h, a0 = r1.l*r5.l     || r1=[i1++];   // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2 = r2 >> 3 (v);
        r3 = r2 & r5;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
        r2 = r2 << 7 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, I1 wraps to cy
        r2 = r2 << 2 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                          || r1=[i1++]; // cy

        /* Y' = y*cy   (pixels 2 and 3, which share u1/v1) */

        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2 = r2 >> 3 (v);
        r3 = r2 & r5;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
        r2 = r2 << 7 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
        r2 = byteop3p(r3:2, r1:0)(LO)            || r0=[i0++];          // 4Y (next group)
        r2 = r2 << 2 (v)                         || r1.l=w[i2++];       // 2u
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                || r1.h=w[i3++];       // 2v

.L1555:                                                       r2=[i1++]; // oy

        l1 = 0;                // leave circular addressing disabled for I1

        (r7:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuv2rgb555_line)

/*
 * yuv2rgb24_line: same interface and arithmetic as yuv2rgb565_line, but each
 * component is stored as one byte, 3 bytes per pixel.
 *
 * Two byte pointers are used: p1 writes the pixel whose result sits in the
 * low half of the register, p2 (= p1 + 3) writes the pixel from the high
 * half. After a pixel pair both pointers skip the 3 bytes written by the
 * other one. The bytes of a pixel are stored in the order the code labels
 * R, G, B; B is computed second and kept in r3 until G has been written.
 * The mask words are loaded to keep I1 in step but are not used.
 *
 * Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 12 bytes;
 * the loads for the next group are issued at the bottom of the loop, so the
 * last iteration reads past the consumed input (4 Y bytes, 2 U, 2 V).
 */
DEFUN(yuv2rgb24_line,MEM,
   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
        link 0;
        [--sp] = (r7:4);
        p1 = [fp+ARG_OUT];
        r3 = [fp+ARG_W];
        p2 = p1;
        p2 += 3;               // p2 addresses the second pixel of each pair

        i0 = r0;
        i2 = r1;
        i3 = r2;

        r0 = [fp+ARG_COEFF];   // coeff buffer
        i1 = r0;
        b1 = i1;
        l1 = COEFF_LEN;
        m0 = COEFF_REL_CY_OFF;
        p0 = r3;

        /* Prime the pipeline with the first group of samples. */
        r0   = [i0++];         // 4Y
        r1.l = w[i2++];        // 2u
        r1.h = w[i3++];        // 2v
        p0 = p0>>2;            // one iteration per 4 pixels

        lsetup (.L0888, .L1888) lc0 = p0;

        /*
           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv,gmask
           r0 -- used to load 4ys
           r1 -- used to load 2us,2vs
           r4 -- y3,y2
           r5 -- y1,y0
           r6 -- u1,u0
           r7 -- v1,v0
        */
                                                              r2=[i1++]; // oy
.L0888:
        /* Remove the offsets, then prescale every sample by 4. */
        (r4,r5) = byteop16m (r1:0, r3:2)   || r3=[i1++];   // oc
        (r7,r6) = byteop16m (r1:0, r3:2) (r);
        r5 = r5 << 2 (v);               // y1,y0
        r4 = r4 << 2 (v);               // y3,y2
        r6 = r6 << 2 (v) || r0=[i1++];  // u1,u0, r0=zero
        r7 = r7 << 2 (v) || r1=[i1++];  // v1,v0  r1=cy

        /* Y' = y*cy   (pixels 0 and 1, which share u0/v0) */
        a1 = r1.h*r5.h, a0 = r1.l*r5.l     || r1=[i1++];   // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        /* The store issued in parallel writes the low-half result; the
           shift then exposes the high-half result for the second pixel. */
        r2=r2>>16 || B[p1++]=r2;
                     B[p2++]=r2;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, I1 wraps to cy

        r2=r2>>16 || B[p1++]=r2;
                     B[p2++]=r2;

        r3=r3>>16 || B[p1++]=r3;
                     B[p2++]=r3                            || r1=[i1++]; // cy

        p1+=3;
        p2+=3;
        /* Y' = y*cy   (pixels 2 and 3, which share u1/v1) */
        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2=r2>>16 || B[p1++]=r2;
        B[p2++]=r2;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++]; // gmask
        r2=r2>>16 || B[p1++]=r2 || r0 = [i0++];    // 4y (next group)
                     B[p2++]=r2 || r1.l = w[i2++]; // 2u
        r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v
                     B[p2++]=r3 || r2=[i1++];      // oy

        p1+=3;
.L1888: p2+=3;

        l1 = 0;                // leave circular addressing disabled for I1

        (r7:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuv2rgb24_line)



/*
 * Byte offsets from FP (after "link 0") of the arguments of the packed to
 * planar converters that do not arrive in r0-r2.
 */
#define ARG_vdst        20
#define ARG_width       24
#define ARG_height      28
#define ARG_lumStride   32
#define ARG_chromStride 36
#define ARG_srcStride   40

/*
 * uyvytoyv12: convert packed UYVY to planar Y, U and V.
 *
 *   r0 = src, r1 = ydst, r2 = udst, remaining arguments at the ARG_* offsets.
 *
 * Rows are handled in pairs (height >> 1 outer iterations) and pixels in
 * groups of four (width >> 2 inner iterations). Per group and row 8 source
 * bytes are read and 4 luma bytes written; the chroma bytes of the two rows
 * are combined with byteop1p (presumably a rounded byte average; confirm in
 * the instruction reference) and 2 U and 2 V bytes are written per group.
 *
 * After each row pair the luma and source pointers move on by two strides
 * and the chroma pointers by one stride. The steps are derived from the
 * number of bytes the inner loop actually consumes, so padded strides
 * (lumStride > width, srcStride > 2*width) are handled.
 *
 * The source words for the next group are loaded ahead of use, so each row
 * is read 8 bytes past the last converted group.
 * Pixels beyond the last full group of four are not converted.
 */
DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         long width, long height,
                         long lumStride, long chromStride, long srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

        p0 = r1;       // Y top even

        i2 = r2; // *u
        r2 = [fp + ARG_vdst];
        i3 = r2; // *v

        r1 = [fp + ARG_srcStride];
        r2 = r0 + r1;
        i0 = r0;       // uyvy_T even
        i1 = r2;       // uyvy_B odd

        p2 = [fp + ARG_lumStride];
        p1 = p0 + p2;  // Y bot odd

        p5 = [fp + ARG_width];
        p4 = [fp + ARG_height];
        p4 = p4 >> 1;  // row pairs
        p5 = p5 >> 2;  // groups of 4 pixels per row

        r0 = p5;
        r0 = r0 << 1;  // chroma bytes written per plane and row pair
        r2 = [fp + ARG_chromStride];
        r2 = r2 - r0;
        m1 = r2;       // chroma step to the next row

        r0 = r0 << 1;  // luma bytes written per row
        r2 = p2;
        r2 = r2 << 1;
        r2 = r2 - r0;
        p2 = r2;       // luma step: 2*lumStride - bytes written

        r0 = r0 << 1;  // source bytes converted per row
        r1 = r1 << 1;
        r1 = r1 - r0;
        r1 += -8;      // i0,i1 are read one group ahead, correct for that
        m0 = r1;       // source step: 2*srcStride - bytes read

        /*   I0,I1 - src input line pointers
         *   p0,p1 - luma output line pointers
         *   I2    - dstU
         *   I3    - dstV
         */

        lsetup (0f, 1f) lc1 = p4;   // H/2
0:        r0 = [i0++] || r2 = [i1++];
          r1 = [i0++] || r3 = [i1++];
          r4 = byteop1p(r1:0, r3:2);
          r5 = byteop1p(r1:0, r3:2) (r);
          lsetup (2f, 3f) lc0 = p5; // W/4
2:          r0 = r0 >> 8(v);        // luma is the odd byte of each 16-bit half
            r1 = r1 >> 8(v);
            r2 = r2 >> 8(v);
            r3 = r3 >> 8(v);
            r0 = bytepack(r0, r1);
            r2 = bytepack(r2, r3)         || [p0++] = r0;    // yyyy
            r6 = pack(r5.l, r4.l)         || [p1++] = r2;    // yyyy
            r7 = pack(r5.h, r4.h)         || r0 = [i0++] || r2 = [i1++];
            r6 = bytepack(r6, r7)         || r1 = [i0++] || r3 = [i1++];
            r4 = byteop1p(r1:0, r3:2)     || w[i2++] = r6.l; // uu
3:          r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; // vv

          i0 += m0;
          i1 += m0;
          i2 += m1;
          i3 += m1;
          p0 = p0 + p2;
1:        p1 = p1 + p2;

        (r7:4,p5:4) = [sp++];
        unlink;
        rts;
DEFUN_END(uyvytoyv12)

/*
 * yuyvtoyv12: convert packed YUYV to planar Y, U and V.
 *
 * Same interface, loop structure and pointer stepping as uyvytoyv12; the only
 * difference is the byte position of luma and chroma inside each source
 * word: luma is packed straight from the even bytes, chroma is shifted down
 * from the odd bytes before the two rows are combined with byteop1p.
 *
 * The source words for the next group are loaded ahead of use, so each row
 * is read 8 bytes past the last converted group.
 * Pixels beyond the last full group of four are not converted.
 */
DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         long width, long height,
                         long lumStride, long chromStride, long srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

        p0 = r1;       // Y top even

        i2 = r2; // *u
        r2 = [fp + ARG_vdst];
        i3 = r2; // *v

        r1 = [fp + ARG_srcStride];
        r2 = r0 + r1;
        i0 = r0;       // yuyv_T even
        i1 = r2;       // yuyv_B odd

        p2 = [fp + ARG_lumStride];
        p1 = p0 + p2;  // Y bot odd

        p5 = [fp + ARG_width];
        p4 = [fp + ARG_height];
        p4 = p4 >> 1;  // row pairs
        p5 = p5 >> 2;  // groups of 4 pixels per row

        r0 = p5;
        r0 = r0 << 1;  // chroma bytes written per plane and row pair
        r2 = [fp + ARG_chromStride];
        r2 = r2 - r0;
        m1 = r2;       // chroma step to the next row

        r0 = r0 << 1;  // luma bytes written per row
        r2 = p2;
        r2 = r2 << 1;
        r2 = r2 - r0;
        p2 = r2;       // luma step: 2*lumStride - bytes written

        r0 = r0 << 1;  // source bytes converted per row
        r1 = r1 << 1;
        r1 = r1 - r0;
        r1 += -8;      // i0,i1 are read one group ahead, correct for that
        m0 = r1;       // source step: 2*srcStride - bytes read

        /*   I0,I1 - src input line pointers
         *   p0,p1 - luma output line pointers
         *   I2    - dstU
         *   I3    - dstV
         */

        lsetup (0f, 1f) lc1 = p4;   // H/2
0:        r0 = [i0++] || r2 = [i1++];
          r1 = [i0++] || r3 = [i1++];
          r4 = bytepack(r0, r1);
          r5 = bytepack(r2, r3);
          lsetup (2f, 3f) lc0 = p5; // W/4
2:          r0 = r0 >> 8(v) || [p0++] = r4;  // yyyy-even
            r1 = r1 >> 8(v) || [p1++] = r5;  // yyyy-odd
            r2 = r2 >> 8(v);
            r3 = r3 >> 8(v);
            r4 = byteop1p(r1:0, r3:2);
            r5 = byteop1p(r1:0, r3:2) (r);
            r6 = pack(r5.l, r4.l);
            r7 = pack(r5.h, r4.h)         || r0 = [i0++] || r2 = [i1++];
            r6 = bytepack(r6, r7)         || r1 = [i0++] || r3 = [i1++];
            r4 = bytepack(r0, r1)         || w[i2++] = r6.l; // uu
3:          r5 = bytepack(r2, r3)         || w[i3++] = r6.h; // vv

          i0 += m0;
          i1 += m0;
          i2 += m1;
          i3 += m1;
          p0 = p0 + p2;
1:        p1 = p1 + p2;

        (r7:4,p5:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuyvtoyv12)