1/* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF) 27 28#include <vis_proto.h> 29#include "java2d_Mlib.h" 30#include "vis_AlphaMacros.h" 31 32/***************************************************************/ 33 34extern mlib_d64 vis_d64_div_tbl[256]; 35 36/***************************************************************/ 37 38#define RGB2GRAY(r, g, b) \ 39 (((77 * (r)) + (150 * (g)) + (29 * (b)) + 128) >> 8) 40 41/***************************************************************/ 42 43static const mlib_s32 RGB_weight[] = { 44 128*77, 45 128*150, 46 128*29, 47 (1 << (16 + 6)) | (1 << 6) 48}; 49 50/***************************************************************/ 51 52#define RGB_VARS \ 53 mlib_d64 r, g, b, ar, gb, s02, s13; \ 54 mlib_f32 ff; \ 55 mlib_f32 alpha = ((mlib_f32*)RGB_weight)[0]; \ 56 mlib_f32 beta = ((mlib_f32*)RGB_weight)[1]; \ 57 mlib_f32 gamma = ((mlib_f32*)RGB_weight)[2]; \ 58 mlib_d64 d_half = vis_to_double_dup(RGB_weight[3]); \ 59 \ 60 vis_write_gsr((0 << 3) | 6) 61 62/***************************************************************/ 63 64#define GRAY_U8(ff, r, g, b) \ 65{ \ 66 mlib_d64 dr, dg, db; \ 67 dr = vis_fmul8x16al(r, alpha); \ 68 dg = vis_fmul8x16al(g, beta); \ 69 db = vis_fmul8x16al(b, gamma); \ 70 dr = vis_fpadd16(dr, dg); \ 71 db = vis_fpadd16(db, d_half); \ 72 dr = vis_fpadd16(dr, db); \ 73 ff = vis_fpack16(dr); \ 74} 75 76/***************************************************************/ 77 78#define GRAY_S16(dd, r, g, b) \ 79{ \ 80 mlib_d64 dr, dg, db; \ 81 dr = vis_fmul8x16al(r, alpha); \ 82 dg = vis_fmul8x16al(g, beta); \ 83 db = vis_fmul8x16al(b, gamma); \ 84 dr = vis_fpadd16(dr, dg); \ 85 db = vis_fpadd16(db, d_half); \ 86 dd = vis_fpadd16(dr, db); \ 87} 88 89/***************************************************************/ 90 91#define LOAD_BGR(ind) \ 92 b = vis_faligndata(vis_ld_u8(src + (ind )), b); \ 93 g = vis_faligndata(vis_ld_u8(src + (ind + 1)), g); \ 94 r = vis_faligndata(vis_ld_u8(src + (ind + 2)), r) 95 96/***************************************************************/ 97 98void ADD_SUFF(IntArgbToByteGrayConvert)(BLIT_PARAMS) 99{ 100 mlib_s32 dstScan = pDstInfo->scanStride; 101 mlib_s32 srcScan = pSrcInfo->scanStride; 102 mlib_u8 *dst_end; 103 mlib_s32 j; 104 RGB_VARS; 105 106 if (dstScan == width && srcScan == 4*width) { 107 width *= height; 108 height = 1; 109 } 110 111 for (j = 0; j < height; j++) { 112 mlib_f32 *src = srcBase; 113 mlib_u8 *dst = dstBase; 114 115 dst_end = dst + width; 116 117 while (((mlib_s32)dst & 3) && dst < dst_end) { 118 r = vis_ld_u8((mlib_u8*)src + 1); 119 g = vis_ld_u8((mlib_u8*)src + 2); 120 b = vis_ld_u8((mlib_u8*)src + 3); 121 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 122 vis_st_u8(D64_FROM_F32x2(ff), dst); 123 dst++; 124 src++; 125 } 126 127#pragma pipeloop(0) 128 for (; dst <= (dst_end - 4); dst += 4) { 129 s02 = vis_fpmerge(src[0], src[2]); 130 s13 = vis_fpmerge(src[1], src[3]); 131 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 132 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 133 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 134 *(mlib_f32*)dst = ff; 135 src += 4; 136 } 137 138 while (dst < dst_end) { 139 r = vis_ld_u8((mlib_u8*)src + 1); 140 g = vis_ld_u8((mlib_u8*)src + 2); 141 b = vis_ld_u8((mlib_u8*)src + 3); 142 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 143 vis_st_u8(D64_FROM_F32x2(ff), dst); 144 dst++; 145 src++; 146 } 147 148 PTR_ADD(dstBase, dstScan); 149 PTR_ADD(srcBase, srcScan); 150 } 151} 152 153/***************************************************************/ 154 155void ADD_SUFF(ThreeByteBgrToByteGrayConvert)(BLIT_PARAMS) 156{ 157 mlib_s32 dstScan = pDstInfo->scanStride; 158 mlib_s32 srcScan = pSrcInfo->scanStride; 159 mlib_u8 *dst_end; 160 mlib_s32 j; 161 RGB_VARS; 162 163 vis_alignaddr(NULL, 7); 164 165 if (dstScan == width && srcScan == 3*width) { 166 width *= height; 167 height = 1; 168 } 169 170 for (j = 0; j < height; j++) { 171 mlib_u8 *src = srcBase; 172 mlib_u8 *dst = dstBase; 173 174 dst_end = dst + width; 175 176 while (((mlib_s32)dst & 3) && dst < dst_end) { 177 b = vis_ld_u8(src); 178 g = vis_ld_u8(src + 1); 179 r = vis_ld_u8(src + 2); 180 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 181 vis_st_u8(D64_FROM_F32x2(ff), dst); 182 dst++; 183 src += 3; 184 } 185 186#pragma pipeloop(0) 187 for (; dst <= (dst_end - 4); dst += 4) { 188 LOAD_BGR(9); 189 LOAD_BGR(6); 190 LOAD_BGR(3); 191 LOAD_BGR(0); 192 GRAY_U8(ff, vis_read_hi(r), vis_read_hi(g), vis_read_hi(b)); 193 *(mlib_f32*)dst = ff; 194 src += 3*4; 195 } 196 197 while (dst < dst_end) { 198 b = vis_ld_u8(src); 199 g = vis_ld_u8(src + 1); 200 r = vis_ld_u8(src + 2); 201 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 202 vis_st_u8(D64_FROM_F32x2(ff), dst); 203 dst++; 204 src += 3; 205 } 206 207 PTR_ADD(dstBase, dstScan); 208 PTR_ADD(srcBase, srcScan); 209 } 210} 211 212/***************************************************************/ 213 214void ADD_SUFF(IntArgbToByteGrayScaleConvert)(SCALE_PARAMS) 215{ 216 mlib_s32 dstScan = pDstInfo->scanStride; 217 mlib_s32 srcScan = pSrcInfo->scanStride; 218 mlib_u8 *dst_end; 219 mlib_s32 i, j; 220 RGB_VARS; 221 222 for (j = 0; j < height; j++) { 223 mlib_f32 *src = srcBase; 224 mlib_u8 *dst = dstBase; 225 mlib_s32 tmpsxloc = sxloc; 226 227 PTR_ADD(src, (syloc >> shift) * srcScan); 228 229 dst_end = dst + width; 230 231 while (((mlib_s32)dst & 3) && dst < dst_end) { 232 i = tmpsxloc >> shift; 233 tmpsxloc += sxinc; 234 r = vis_ld_u8((mlib_u8*)(src + i) + 1); 235 g = vis_ld_u8((mlib_u8*)(src + i) + 2); 236 b = vis_ld_u8((mlib_u8*)(src + i) + 3); 237 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 238 vis_st_u8(D64_FROM_F32x2(ff), dst); 239 dst++; 240 } 241 242#pragma pipeloop(0) 243 for (; dst <= (dst_end - 4); dst += 4) { 244 s02 = vis_fpmerge(src[(tmpsxloc ) >> shift], 245 src[(tmpsxloc + 2*sxinc) >> shift]); 246 s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift], 247 src[(tmpsxloc + 3*sxinc) >> shift]); 248 tmpsxloc += 4*sxinc; 249 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 250 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 251 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 252 *(mlib_f32*)dst = ff; 253 } 254 255 while (dst < dst_end) { 256 i = tmpsxloc >> shift; 257 tmpsxloc += sxinc; 258 r = vis_ld_u8((mlib_u8*)(src + i) + 1); 259 g = vis_ld_u8((mlib_u8*)(src + i) + 2); 260 b = vis_ld_u8((mlib_u8*)(src + i) + 3); 261 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 262 vis_st_u8(D64_FROM_F32x2(ff), dst); 263 dst++; 264 } 265 266 PTR_ADD(dstBase, dstScan); 267 syloc += syinc; 268 } 269} 270 271/***************************************************************/ 272 273void ADD_SUFF(ThreeByteBgrToByteGrayScaleConvert)(SCALE_PARAMS) 274{ 275 mlib_s32 dstScan = pDstInfo->scanStride; 276 mlib_s32 srcScan = pSrcInfo->scanStride; 277 mlib_u8 *dst_end; 278 mlib_s32 j, i0, i1, i2, i3; 279 RGB_VARS; 280 281 vis_alignaddr(NULL, 7); 282 283 for (j = 0; j < height; j++) { 284 mlib_u8 *src = srcBase; 285 mlib_u8 *dst = dstBase; 286 mlib_s32 tmpsxloc = sxloc; 287 288 PTR_ADD(src, (syloc >> shift) * srcScan); 289 290 dst_end = dst + width; 291 292 while (((mlib_s32)dst & 3) && dst < dst_end) { 293 i0 = 3*(tmpsxloc >> shift); 294 tmpsxloc += sxinc; 295 b = vis_ld_u8(src + i0); 296 g = vis_ld_u8(src + i0 + 1); 297 r = vis_ld_u8(src + i0 + 2); 298 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 299 vis_st_u8(D64_FROM_F32x2(ff), dst); 300 dst++; 301 } 302 303#pragma pipeloop(0) 304 for (; dst <= (dst_end - 4); dst += 4) { 305 i0 = 3*(tmpsxloc >> shift); 306 tmpsxloc += sxinc; 307 i1 = 3*(tmpsxloc >> shift); 308 tmpsxloc += sxinc; 309 i2 = 3*(tmpsxloc >> shift); 310 tmpsxloc += sxinc; 311 i3 = 3*(tmpsxloc >> shift); 312 tmpsxloc += sxinc; 313 LOAD_BGR(i3); 314 LOAD_BGR(i2); 315 LOAD_BGR(i1); 316 LOAD_BGR(i0); 317 GRAY_U8(ff, vis_read_hi(r), vis_read_hi(g), vis_read_hi(b)); 318 *(mlib_f32*)dst = ff; 319 } 320 321 while (dst < dst_end) { 322 i0 = 3*(tmpsxloc >> shift); 323 tmpsxloc += sxinc; 324 b = vis_ld_u8(src + i0); 325 g = vis_ld_u8(src + i0 + 1); 326 r = vis_ld_u8(src + i0 + 2); 327 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 328 vis_st_u8(D64_FROM_F32x2(ff), dst); 329 dst++; 330 } 331 332 PTR_ADD(dstBase, dstScan); 333 syloc += syinc; 334 } 335} 336 337/***************************************************************/ 338 339void ADD_SUFF(IntArgbBmToByteGrayXparOver)(BLIT_PARAMS) 340{ 341 mlib_s32 dstScan = pDstInfo->scanStride; 342 mlib_s32 srcScan = pSrcInfo->scanStride; 343 mlib_u8 *dst_end; 344 mlib_d64 dzero = vis_fzero(); 345 mlib_f32 f0, f1; 346 mlib_s32 i, j, mask0, mask1; 347 RGB_VARS; 348 349 if (width < 8) { 350 for (j = 0; j < height; j++) { 351 mlib_u8 *src = srcBase; 352 mlib_u8 *dst = dstBase; 353 354 for (i = 0; i < width; i++) { 355 if (src[4*i]) { 356 dst[i] = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]); 357 } 358 } 359 360 PTR_ADD(dstBase, dstScan); 361 PTR_ADD(srcBase, srcScan); 362 } 363 return; 364 } 365 366 for (j = 0; j < height; j++) { 367 mlib_f32 *src = srcBase; 368 mlib_u8 *dst = dstBase; 369 370 dst_end = dst + width; 371 372 while (((mlib_s32)dst & 7) && dst < dst_end) { 373 if (*(mlib_u8*)src) { 374 r = vis_ld_u8((mlib_u8*)src + 1); 375 g = vis_ld_u8((mlib_u8*)src + 2); 376 b = vis_ld_u8((mlib_u8*)src + 3); 377 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 378 vis_st_u8(D64_FROM_F32x2(ff), dst); 379 } 380 dst++; 381 src++; 382 } 383 384#pragma pipeloop(0) 385 for (; dst <= (dst_end - 8); dst += 8) { 386 s02 = vis_fpmerge(src[0], src[2]); 387 s13 = vis_fpmerge(src[1], src[3]); 388 src += 4; 389 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 390 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 391 mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 392 dzero); 393 GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 394 395 s02 = vis_fpmerge(src[0], src[2]); 396 s13 = vis_fpmerge(src[1], src[3]); 397 src += 4; 398 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 399 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 400 mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 401 dzero); 402 GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 403 404 vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1); 405 } 406 407 while (dst < dst_end) { 408 if (*(mlib_u8*)src) { 409 r = vis_ld_u8((mlib_u8*)src + 1); 410 g = vis_ld_u8((mlib_u8*)src + 2); 411 b = vis_ld_u8((mlib_u8*)src + 3); 412 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 413 vis_st_u8(D64_FROM_F32x2(ff), dst); 414 } 415 dst++; 416 src++; 417 } 418 419 PTR_ADD(dstBase, dstScan); 420 PTR_ADD(srcBase, srcScan); 421 } 422} 423 424/***************************************************************/ 425 426void ADD_SUFF(IntArgbBmToByteGrayXparBgCopy)(BCOPY_PARAMS) 427{ 428 mlib_s32 dstScan = pDstInfo->scanStride; 429 mlib_s32 srcScan = pSrcInfo->scanStride; 430 mlib_u8 *dst_end; 431 mlib_d64 dzero = vis_fzero(), d_bgpixel; 432 mlib_f32 f0, f1; 433 mlib_s32 i, j, mask0, mask1; 434 RGB_VARS; 435 436 if (width < 8) { 437 for (j = 0; j < height; j++) { 438 mlib_u8 *src = srcBase; 439 mlib_u8 *dst = dstBase; 440 441 for (i = 0; i < width; i++) { 442 if (src[4*i]) { 443 dst[i] = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]); 444 } else { 445 dst[i] = bgpixel; 446 } 447 } 448 449 PTR_ADD(dstBase, dstScan); 450 PTR_ADD(srcBase, srcScan); 451 } 452 return; 453 } 454 455 D64_FROM_U8x8(d_bgpixel, bgpixel); 456 457 for (j = 0; j < height; j++) { 458 mlib_f32 *src = srcBase; 459 mlib_u8 *dst = dstBase; 460 461 dst_end = dst + width; 462 463 while (((mlib_s32)dst & 7) && dst < dst_end) { 464 if (*(mlib_u8*)src) { 465 r = vis_ld_u8((mlib_u8*)src + 1); 466 g = vis_ld_u8((mlib_u8*)src + 2); 467 b = vis_ld_u8((mlib_u8*)src + 3); 468 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 469 vis_st_u8(D64_FROM_F32x2(ff), dst); 470 } else { 471 *dst = bgpixel; 472 } 473 dst++; 474 src++; 475 } 476 477#pragma pipeloop(0) 478 for (; dst <= (dst_end - 8); dst += 8) { 479 s02 = vis_fpmerge(src[0], src[2]); 480 s13 = vis_fpmerge(src[1], src[3]); 481 src += 4; 482 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 483 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 484 mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 485 dzero); 486 GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 487 488 s02 = vis_fpmerge(src[0], src[2]); 489 s13 = vis_fpmerge(src[1], src[3]); 490 src += 4; 491 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 492 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 493 mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 494 dzero); 495 GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 496 497 *(mlib_d64*)dst = d_bgpixel; 498 vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1); 499 } 500 501 while (dst < dst_end) { 502 if (*(mlib_u8*)src) { 503 r = vis_ld_u8((mlib_u8*)src + 1); 504 g = vis_ld_u8((mlib_u8*)src + 2); 505 b = vis_ld_u8((mlib_u8*)src + 3); 506 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 507 vis_st_u8(D64_FROM_F32x2(ff), dst); 508 } else { 509 *dst = bgpixel; 510 } 511 dst++; 512 src++; 513 } 514 515 PTR_ADD(dstBase, dstScan); 516 PTR_ADD(srcBase, srcScan); 517 } 518} 519 520/***************************************************************/ 521 522void ADD_SUFF(IntArgbToByteGrayXorBlit)(BLIT_PARAMS) 523{ 524 mlib_s32 dstScan = pDstInfo->scanStride; 525 mlib_s32 srcScan = pSrcInfo->scanStride; 526 mlib_u8 *dst_end; 527 mlib_d64 dd, d_xorpixel, d_alphamask, dzero = vis_fzero(); 528 mlib_f32 f0, f1; 529 mlib_s32 i, j, mask0, mask1; 530 jint xorpixel = pCompInfo->details.xorPixel; 531 juint alphamask = pCompInfo->alphaMask; 532 RGB_VARS; 533 534 if (width < 8) { 535 for (j = 0; j < height; j++) { 536 mlib_s32 *src = srcBase; 537 mlib_u8 *dst = dstBase; 538 mlib_s32 srcpixel, r, g, b; 539 540 for (i = 0; i < width; i++) { 541 srcpixel = src[i]; 542 if (srcpixel >= 0) continue; 543 b = (srcpixel) & 0xff; 544 g = (srcpixel >> 8) & 0xff; 545 r = (srcpixel >> 16) & 0xff; 546 srcpixel = (77*r + 150*g + 29*b + 128) / 256; 547 dst[i] ^= (((srcpixel) ^ (xorpixel)) & ~(alphamask)); 548 } 549 550 PTR_ADD(dstBase, dstScan); 551 PTR_ADD(srcBase, srcScan); 552 } 553 return; 554 } 555 556 D64_FROM_U8x8(d_xorpixel, xorpixel); 557 D64_FROM_U8x8(d_alphamask, alphamask); 558 559 for (j = 0; j < height; j++) { 560 mlib_f32 *src = srcBase; 561 mlib_u8 *dst = dstBase; 562 563 dst_end = dst + width; 564 565 while (((mlib_s32)dst & 7) && dst < dst_end) { 566 if ((*(mlib_u8*)src) & 0x80) { 567 r = vis_ld_u8((mlib_u8*)src + 1); 568 g = vis_ld_u8((mlib_u8*)src + 2); 569 b = vis_ld_u8((mlib_u8*)src + 3); 570 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 571 dd = vis_fxor(D64_FROM_F32x2(ff), d_xorpixel); 572 dd = vis_fandnot(d_alphamask, dd); 573 vis_st_u8(vis_fxor(vis_ld_u8(dst), dd), dst); 574 } 575 dst++; 576 src++; 577 } 578 579#pragma pipeloop(0) 580 for (; dst <= (dst_end - 8); dst += 8) { 581 s02 = vis_fpmerge(src[0], src[2]); 582 s13 = vis_fpmerge(src[1], src[3]); 583 src += 4; 584 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 585 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 586 mask0 = vis_fcmplt16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 587 dzero); 588 GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 589 590 s02 = vis_fpmerge(src[0], src[2]); 591 s13 = vis_fpmerge(src[1], src[3]); 592 src += 4; 593 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 594 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 595 mask1 = vis_fcmplt16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 596 dzero); 597 GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 598 599 dd = vis_freg_pair(f0, f1); 600 dd = vis_fandnot(d_alphamask, vis_fxor(dd, d_xorpixel)); 601 vis_pst_8(vis_fxor(*(mlib_d64*)dst, dd), dst, (mask0 << 4) | mask1); 602 } 603 604 while (dst < dst_end) { 605 if ((*(mlib_u8*)src) & 0x80) { 606 r = vis_ld_u8((mlib_u8*)src + 1); 607 g = vis_ld_u8((mlib_u8*)src + 2); 608 b = vis_ld_u8((mlib_u8*)src + 3); 609 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 610 dd = vis_fxor(D64_FROM_F32x2(ff), d_xorpixel); 611 dd = vis_fandnot(d_alphamask, dd); 612 vis_st_u8(vis_fxor(vis_ld_u8(dst), dd), dst); 613 } 614 dst++; 615 src++; 616 } 617 618 PTR_ADD(dstBase, dstScan); 619 PTR_ADD(srcBase, srcScan); 620 } 621} 622 623/***************************************************************/ 624 625void ADD_SUFF(IntArgbBmToByteGrayScaleXparOver)(SCALE_PARAMS) 626{ 627 mlib_s32 dstScan = pDstInfo->scanStride; 628 mlib_s32 srcScan = pSrcInfo->scanStride; 629 mlib_u8 *dst_end; 630 mlib_d64 dzero = vis_fzero(); 631 mlib_f32 f0, f1; 632 mlib_s32 i, j, mask0, mask1; 633 RGB_VARS; 634 635 for (j = 0; j < height; j++) { 636 mlib_f32 *src = srcBase; 637 mlib_u8 *dst = dstBase; 638 mlib_s32 tmpsxloc = sxloc; 639 640 PTR_ADD(src, (syloc >> shift) * srcScan); 641 642 dst_end = dst + width; 643 644 while (((mlib_s32)dst & 7) && dst < dst_end) { 645 i = tmpsxloc >> shift; 646 tmpsxloc += sxinc; 647 if (*(mlib_u8*)(src + i)) { 648 r = vis_ld_u8((mlib_u8*)(src + i) + 1); 649 g = vis_ld_u8((mlib_u8*)(src + i) + 2); 650 b = vis_ld_u8((mlib_u8*)(src + i) + 3); 651 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 652 vis_st_u8(D64_FROM_F32x2(ff), dst); 653 } 654 dst++; 655 } 656 657#pragma pipeloop(0) 658 for (; dst <= (dst_end - 8); dst += 8) { 659 s02 = vis_fpmerge(src[(tmpsxloc ) >> shift], 660 src[(tmpsxloc + 2*sxinc) >> shift]); 661 s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift], 662 src[(tmpsxloc + 3*sxinc) >> shift]); 663 tmpsxloc += 4*sxinc; 664 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 665 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 666 mask0 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 667 dzero); 668 GRAY_U8(f0, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 669 670 s02 = vis_fpmerge(src[(tmpsxloc ) >> shift], 671 src[(tmpsxloc + 2*sxinc) >> shift]); 672 s13 = vis_fpmerge(src[(tmpsxloc + sxinc) >> shift], 673 src[(tmpsxloc + 3*sxinc) >> shift]); 674 tmpsxloc += 4*sxinc; 675 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 676 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 677 mask1 = vis_fcmpne16(vis_fpmerge(vis_read_hi(ar), vis_read_hi(ar)), 678 dzero); 679 GRAY_U8(f1, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 680 681 vis_pst_8(vis_freg_pair(f0, f1), dst, (mask0 << 4) | mask1); 682 } 683 684 while (dst < dst_end) { 685 i = tmpsxloc >> shift; 686 tmpsxloc += sxinc; 687 if (*(mlib_u8*)(src + i)) { 688 r = vis_ld_u8((mlib_u8*)(src + i) + 1); 689 g = vis_ld_u8((mlib_u8*)(src + i) + 2); 690 b = vis_ld_u8((mlib_u8*)(src + i) + 3); 691 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 692 vis_st_u8(D64_FROM_F32x2(ff), dst); 693 } 694 dst++; 695 } 696 697 PTR_ADD(dstBase, dstScan); 698 syloc += syinc; 699 } 700} 701 702/***************************************************************/ 703 704#define TBL_MUL ((mlib_s16*)vis_mul8s_tbl + 1) 705#define TBL_DIV ((mlib_u8*)vis_div8_tbl + 2) 706 707void ADD_SUFF(IntArgbToByteGraySrcOverMaskBlit)(MASKBLIT_PARAMS) 708{ 709 mlib_s32 extraA; 710 mlib_s32 dstScan = pDstInfo->scanStride; 711 mlib_s32 srcScan = pSrcInfo->scanStride; 712 mlib_u8 *mul8_extra; 713 mlib_u8 *dst_end; 714 mlib_d64 srcAx4, dd, d0, d1; 715 mlib_d64 done = vis_to_double_dup(0x7fff7fff); 716 mlib_s32 j, srcA0, srcA1, srcA2, srcA3; 717 RGB_VARS; 718 719 extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5); 720 mul8_extra = mul8table[extraA]; 721 722 if (pMask != NULL) { 723 pMask += maskOff; 724 725 if (dstScan == width && srcScan == 4*width && maskScan == width) { 726 width *= height; 727 height = 1; 728 } 729 730 maskScan -= width; 731 732 for (j = 0; j < height; j++) { 733 mlib_f32 *src = srcBase; 734 mlib_u8 *dst = dstBase; 735 736 dst_end = dst + width; 737 738 while (((mlib_s32)dst & 3) && dst < dst_end) { 739 srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src]; 740 r = vis_ld_u8((mlib_u8*)src + 1); 741 g = vis_ld_u8((mlib_u8*)src + 2); 742 b = vis_ld_u8((mlib_u8*)src + 3); 743 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 744 d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half); 745 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0); 746 dd = vis_fpadd16(d0, d1); 747 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 748 dst++; 749 src++; 750 } 751 752#pragma pipeloop(0) 753 for (; dst <= (dst_end - 4); dst += 4) { 754 srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src]; 755 srcA1 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 1)]; 756 srcA2 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 2)]; 757 srcA3 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)(src + 3)]; 758 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA3), srcAx4); 759 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA2), srcAx4); 760 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA1), srcAx4); 761 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA0), srcAx4); 762 763 s02 = vis_fpmerge(src[0], src[2]); 764 s13 = vis_fpmerge(src[1], src[3]); 765 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 766 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 767 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 768 d0 = vis_fpadd16(vis_fmul8x16(ff, srcAx4), d_half); 769 d1 = vis_fmul8x16(*(mlib_f32*)dst, vis_fpsub16(done, srcAx4)); 770 dd = vis_fpadd16(d0, d1); 771 *(mlib_f32*)dst = vis_fpack16(dd); 772 src += 4; 773 } 774 775 while (dst < dst_end) { 776 srcA0 = mul8table[mul8_extra[*pMask++]][*(mlib_u8*)src]; 777 r = vis_ld_u8((mlib_u8*)src + 1); 778 g = vis_ld_u8((mlib_u8*)src + 2); 779 b = vis_ld_u8((mlib_u8*)src + 3); 780 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 781 d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half); 782 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0); 783 dd = vis_fpadd16(d0, d1); 784 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 785 dst++; 786 src++; 787 } 788 789 PTR_ADD(dstBase, dstScan); 790 PTR_ADD(srcBase, srcScan); 791 PTR_ADD(pMask, maskScan); 792 } 793 } else { 794 795 if (dstScan == width && srcScan == 4*width) { 796 width *= height; 797 height = 1; 798 } 799 800 for (j = 0; j < height; j++) { 801 mlib_f32 *src = srcBase; 802 mlib_u8 *dst = dstBase; 803 804 dst_end = dst + width; 805 806 while (((mlib_s32)dst & 3) && dst < dst_end) { 807 srcA0 = mul8_extra[*(mlib_u8*)src]; 808 r = vis_ld_u8((mlib_u8*)src + 1); 809 g = vis_ld_u8((mlib_u8*)src + 2); 810 b = vis_ld_u8((mlib_u8*)src + 3); 811 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 812 d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half); 813 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0); 814 dd = vis_fpadd16(d0, d1); 815 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 816 dst++; 817 src++; 818 } 819 820#pragma pipeloop(0) 821 for (; dst <= (dst_end - 4); dst += 4) { 822 srcA0 = mul8_extra[*(mlib_u8*)src]; 823 srcA1 = mul8_extra[*(mlib_u8*)(src + 1)]; 824 srcA2 = mul8_extra[*(mlib_u8*)(src + 2)]; 825 srcA3 = mul8_extra[*(mlib_u8*)(src + 3)]; 826 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA3), srcAx4); 827 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA2), srcAx4); 828 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA1), srcAx4); 829 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA0), srcAx4); 830 831 s02 = vis_fpmerge(src[0], src[2]); 832 s13 = vis_fpmerge(src[1], src[3]); 833 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 834 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 835 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 836 d0 = vis_fpadd16(vis_fmul8x16(ff, srcAx4), d_half); 837 d1 = vis_fmul8x16(*(mlib_f32*)dst, vis_fpsub16(done, srcAx4)); 838 dd = vis_fpadd16(d0, d1); 839 *(mlib_f32*)dst = vis_fpack16(dd); 840 src += 4; 841 } 842 843 while (dst < dst_end) { 844 srcA0 = mul8_extra[*(mlib_u8*)src]; 845 r = vis_ld_u8((mlib_u8*)src + 1); 846 g = vis_ld_u8((mlib_u8*)src + 2); 847 b = vis_ld_u8((mlib_u8*)src + 3); 848 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 849 d0 = vis_fpadd16(MUL8_VIS(ff, srcA0), d_half); 850 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - srcA0); 851 dd = vis_fpadd16(d0, d1); 852 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 853 dst++; 854 src++; 855 } 856 857 PTR_ADD(dstBase, dstScan); 858 PTR_ADD(srcBase, srcScan); 859 } 860 } 861} 862 863/***************************************************************/ 864 865#define GET_COEF(i) \ 866 pathA = pMask[i]; \ 867 srcA = *(mlib_u8*)(src + i); \ 868 srcA = mul8table[extraA][srcA]; \ 869 dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd); \ 870 srcF = mul8table[pathA][srcFbase]; \ 871 dstA = 0xff - pathA + mul8table[pathA][dstF]; \ 872 srcA = mul8table[srcF][srcA]; \ 873 resA = srcA + dstA; \ 874 srcAx4 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*srcA), srcAx4); \ 875 divAx4 = vis_faligndata(vis_ld_u16(TBL_DIV + 8*resA), divAx4) 876 877/***************************************************************/ 878 879void ADD_SUFF(IntArgbToByteGrayAlphaMaskBlit)(MASKBLIT_PARAMS) 880{ 881 mlib_s32 extraA; 882 mlib_s32 dstScan = pDstInfo->scanStride; 883 mlib_s32 srcScan = pSrcInfo->scanStride; 884 mlib_u8 *dst_end; 885 mlib_d64 srcAx4, dstAx4, divAx4, dd, ds; 886 mlib_d64 done = vis_to_double_dup(0x01000100); 887 mlib_f32 fscale = vis_to_float(0x02020202); 888 mlib_s32 j; 889 mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd; 890 mlib_s32 DstOpAnd, DstOpXor, DstOpAdd; 891 mlib_s32 pathA, srcFbase, resA, resG, srcF, dstF, srcA, dstA; 892 893 RGB_VARS; 894 895 SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval; 896 SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval; 897 SrcOpAdd = 898 (jint) (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor; 899 900 DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval; 901 DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval; 902 DstOpAdd = 903 (jint) (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor; 904 905 extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5); 906 907 srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd); 908 909 vis_write_gsr((7 << 3) | 6); 910 911 if (pMask != NULL) { 912 pMask += maskOff; 913 914 if (dstScan == width && srcScan == 4*width && maskScan == width) { 915 width *= height; 916 height = 1; 917 } 918 919 maskScan -= width; 920 921 for (j = 0; j < height; j++) { 922 mlib_f32 *src = srcBase; 923 mlib_u8 *dst = dstBase; 924 925 dst_end = dst + width; 926 927 while (((mlib_s32)dst & 3) && dst < dst_end) { 928 pathA = *pMask++; 929 srcA = *(mlib_u8*)src; 930 srcA = mul8table[extraA][srcA]; 931 dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd); 932 srcF = mul8table[pathA][srcFbase]; 933 dstA = 0xff - pathA + mul8table[pathA][dstF]; 934 srcA = mul8table[srcF][srcA]; 935 resA = srcA + dstA; 936 937 r = vis_ld_u8((mlib_u8*)src + 1); 938 g = vis_ld_u8((mlib_u8*)src + 2); 939 b = vis_ld_u8((mlib_u8*)src + 3); 940 GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 941 dd = vis_fmul8x16(fscale, dd); 942 ff = vis_fpack16(dd); 943 944 dd = vis_freg_pair(vis_fzeros(), 945 ((mlib_f32*)vis_mul8s_tbl)[dstA]); 946 DIV_ALPHA(dd, resA); 947 ds = vis_fpsub16(done, dd); 948 dd = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dd); 949 ds = vis_fmul8x16(ff, ds); 950 dd = vis_fpadd16(dd, ds); 951 ff = vis_fpack16(dd); 952 vis_st_u8(D64_FROM_F32x2(ff), dst); 953 954 dst++; 955 src++; 956 } 957 958#pragma pipeloop(0) 959 for (; dst <= (dst_end - 4); dst += 4) { 960 GET_COEF(3); 961 GET_COEF(2); 962 GET_COEF(1); 963 GET_COEF(0); 964 pMask += 4; 965 srcAx4 = FMUL_16x16(srcAx4, divAx4); 966 dstAx4 = vis_fpsub16(done, srcAx4); 967 968 s02 = vis_fpmerge(src[0], src[2]); 969 s13 = vis_fpmerge(src[1], src[3]); 970 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 971 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 972 GRAY_S16(dd, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 973 dd = vis_fmul8x16(fscale, dd); 974 ff = vis_fpack16(dd); 975 976 dd = vis_fmul8x16(*(mlib_f32*)dst, dstAx4); 977 ds = vis_fmul8x16(ff, srcAx4); 978 dd = vis_fpadd16(dd, ds); 979 *(mlib_f32*)dst = vis_fpack16(dd); 980 981 src += 4; 982 } 983 984 while (dst < dst_end) { 985 pathA = *pMask++; 986 srcA = *(mlib_u8*)src; 987 srcA = mul8table[extraA][srcA]; 988 dstF = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd); 989 srcF = mul8table[pathA][srcFbase]; 990 dstA = 0xff - pathA + mul8table[pathA][dstF]; 991 srcA = mul8table[srcF][srcA]; 992 resA = srcA + dstA; 993 994 r = vis_ld_u8((mlib_u8*)src + 1); 995 g = vis_ld_u8((mlib_u8*)src + 2); 996 b = vis_ld_u8((mlib_u8*)src + 3); 997 GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 998 dd = vis_fmul8x16(fscale, dd); 999 ff = vis_fpack16(dd); 1000 1001 dd = vis_freg_pair(vis_fzeros(), 1002 ((mlib_f32*)vis_mul8s_tbl)[dstA]); 1003 DIV_ALPHA(dd, resA); 1004 ds = vis_fpsub16(done, dd); 1005 dd = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dd); 1006 ds = vis_fmul8x16(ff, ds); 1007 dd = vis_fpadd16(dd, ds); 1008 ff = vis_fpack16(dd); 1009 vis_st_u8(D64_FROM_F32x2(ff), dst); 1010 1011 dst++; 1012 src++; 1013 } 1014 1015 PTR_ADD(dstBase, dstScan); 1016 PTR_ADD(srcBase, srcScan); 1017 PTR_ADD(pMask, maskScan); 1018 } 1019 } else { 1020 1021 if (dstScan == width && srcScan == 4*width) { 1022 width *= height; 1023 height = 1; 1024 } 1025 1026 for (j = 0; j < height; j++) { 1027 mlib_f32 *src = srcBase; 1028 mlib_u8 *dst = dstBase; 1029 1030 dst_end = dst + width; 1031 1032 while (dst < dst_end) { 1033 srcA = *(mlib_u8*)src; 1034 srcA = mul8table[extraA][srcA]; 1035 dstA = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd); 1036 srcA = mul8table[srcFbase][srcA]; 1037 resA = srcA + dstA; 1038 1039 r = vis_ld_u8((mlib_u8*)src + 1); 1040 g = vis_ld_u8((mlib_u8*)src + 2); 1041 b = vis_ld_u8((mlib_u8*)src + 3); 1042 GRAY_S16(dd, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 1043 dd = vis_fmul8x16(fscale, dd); 1044 ff = vis_fpack16(dd); 1045 1046 resG = mul8table[dstA][*dst] + 1047 mul8table[srcA][((mlib_u8*)&ff)[3]]; 1048 *dst = div8table[resA][resG]; 1049 1050 dst++; 1051 src++; 1052 } 1053 1054 PTR_ADD(dstBase, dstScan); 1055 PTR_ADD(srcBase, srcScan); 1056 } 1057 } 1058} 1059 1060/***************************************************************/ 1061 1062void ADD_SUFF(IntRgbToByteGrayAlphaMaskBlit)(MASKBLIT_PARAMS) 1063{ 1064 mlib_s32 extraA; 1065 mlib_s32 dstScan = pDstInfo->scanStride; 1066 mlib_s32 srcScan = pSrcInfo->scanStride; 1067 mlib_u8 *dst_end; 1068 mlib_d64 srcA_d, dstA_d, dd, d0, d1; 1069 mlib_s32 i, j, srcG; 1070 mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd; 1071 mlib_s32 DstOpAnd, DstOpXor, DstOpAdd; 1072 mlib_s32 pathA, srcFbase, dstFbase, resA, resG, srcA, dstA; 1073 1074 RGB_VARS; 1075 1076 SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval; 1077 SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval; 1078 SrcOpAdd = 1079 (jint) (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor; 1080 1081 DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval; 1082 DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval; 1083 DstOpAdd = 1084 (jint) (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor; 1085 1086 extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5); 1087 1088 srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd); 1089 dstFbase = (((extraA & DstOpAnd) ^ DstOpXor) + DstOpAdd); 1090 1091 srcFbase = mul8table[srcFbase][extraA]; 1092 1093 if (width < 16) { 1094 if (pMask != NULL) { 1095 pMask += maskOff; 1096 1097 for (j = 0; j < height; j++) { 1098 mlib_u8 *dst = dstBase; 1099 mlib_u8 *src = srcBase; 1100 1101 for (i = 0; i < width; i++) { 1102 pathA = pMask[i]; 1103 dstA = 0xff - pathA + mul8table[dstFbase][pathA]; 1104 srcA = mul8table[srcFbase][pathA]; 1105 resA = srcA + dstA; 1106 1107 srcG = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]); 1108 resG = mul8table[dstA][dst[i]] + mul8table[srcA][srcG]; 1109 resG = div8table[resA][resG]; 1110 dst[i] = resG; 1111 } 1112 1113 PTR_ADD(dstBase, dstScan); 1114 PTR_ADD(srcBase, srcScan); 1115 PTR_ADD(pMask, maskScan); 1116 } 1117 } else { 1118 dstA = dstFbase; 1119 srcA = srcFbase; 1120 resA = srcA + dstA; 1121 1122 for (j = 0; j < height; j++) { 1123 mlib_u8 *dst = dstBase; 1124 mlib_u8 *src = srcBase; 1125 1126 for (i = 0; i < width; i++) { 1127 srcG = RGB2GRAY(src[4*i + 1], src[4*i + 2], src[4*i + 3]); 1128 resG = mul8table[dstA][dst[i]] + mul8table[srcA][srcG]; 1129 resG = div8table[resA][resG]; 1130 dst[i] = resG; 1131 } 1132 1133 PTR_ADD(dstBase, dstScan); 1134 PTR_ADD(srcBase, srcScan); 1135 } 1136 } 1137 return; 1138 } 1139 1140 if (pMask != NULL) { 1141 mlib_s32 srcA_buff[256]; 1142 mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16), ddiv; 1143 mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF); 1144 1145 srcA_buff[0] = 0; 1146#pragma pipeloop(0) 1147 for (pathA = 1; pathA < 256; pathA++) { 1148 dstA = 0xff - pathA + mul8table[dstFbase][pathA]; 1149 srcA = mul8table[srcFbase][pathA]; 1150 resA = dstA + srcA; 1151 ddiv = dscale*vis_d64_div_tbl[resA]; 1152 srcA_buff[pathA] = srcA*ddiv + (1 << 15); 1153 } 1154 1155 pMask += maskOff; 1156 maskScan -= width; 1157 1158 if (dstScan == width && srcScan == 4*width && maskScan == width) { 1159 width *= height; 1160 height = 1; 1161 } 1162 1163 for (j = 0; j < height; j++) { 1164 mlib_f32 *src = srcBase; 1165 mlib_u8 *dst = dstBase; 1166 1167 dst_end = dst + width; 1168 1169 while (((mlib_s32)dst & 3) && dst < dst_end) { 1170 pathA = *pMask++; 1171 srcA_d = vis_ld_u16(srcA_buff + pathA); 1172 dstA_d = vis_fpsub16(d_one, srcA_d); 1173 r = vis_ld_u8((mlib_u8*)src + 1); 1174 g = vis_ld_u8((mlib_u8*)src + 2); 1175 b = vis_ld_u8((mlib_u8*)src + 3); 1176 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 1177 d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1178 d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d); 1179 dd = vis_fpadd16(d0, d1); 1180 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 1181 dst++; 1182 src++; 1183 } 1184 1185#pragma pipeloop(0) 1186 for (; dst <= (dst_end - 4); dst += 4) { 1187 LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[3]); 1188 LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[2]); 1189 LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[1]); 1190 LOAD_NEXT_U16(srcA_d, srcA_buff + pMask[0]); 1191 dstA_d = vis_fpsub16(d_one, srcA_d); 1192 pMask += 4; 1193 1194 s02 = vis_fpmerge(src[0], src[2]); 1195 s13 = vis_fpmerge(src[1], src[3]); 1196 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 1197 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 1198 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 1199 dd = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1200 dd = vis_fpadd16(vis_fmul8x16(*(mlib_f32*)dst, dstA_d), dd); 1201 *(mlib_f32*)dst = vis_fpack16(dd); 1202 src += 4; 1203 } 1204 1205 while (dst < dst_end) { 1206 pathA = *pMask++; 1207 srcA_d = vis_ld_u16(srcA_buff + pathA); 1208 dstA_d = vis_fpsub16(d_one, srcA_d); 1209 r = vis_ld_u8((mlib_u8*)src + 1); 1210 g = vis_ld_u8((mlib_u8*)src + 2); 1211 b = vis_ld_u8((mlib_u8*)src + 3); 1212 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 1213 d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1214 d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d); 1215 dd = vis_fpadd16(d0, d1); 1216 ff = vis_fpack16(dd); 1217 vis_st_u8(D64_FROM_F32x2(ff), dst); 1218 dst++; 1219 src++; 1220 } 1221 1222 PTR_ADD(dstBase, dstScan); 1223 PTR_ADD(srcBase, srcScan); 1224 PTR_ADD(pMask, maskScan); 1225 } 1226 } else { 1227 mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16), ddiv; 1228 mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF); 1229 1230 dstA = dstFbase; 1231 srcA = srcFbase; 1232 resA = dstA + srcA; 1233 ddiv = dscale*vis_d64_div_tbl[resA]; 1234 srcA = (mlib_s32)(srcA*ddiv + (1 << 15)) >> 16; 1235 srcA_d = vis_to_double_dup((srcA << 16) | srcA); 1236 dstA_d = vis_fpsub16(d_one, srcA_d); 1237 1238 if (dstScan == width && srcScan == 4*width) { 1239 width *= height; 1240 height = 1; 1241 } 1242 1243 for (j = 0; j < height; j++) { 1244 mlib_f32 *src = srcBase; 1245 mlib_u8 *dst = dstBase; 1246 1247 dst_end = dst + width; 1248 1249 while (((mlib_s32)dst & 3) && dst < dst_end) { 1250 r = vis_ld_u8((mlib_u8*)src + 1); 1251 g = vis_ld_u8((mlib_u8*)src + 2); 1252 b = vis_ld_u8((mlib_u8*)src + 3); 1253 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 1254 d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1255 d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d); 1256 dd = vis_fpadd16(d0, d1); 1257 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); 1258 dst++; 1259 src++; 1260 } 1261 1262#pragma pipeloop(0) 1263 for (; dst <= (dst_end - 4); dst += 4) { 1264 s02 = vis_fpmerge(src[0], src[2]); 1265 s13 = vis_fpmerge(src[1], src[3]); 1266 ar = vis_fpmerge(vis_read_hi(s02), vis_read_hi(s13)); 1267 gb = vis_fpmerge(vis_read_lo(s02), vis_read_lo(s13)); 1268 GRAY_U8(ff, vis_read_lo(ar), vis_read_hi(gb), vis_read_lo(gb)); 1269 dd = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1270 dd = vis_fpadd16(vis_fmul8x16(*(mlib_f32*)dst, dstA_d), dd); 1271 *(mlib_f32*)dst = vis_fpack16(dd); 1272 src += 4; 1273 } 1274 1275 while (dst < dst_end) { 1276 r = vis_ld_u8((mlib_u8*)src + 1); 1277 g = vis_ld_u8((mlib_u8*)src + 2); 1278 b = vis_ld_u8((mlib_u8*)src + 3); 1279 GRAY_U8(ff, vis_read_lo(r), vis_read_lo(g), vis_read_lo(b)); 1280 d0 = vis_fpadd16(vis_fmul8x16(ff, srcA_d), d_half); 1281 d1 = vis_fmul8x16(vis_read_lo(vis_ld_u8(dst)), dstA_d); 1282 dd = vis_fpadd16(d0, d1); 1283 ff = vis_fpack16(dd); 1284 vis_st_u8(D64_FROM_F32x2(ff), dst); 1285 dst++; 1286 src++; 1287 } 1288 1289 PTR_ADD(dstBase, dstScan); 1290 PTR_ADD(srcBase, srcScan); 1291 } 1292 } 1293} 1294 1295/***************************************************************/ 1296 1297#endif 1298