1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
32/* ##############################################################
33 * IntArgbToIntArgbAlphaMaskBlit()
34 * IntArgbToFourByteAbgrAlphaMaskBlit()
35 */
36
/*
 * MASK_FILL, masked IntArgb -> IntArgb flavour.
 *
 * Blends one source pixel into one destination pixel under a coverage
 * value pathA and leaves the (unpacked) result in rr:
 *   1. srcA is replaced by mul8_extra[srcA];
 *   2. srcF is derived from dstA, and dstF from the scaled srcA, via the
 *      (And, Xor, Add) operand triples;
 *   3. both factors are scaled by pathA, and dstF additionally receives
 *      (0xff - pathA);
 *   4. srcA and dstA are replaced by their factor-scaled values and passed
 *      to BLEND_VIS together with the two pixel values.
 *
 * srcA and dstA are assigned to, so both must be lvalues; callers read dstA
 * back after the expansion.  The expansion refers to mul8_extra, SrcOpAnd,
 * SrcOpXor, SrcOpAdd, DstOpAnd, DstOpXor and DstOpAdd by name, so those must
 * be visible where the macro is used.
 * NOTE(review): MUL8_INT and BLEND_VIS are defined outside this file view
 * and may capture further names (presumably mul8_tbl) -- confirm before
 * renaming anything in the callers.
 */
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
{                                                              \
    mlib_s32 srcF, dstF;                                       \
                                                               \
    srcA = mul8_extra[srcA];                                   \
                                                               \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;          \
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
                                                               \
    srcF = MUL8_INT(pathA, srcF);                              \
    dstF = MUL8_INT(pathA, dstF) + (0xff - pathA);             \
                                                               \
    srcA = MUL8_INT(srcF, srcA);                               \
    dstA = MUL8_INT(dstF, dstA);                               \
                                                               \
    BLEND_VIS(rr, dstARGB, srcARGB, dstA, srcA);               \
}
54
55/***************************************************************/
56
/*
 * Composite one row of 32-bit source pixels onto a destination row, with one
 * coverage byte per pixel taken from pMask.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel
 *   pMask      coverage bytes, one per pixel
 *   width      number of pixels to process
 *   log_val    six operands: SrcOp And/Xor/Add in [0..2], DstOp And/Xor/Add
 *              in [3..5]
 *   mul8_extra lookup table applied to each source alpha by MASK_FILL
 *   mul8_tbl   not named in this body; NOTE(review): presumably captured by
 *              MUL8_INT inside MASK_FILL -- confirm in vis_AlphaMacros.h
 *
 * The first byte of every pixel is read as that pixel's alpha, and the first
 * byte of each processed destination pixel is overwritten with the dstA
 * value left behind by MASK_FILL.
 */
static void IntArgbToIntArgbAlphaMaskBlit_line(mlib_f32 *dst_ptr,
                                               mlib_f32 *src_ptr,
                                               mlib_u8  *pMask,
                                               mlib_s32 width,
                                               mlib_s32 *log_val,
                                               mlib_u8  *mul8_extra,
                                               mlib_u8  *mul8_tbl)
{
    mlib_s32 i, i0;
    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA0, srcA1, msk;
    mlib_d64 res0, res1, dstARGB;
    mlib_f32 dstARGB0, srcARGB0, srcARGB1;
    /* These names are referenced directly by the MASK_FILL expansion. */
    mlib_s32 SrcOpAnd = log_val[0];
    mlib_s32 SrcOpXor = log_val[1];
    mlib_s32 SrcOpAdd = log_val[2];
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];

    i = i0 = 0;

    /* Peel off one leading pixel when dst_ptr is not 8-byte aligned, so the
       paired loop below can access the destination 8 bytes at a time. */
    if ((mlib_s32)dst_ptr & 7) {
        pathA0 = pMask[i];
        /* Zero coverage: the pixel is skipped entirely. */
        if (pathA0) {
            dstA0 = *(mlib_u8*)(dst_ptr + i);
            srcA0 = *(mlib_u8*)(src_ptr + i);
            dstARGB0 = dst_ptr[i];
            srcARGB0 = src_ptr[i];
            MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
            dst_ptr[i] = vis_fpack16(res0);
            /* Replace the packed first byte with the alpha from MASK_FILL. */
            *(mlib_u8*)(dst_ptr + i) = dstA0;
        }

        i0 = 1;
    }

    /* Main loop: two pixels per iteration. */
#pragma pipeloop(0)
    for (i = i0; i <= width - 2; i += 2) {
        pathA0 = pMask[i];
        pathA1 = pMask[i + 1];
        dstA0 = *(mlib_u8*)(dst_ptr + i);
        dstA1 = *(mlib_u8*)(dst_ptr + i + 1);
        dstARGB = *(mlib_d64*)(dst_ptr + i);
        srcA0 = *(mlib_u8*)(src_ptr + i);
        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
        srcARGB0 = src_ptr[i];
        srcARGB1 = src_ptr[i + 1];

        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB), srcA0, srcARGB0);
        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB), srcA1, srcARGB1);

        res0 = vis_fpack16_pair(res0, res1);

        /* For a coverage value in 1..255, -pathA has bits 10 and 11 set; for
           0 it has none.  msk therefore gets bit 1 for the first pixel and
           bit 0 for the second, selecting which 32-bit halves are stored. */
        msk = (((-pathA0) & (1 << 11)) | ((-pathA1) & (1 << 10))) >> 10;
        vis_pst_32(res0, dst_ptr + i, msk);

        /* The alpha bytes are written unconditionally, regardless of msk. */
        *(mlib_u8*)(dst_ptr + i    ) = dstA0;
        *(mlib_u8*)(dst_ptr + i + 1) = dstA1;
    }

    /* Trailing odd pixel, handled like the leading one. */
    if (i < width) {
        pathA0 = pMask[i];
        if (pathA0) {
            dstA0 = *(mlib_u8*)(dst_ptr + i);
            srcA0 = *(mlib_u8*)(src_ptr + i);
            dstARGB0 = dst_ptr[i];
            srcARGB0 = src_ptr[i];
            MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
            dst_ptr[i] = vis_fpack16(res0);
            *(mlib_u8*)(dst_ptr + i) = dstA0;
        }
    }
}
130
131/***************************************************************/
132
/*
 * MASK_FILL, unmasked ("A1") IntArgb -> IntArgb flavour.
 *
 * Same steps as the masked flavour but without any coverage scaling: the
 * pathA parameter is accepted for signature compatibility and never appears
 * in the expansion.  srcA is first replaced by mul8_extra[srcA]; srcF and
 * dstF are derived from dstA and srcA through the operand triples; srcA and
 * dstA are then replaced by their factor-scaled values and handed to
 * BLEND_VIS, which fills rr.
 *
 * Requires mul8_extra, SrcOpAnd/Xor/Add and DstOpAnd/Xor/Add in the
 * expanding scope; srcA and dstA must be lvalues.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
{                                                              \
    mlib_s32 srcF, dstF;                                       \
                                                               \
    srcA = mul8_extra[srcA];                                   \
                                                               \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;          \
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
                                                               \
    srcA = MUL8_INT(srcF, srcA);                               \
    dstA = MUL8_INT(dstF, dstA);                               \
                                                               \
    BLEND_VIS(rr, dstARGB, srcARGB, dstA, srcA);               \
}
148
149/***************************************************************/
150
/*
 * Composite one row of source pixels onto a destination row without a
 * coverage mask, one pixel per iteration.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel
 *   pMask      not used by this function
 *   width      number of pixels to process
 *   log_val    SrcOp And/Xor/Add in [0..2], DstOp And/Xor/Add in [3..5]
 *   mul8_extra lookup table applied to each source alpha by MASK_FILL
 *   mul8_tbl   not named in this body; NOTE(review): presumably captured by
 *              MUL8_INT inside MASK_FILL -- confirm in vis_AlphaMacros.h
 */
static void IntArgbToIntArgbAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
                                                  mlib_f32 *src_ptr,
                                                  mlib_u8  *pMask,
                                                  mlib_s32 width,
                                                  mlib_s32 *log_val,
                                                  mlib_u8  *mul8_extra,
                                                  mlib_u8  *mul8_tbl)
{
    mlib_s32 i;
    mlib_s32 dstA0, srcA0;
    mlib_d64 res0;
    mlib_f32 dstARGB0, srcARGB0;
    /* These names are referenced directly by the MASK_FILL expansion. */
    mlib_s32 SrcOpAnd = log_val[0];
    mlib_s32 SrcOpXor = log_val[1];
    mlib_s32 SrcOpAdd = log_val[2];
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];

#pragma pipeloop(0)
    for (i = 0; i < width; i++) {
        /* The first byte of each pixel is read as its alpha. */
        dstA0 = *(mlib_u8*)(dst_ptr + i);
        srcA0 = *(mlib_u8*)(src_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        /* pathA0 is not declared here; the MASK_FILL in effect at this point
           never expands its pathA parameter, so the token vanishes. */
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        dst_ptr[i] = vis_fpack16(res0);
        /* Replace the packed first byte with the alpha from MASK_FILL. */
        *(mlib_u8*)(dst_ptr + i) = dstA0;
    }
}
181
182/***************************************************************/
183
184void ADD_SUFF(IntArgbToIntArgbAlphaMaskBlit)(MASKBLIT_PARAMS)
185{
186    mlib_s32 extraA;
187    mlib_s32 dstScan = pDstInfo->scanStride;
188    mlib_s32 srcScan = pSrcInfo->scanStride;
189    mlib_s32 log_val[6];
190    mlib_s32 j;
191    mlib_s32 SrcOpAnd;
192    mlib_s32 SrcOpXor;
193    mlib_s32 SrcOpAdd;
194    mlib_s32 DstOpAnd;
195    mlib_s32 DstOpXor;
196    mlib_s32 DstOpAdd;
197    mlib_u8  *mul8_extra;
198
199    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
200
201    mul8_extra = mul8table[extraA];
202
203    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
204    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
205    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
206    SrcOpAdd -= SrcOpXor;
207
208    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
209    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
210    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
211    DstOpAdd -= DstOpXor;
212
213    log_val[0] = SrcOpAnd;
214    log_val[1] = SrcOpXor;
215    log_val[2] = SrcOpAdd;
216    log_val[3] = DstOpAnd;
217    log_val[4] = DstOpXor;
218    log_val[5] = DstOpAdd;
219
220    vis_write_gsr(7 << 3);
221
222    if (pMask != NULL) {
223        pMask += maskOff;
224
225        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
226            width *= height;
227            height = 1;
228        }
229
230        for (j = 0; j < height; j++) {
231            IntArgbToIntArgbAlphaMaskBlit_line(dstBase, srcBase, pMask,
232                                               width, log_val, mul8_extra,
233                                               (void*)mul8table);
234
235            PTR_ADD(dstBase, dstScan);
236            PTR_ADD(srcBase, srcScan);
237            PTR_ADD(pMask, maskScan);
238        }
239    } else {
240        if (dstScan == 4*width && srcScan == dstScan) {
241            width *= height;
242            height = 1;
243        }
244
245        for (j = 0; j < height; j++) {
246            IntArgbToIntArgbAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
247                                                  width, log_val, mul8_extra,
248                                                  (void*)mul8table);
249
250            PTR_ADD(dstBase, dstScan);
251            PTR_ADD(srcBase, srcScan);
252        }
253    }
254}
255
256/***************************************************************/
257
258void ADD_SUFF(IntArgbToFourByteAbgrAlphaMaskBlit)(MASKBLIT_PARAMS)
259{
260    mlib_d64 buff[BUFF_SIZE/2];
261    void     *src_buff = buff, *dst_buff;
262    mlib_s32 extraA;
263    mlib_s32 dstScan = pDstInfo->scanStride;
264    mlib_s32 srcScan = pSrcInfo->scanStride;
265    mlib_s32 log_val[6];
266    mlib_s32 j;
267    mlib_s32 SrcOpAnd;
268    mlib_s32 SrcOpXor;
269    mlib_s32 SrcOpAdd;
270    mlib_s32 DstOpAnd;
271    mlib_s32 DstOpXor;
272    mlib_s32 DstOpAdd;
273    mlib_u8  *mul8_extra;
274
275    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
276
277    mul8_extra = mul8table[extraA];
278
279    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
280    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
281    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
282    SrcOpAdd -= SrcOpXor;
283
284    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
285    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
286    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
287    DstOpAdd -= DstOpXor;
288
289    log_val[0] = SrcOpAnd;
290    log_val[1] = SrcOpXor;
291    log_val[2] = SrcOpAdd;
292    log_val[3] = DstOpAnd;
293    log_val[4] = DstOpXor;
294    log_val[5] = DstOpAdd;
295
296    vis_write_gsr(7 << 3);
297
298    if (2*width > BUFF_SIZE) src_buff = mlib_malloc(2*width*sizeof(mlib_s32));
299    dst_buff = (mlib_s32*)src_buff + width;
300
301    if (pMask != NULL) {
302        pMask += maskOff;
303
304        for (j = 0; j < height; j++) {
305            IntArgbToIntAbgrConvert_line(srcBase, src_buff, width);
306            if (!((mlib_s32)dstBase & 3)) {
307                IntArgbToIntArgbAlphaMaskBlit_line(dstBase, src_buff, pMask,
308                                                   width, log_val, mul8_extra,
309                                                   (void*)mul8table);
310            } else {
311                mlib_ImageCopy_na(dstBase, dst_buff, width*sizeof(mlib_s32));
312                IntArgbToIntArgbAlphaMaskBlit_line(dst_buff, src_buff, pMask,
313                                                   width, log_val, mul8_extra,
314                                                   (void*)mul8table);
315                mlib_ImageCopy_na(dst_buff, dstBase, width*sizeof(mlib_s32));
316            }
317
318            PTR_ADD(dstBase, dstScan);
319            PTR_ADD(srcBase, srcScan);
320            PTR_ADD(pMask, maskScan);
321        }
322    } else {
323        for (j = 0; j < height; j++) {
324            IntArgbToIntAbgrConvert_line(srcBase, src_buff, width);
325            if (!((mlib_s32)dstBase & 3)) {
326                IntArgbToIntArgbAlphaMaskBlit_A1_line(dstBase, src_buff,
327                                                      pMask, width, log_val,
328                                                      mul8_extra,
329                                                      (void*)mul8table);
330            } else {
331                mlib_ImageCopy_na(dstBase, dst_buff, width*sizeof(mlib_s32));
332                IntArgbToIntArgbAlphaMaskBlit_A1_line(dst_buff, src_buff,
333                                                      pMask, width, log_val,
334                                                      mul8_extra,
335                                                      (void*)mul8table);
336                mlib_ImageCopy_na(dst_buff, dstBase, width*sizeof(mlib_s32));
337            }
338
339            PTR_ADD(dstBase, dstScan);
340            PTR_ADD(srcBase, srcScan);
341        }
342    }
343
344    if (src_buff != buff) {
345        mlib_free(src_buff);
346    }
347}
348
349/***************************************************************/
350
351/* ##############################################################
352 * IntArgbToIntRgbAlphaMaskBlit()
353 */
354
/*
 * MASK_FILL, masked IntArgb -> IntRgb flavour.
 *
 * The incoming value of dstA is never read; dstA is used purely as an
 * output/scratch lvalue:
 *   - srcA is replaced by mul8_extra[srcA] and dstF is derived from it via
 *     the DstOp operand triple;
 *   - srcF is looked up as mul8_srcF[pathA];
 *   - dstA becomes MUL8_INT(dstF, pathA) + (0xff - pathA);
 *   - pathA is then OVERWRITTEN with (dstA - 0xff - srcF), which callers
 *     test to decide whether the pixel has to be stored;
 *   - srcA is scaled by srcF and everything is handed to BLEND_VIS_RGB.
 *
 * pathA, dstA and srcA must be lvalues.  Requires mul8_extra, mul8_srcF,
 * DstOpAnd, DstOpXor and DstOpAdd in the expanding scope.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
{                                                              \
    mlib_s32 srcF, dstF;                                       \
                                                               \
    srcA = mul8_extra[srcA];                                   \
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
                                                               \
    srcF = mul8_srcF[pathA];                                   \
    dstA = MUL8_INT(dstF, pathA) + (0xff - pathA);             \
                                                               \
    pathA = dstA - 0xff - srcF;                                \
    /* (pathA == 0) if (dstA == 0xFF && srcF == 0) */          \
                                                               \
    srcA = MUL8_INT(srcA, srcF);                               \
                                                               \
    BLEND_VIS_RGB(rr, dstARGB, srcARGB, dstA, srcA);           \
}
373
374/***************************************************************/
375
/*
 * Composite one row of IntArgb source pixels onto a destination row whose
 * alpha byte is never read or written, with one coverage byte per pixel
 * from pMask.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel
 *   pMask      coverage bytes, one per pixel
 *   width      number of pixels to process
 *   log_val    only entries [3..5] (DstOp And/Xor/Add) are read here
 *   mul8_extra lookup table applied to each source alpha by MASK_FILL
 *   mul8_srcF  lookup table indexed by the coverage value inside MASK_FILL
 *   mul8_tbl   not named in this body; NOTE(review): presumably captured by
 *              MUL8_INT inside MASK_FILL -- confirm in vis_AlphaMacros.h
 *
 * MASK_FILL overwrites its pathA argument, so after each expansion pathA0 /
 * pathA1 no longer hold the coverage value but a "pixel changed" indicator.
 */
static void IntArgbToIntRgbAlphaMaskBlit_line(mlib_f32 *dst_ptr,
                                              mlib_f32 *src_ptr,
                                              mlib_u8  *pMask,
                                              mlib_s32 width,
                                              mlib_s32 *log_val,
                                              mlib_u8  *mul8_extra,
                                              mlib_u8  *mul8_srcF,
                                              mlib_u8  *mul8_tbl)
{
    mlib_s32 i, i0;
    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA0, srcA1, msk;
    mlib_d64 res0, res1, dstARGB;
    mlib_f32 dstARGB0, srcARGB0, srcARGB1;
    /* These names are referenced directly by the MASK_FILL expansion. */
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];

    i = i0 = 0;

    /* Peel off one leading pixel when dst_ptr is not 8-byte aligned, so the
       paired loop below can access the destination 8 bytes at a time. */
    if ((mlib_s32)dst_ptr & 7) {
        pathA0 = pMask[i];
        srcA0 = *(mlib_u8*)(src_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        /* dstA0 is written by MASK_FILL before it is read. */
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        /* pathA0 now holds the value computed inside MASK_FILL. */
        if (pathA0) {
            dst_ptr[i] = vis_fpack16(res0);
        }

        i0 = 1;
    }

    /* Main loop: two pixels per iteration. */
#pragma pipeloop(0)
    for (i = i0; i <= width - 2; i += 2) {
        pathA0 = pMask[i];
        pathA1 = pMask[i + 1];
        dstARGB = *(mlib_d64*)(dst_ptr + i);
        srcA0 = *(mlib_u8*)(src_ptr + i);
        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
        srcARGB0 = src_ptr[i];
        srcARGB1 = src_ptr[i + 1];

        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB), srcA0, srcARGB0);
        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB), srcA1, srcARGB1);

        res0 = vis_fpack16_pair(res0, res1);

        /* Bits 11 and 10 of the rewritten pathA values are used directly as
           the two store-enable bits.  NOTE(review): this relies on the
           rewritten value being zero or negative -- confirm the range of
           the MASK_FILL result. */
        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
        vis_pst_32(res0, dst_ptr + i, msk);
    }

    /* Trailing odd pixel, handled like the leading one. */
    if (i < width) {
        pathA0 = pMask[i];
        srcA0 = *(mlib_u8*)(src_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        if (pathA0) {
            dst_ptr[i] = vis_fpack16(res0);
        }
    }
}
438
439/***************************************************************/
440
/*
 * MASK_FILL, unmasked ("A1") IntArgb -> IntRgb flavour.
 *
 * No coverage value is consumed; pathA and dstA are pure outputs:
 *   - srcA is replaced by mul8_extra[srcA];
 *   - dstA is derived from that srcA via the DstOp operand triple;
 *   - srcA is then replaced by mul8_srcF[srcA];
 *   - pathA is set to (dstA - srcF_255), which callers test to decide
 *     whether the pixel has to be stored;
 *   - the values are handed to BLEND_VIS_RGB, which fills rr.
 *
 * pathA, dstA and srcA must be lvalues.  Requires mul8_extra, mul8_srcF,
 * srcF_255, DstOpAnd, DstOpXor and DstOpAdd in the expanding scope.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
{                                                              \
    srcA = mul8_extra[srcA];                                   \
    dstA = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
                                                               \
    srcA = mul8_srcF[srcA];                                    \
                                                               \
    pathA = dstA - srcF_255;                                   \
    /* (pathA == 0) if (dstA == 0xFF && srcF == 0) */          \
                                                               \
    BLEND_VIS_RGB(rr, dstARGB, srcARGB, dstA, srcA);           \
}
454
455/***************************************************************/
456
/*
 * Composite one row of IntArgb source pixels onto a destination row whose
 * alpha byte is never read or written, without a coverage mask.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel
 *   pMask      not used by this function
 *   width      number of pixels to process
 *   log_val    only entries [3..5] (DstOp And/Xor/Add) are read here
 *   mul8_extra lookup table applied to each source alpha by MASK_FILL
 *   mul8_srcF  lookup table applied to the scaled source alpha by MASK_FILL
 *   mul8_tbl   not named in this body
 *
 * pathA0 / pathA1 are outputs of MASK_FILL here: they are assigned inside
 * the macro and afterwards tell whether the pixel needs to be stored.
 */
static void IntArgbToIntRgbAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
                                                 mlib_f32 *src_ptr,
                                                 mlib_u8  *pMask,
                                                 mlib_s32 width,
                                                 mlib_s32 *log_val,
                                                 mlib_u8  *mul8_extra,
                                                 mlib_u8  *mul8_srcF,
                                                 mlib_u8  *mul8_tbl)
{
    mlib_s32 i, i0;
    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA0, srcA1, msk;
    mlib_d64 res0, res1, dstARGB;
    mlib_f32 dstARGB0, srcARGB0, srcARGB1;
    /* These names are referenced directly by the MASK_FILL expansion. */
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];
    /* Loop-invariant term subtracted from dstA inside MASK_FILL. */
    mlib_s32 srcF_255 = mul8_srcF[0xff] + 0xff;

    i = i0 = 0;

    /* Peel off one leading pixel when dst_ptr is not 8-byte aligned, so the
       paired loop below can access the destination 8 bytes at a time. */
    if ((mlib_s32)dst_ptr & 7) {
        srcA0 = *(mlib_u8*)(src_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        /* pathA0 and dstA0 are assigned by MASK_FILL before being read. */
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        if (pathA0) {
            dst_ptr[i] = vis_fpack16(res0);
        }

        i0 = 1;
    }

    /* Main loop: two pixels per iteration. */
#pragma pipeloop(0)
    for (i = i0; i <= width - 2; i += 2) {
        dstARGB = *(mlib_d64*)(dst_ptr + i);
        srcA0 = *(mlib_u8*)(src_ptr + i);
        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
        srcARGB0 = src_ptr[i];
        srcARGB1 = src_ptr[i + 1];

        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB), srcA0, srcARGB0);
        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB), srcA1, srcARGB1);

        res0 = vis_fpack16_pair(res0, res1);

        /* Bits 11 and 10 of the MASK_FILL outputs are used directly as the
           two store-enable bits.  NOTE(review): this relies on the outputs
           being zero or negative -- confirm the range. */
        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
        vis_pst_32(res0, dst_ptr + i, msk);
    }

    /* Trailing odd pixel, handled like the leading one. */
    if (i < width) {
        srcA0 = *(mlib_u8*)(src_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        if (pathA0) {
            dst_ptr[i] = vis_fpack16(res0);
        }
    }
}
516
517/***************************************************************/
518
519void ADD_SUFF(IntArgbToIntRgbAlphaMaskBlit)(MASKBLIT_PARAMS)
520{
521    mlib_s32 extraA, srcF;
522    mlib_s32 dstScan = pDstInfo->scanStride;
523    mlib_s32 srcScan = pSrcInfo->scanStride;
524    mlib_s32 log_val[6];
525    mlib_s32 j;
526    mlib_s32 SrcOpAnd;
527    mlib_s32 SrcOpXor;
528    mlib_s32 SrcOpAdd;
529    mlib_s32 DstOpAnd;
530    mlib_s32 DstOpXor;
531    mlib_s32 DstOpAdd;
532    mlib_u8  *mul8_extra, *mul8_srcF;
533
534    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
535
536    mul8_extra = mul8table[extraA];
537
538    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
539    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
540    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
541    SrcOpAdd -= SrcOpXor;
542
543    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
544    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
545    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
546    DstOpAdd -= DstOpXor;
547
548    log_val[3] = DstOpAnd;
549    log_val[4] = DstOpXor;
550    log_val[5] = DstOpAdd;
551
552    srcF = ((0xff & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;
553
554    mul8_srcF = mul8table[srcF];
555
556    vis_write_gsr(7 << 3);
557
558    if (pMask != NULL) {
559        pMask += maskOff;
560
561        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
562            width *= height;
563            height = 1;
564        }
565
566        for (j = 0; j < height; j++) {
567            IntArgbToIntRgbAlphaMaskBlit_line(dstBase, srcBase, pMask,
568                                              width, log_val, mul8_extra,
569                                              mul8_srcF, (void*)mul8table);
570
571            PTR_ADD(dstBase, dstScan);
572            PTR_ADD(srcBase, srcScan);
573            PTR_ADD(pMask, maskScan);
574        }
575    } else {
576        if (dstScan == 4*width && srcScan == dstScan) {
577            width *= height;
578            height = 1;
579        }
580
581        for (j = 0; j < height; j++) {
582            IntArgbToIntRgbAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
583                                                 width, log_val, mul8_extra,
584                                                 mul8_srcF, (void*)mul8table);
585
586            PTR_ADD(dstBase, dstScan);
587            PTR_ADD(srcBase, srcScan);
588        }
589    }
590}
591
592/***************************************************************/
593
594/* ##############################################################
595 * IntRgbToIntArgbAlphaMaskBlit()
596 */
597
/*
 * MASK_FILL, masked IntRgb -> IntArgb flavour.
 *
 * The source alpha is a per-row constant here, so the caller precomputes
 * srcAx256 and dstF once; this macro only does the per-pixel part, indexing
 * mul8_tbl directly as a flat array with (row << 8) + column offsets:
 *   - srcF is derived from dstA via the SrcOp operand triple and scaled by
 *     pathA;
 *   - dstFX is dstF scaled by pathA plus (0xff - pathA);
 *   - srcAX receives the scaled source alpha (it is an output: its incoming
 *     value is never read);
 *   - dstA is replaced by its dstFX-scaled value;
 *   - the values are handed to BLEND_VIS, which fills rr.
 *
 * dstA and srcAX must be lvalues.  Requires srcF, dstF, dstFX, srcAx256,
 * mul8_tbl, SrcOpAnd, SrcOpXor and SrcOpAdd in the expanding scope.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcAX, srcARGB)    \
{                                                              \
    mlib_s32 pathAx256 = pathA << 8;                           \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;          \
                                                               \
    srcF = mul8_tbl[pathAx256 + srcF];                         \
    dstFX = mul8_tbl[pathAx256 + dstF] + (0xff - pathA);       \
                                                               \
    srcAX = mul8_tbl[srcF + srcAx256];                         \
    dstA = mul8_tbl[dstFX + (dstA << 8)];                      \
                                                               \
    BLEND_VIS(rr, dstARGB, srcARGB, dstA, srcAX);              \
}
612
613/***************************************************************/
614
/*
 * Composite one row of source pixels, whose alpha is taken to be 0xFF, onto
 * an IntArgb destination row, with one coverage byte per pixel from pMask.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel; its first byte is never
 *              read as alpha
 *   pMask      coverage bytes, one per pixel
 *   width      number of pixels to process
 *   log_val    SrcOp And/Xor/Add in [0..2], DstOp And/Xor/Add in [3..5]
 *   mul8_extra lookup table; only entry 0xFF is read here
 *   mul8_tbl   multiplication table indexed as (row << 8) + column by
 *              MASK_FILL
 *
 * The first byte of each processed destination pixel is overwritten with
 * the dstA value left behind by MASK_FILL.
 */
static void IntRgbToIntArgbAlphaMaskBlit_line(mlib_f32 *dst_ptr,
                                               mlib_f32 *src_ptr,
                                               mlib_u8  *pMask,
                                               mlib_s32 width,
                                               mlib_s32 *log_val,
                                               mlib_u8  *mul8_extra,
                                               mlib_u8  *mul8_tbl)
{
    mlib_s32 i, i0;
    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA, srcA0, srcA1, msk;
    mlib_d64 res0, res1, dstARGB;
    mlib_f32 dstARGB0, srcARGB0, srcARGB1;
    mlib_s32 SrcOpAnd = log_val[0];
    mlib_s32 SrcOpXor = log_val[1];
    mlib_s32 SrcOpAdd = log_val[2];
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];
    /* Scratch and precomputed values referenced by name inside MASK_FILL. */
    mlib_s32 srcF, dstF, dstFX, srcAx256;

    i = i0 = 0;

    /* The source alpha is the constant 0xFF scaled through mul8_extra, so
       its table offset and the destination factor are loop-invariant. */
    srcA = 0xFF;
    srcA = mul8_extra[srcA];
    srcAx256 = srcA << 8;
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;

    /* Peel off one leading pixel when dst_ptr is not 8-byte aligned, so the
       paired loop below can access the destination 8 bytes at a time. */
    if ((mlib_s32)dst_ptr & 7) {
        pathA0 = pMask[i];
        /* Zero coverage: the pixel is skipped entirely. */
        if (pathA0) {
            dstA0 = *(mlib_u8*)(dst_ptr + i);
            dstARGB0 = dst_ptr[i];
            srcARGB0 = src_ptr[i];
            /* srcA0 is an output of MASK_FILL (its srcAX parameter). */
            MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
            dst_ptr[i] = vis_fpack16(res0);
            *(mlib_u8*)(dst_ptr + i) = dstA0;
        }
        i0 = 1;
    }

    /* Main loop: two pixels per iteration. */
#pragma pipeloop(0)
    for (i = i0; i <= width - 2; i += 2) {
        pathA0 = pMask[i];
        pathA1 = pMask[i + 1];
        dstA0 = *(mlib_u8*)(dst_ptr + i);
        dstA1 = *(mlib_u8*)(dst_ptr + i + 1);
        dstARGB = *(mlib_d64*)(dst_ptr + i);
        srcARGB0 = src_ptr[i];
        srcARGB1 = src_ptr[i + 1];

        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB), srcA0, srcARGB0);
        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB), srcA1, srcARGB1);

        res0 = vis_fpack16_pair(res0, res1);

        /* For a coverage value in 1..255, -pathA has bits 10 and 11 set; for
           0 it has none.  msk therefore gets bit 1 for the first pixel and
           bit 0 for the second, selecting which 32-bit halves are stored. */
        msk = (((-pathA0) & (1 << 11)) | ((-pathA1) & (1 << 10))) >> 10;
        vis_pst_32(res0, dst_ptr + i, msk);

        /* The alpha bytes are written unconditionally, regardless of msk. */
        *(mlib_u8*)(dst_ptr + i    ) = dstA0;
        *(mlib_u8*)(dst_ptr + i + 1) = dstA1;
    }

    /* Trailing odd pixel, handled like the leading one. */
    if (i < width) {
        pathA0 = pMask[i];
        if (pathA0) {
            dstA0 = *(mlib_u8*)(dst_ptr + i);
            dstARGB0 = dst_ptr[i];
            srcARGB0 = src_ptr[i];
            MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
            dst_ptr[i] = vis_fpack16(res0);
            *(mlib_u8*)(dst_ptr + i) = dstA0;
        }
    }
}
689
690/***************************************************************/
691
/*
 * MASK_FILL, unmasked ("A1") IntRgb -> IntArgb flavour.
 *
 * pathA is accepted for signature compatibility and never appears in the
 * expansion.  srcF is derived from dstA via the SrcOp operand triple; srcA
 * (an output, its incoming value is never read) receives the scaled constant
 * source alpha, dstA is replaced by its dstF-scaled value, and the values
 * are handed to BLEND_VIS, which fills rr.
 *
 * dstA and srcA must be lvalues.  Requires srcF, dstF, srcAx256, mul8_tbl,
 * SrcOpAnd, SrcOpXor and SrcOpAdd in the expanding scope.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
{                                                              \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;          \
                                                               \
    srcA = mul8_tbl[srcF + srcAx256];                          \
    dstA = mul8_tbl[dstF + (dstA << 8)];                       \
                                                               \
    BLEND_VIS(rr, dstARGB, srcARGB, dstA, srcA);               \
}
702
703/***************************************************************/
704
/*
 * Composite one row of source pixels, whose alpha is taken to be 0xFF, onto
 * an IntArgb destination row without a coverage mask, one pixel per
 * iteration.
 *
 *   dst_ptr    destination row, one mlib_f32 per pixel; read and written
 *   src_ptr    source row, one mlib_f32 per pixel
 *   pMask      not used by this function
 *   width      number of pixels to process
 *   log_val    SrcOp And/Xor/Add in [0..2], DstOp And/Xor/Add in [3..5]
 *   mul8_extra lookup table; only entry 0xFF is read here
 *   mul8_tbl   multiplication table indexed as (row << 8) + column by
 *              MASK_FILL
 */
static void IntRgbToIntArgbAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
                                                  mlib_f32 *src_ptr,
                                                  mlib_u8  *pMask,
                                                  mlib_s32 width,
                                                  mlib_s32 *log_val,
                                                  mlib_u8  *mul8_extra,
                                                  mlib_u8  *mul8_tbl)
{
    mlib_s32 i;
    mlib_s32 dstA0, srcA, srcA0;
    mlib_d64 res0;
    mlib_f32 dstARGB0, srcARGB0;
    mlib_s32 SrcOpAnd = log_val[0];
    mlib_s32 SrcOpXor = log_val[1];
    mlib_s32 SrcOpAdd = log_val[2];
    mlib_s32 DstOpAnd = log_val[3];
    mlib_s32 DstOpXor = log_val[4];
    mlib_s32 DstOpAdd = log_val[5];
    /* Scratch and precomputed values referenced by name inside MASK_FILL. */
    mlib_s32 srcF, dstF, srcAx256;

    /* The source alpha is the constant 0xFF scaled through mul8_extra, so
       its table offset and the destination factor are loop-invariant. */
    srcA = 0xFF;
    srcA = mul8_extra[srcA];
    srcAx256 = srcA << 8;
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;

#pragma pipeloop(0)
    for (i = 0; i < width; i++) {
        /* The first byte of the destination pixel is read as its alpha. */
        dstA0 = *(mlib_u8*)(dst_ptr + i);
        dstARGB0 = dst_ptr[i];
        srcARGB0 = src_ptr[i];
        /* pathA0 is not declared here; the MASK_FILL in effect at this point
           never expands its pathA parameter.  srcA0 is a macro output. */
        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
        dst_ptr[i] = vis_fpack16(res0);
        /* Replace the packed first byte with the alpha from MASK_FILL. */
        *(mlib_u8*)(dst_ptr + i) = dstA0;
    }
}
740
741/***************************************************************/
742
743void ADD_SUFF(IntRgbToIntArgbAlphaMaskBlit)(MASKBLIT_PARAMS)
744{
745    mlib_s32 extraA;
746    mlib_s32 dstScan = pDstInfo->scanStride;
747    mlib_s32 srcScan = pSrcInfo->scanStride;
748    mlib_s32 log_val[6];
749    mlib_s32 j;
750    mlib_s32 SrcOpAnd;
751    mlib_s32 SrcOpXor;
752    mlib_s32 SrcOpAdd;
753    mlib_s32 DstOpAnd;
754    mlib_s32 DstOpXor;
755    mlib_s32 DstOpAdd;
756    mlib_u8  *mul8_extra;
757
758    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
759
760    mul8_extra = mul8table[extraA];
761
762    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
763    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
764    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
765    SrcOpAdd -= SrcOpXor;
766
767    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
768    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
769    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
770    DstOpAdd -= DstOpXor;
771
772    log_val[0] = SrcOpAnd;
773    log_val[1] = SrcOpXor;
774    log_val[2] = SrcOpAdd;
775    log_val[3] = DstOpAnd;
776    log_val[4] = DstOpXor;
777    log_val[5] = DstOpAdd;
778
779    vis_write_gsr(7 << 3);
780
781    if (pMask != NULL) {
782        pMask += maskOff;
783
784        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
785            width *= height;
786            height = 1;
787        }
788
789        for (j = 0; j < height; j++) {
790            IntRgbToIntArgbAlphaMaskBlit_line(dstBase, srcBase, pMask,
791                                               width, log_val, mul8_extra,
792                                               (void*)mul8table);
793
794            PTR_ADD(dstBase, dstScan);
795            PTR_ADD(srcBase, srcScan);
796            PTR_ADD(pMask, maskScan);
797        }
798    } else {
799        if (dstScan == 4*width && srcScan == dstScan) {
800            width *= height;
801            height = 1;
802        }
803
804        for (j = 0; j < height; j++) {
805            IntRgbToIntArgbAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
806                                                  width, log_val, mul8_extra,
807                                                  (void*)mul8table);
808
809            PTR_ADD(dstBase, dstScan);
810            PTR_ADD(srcBase, srcScan);
811        }
812    }
813}
814
815
816/***************************************************************/
817
818void ADD_SUFF(IntRgbToFourByteAbgrAlphaMaskBlit)(MASKBLIT_PARAMS)
819{
820    mlib_d64 buff[BUFF_SIZE/2];
821    void     *src_buff = buff, *dst_buff;
822    mlib_s32 extraA;
823    mlib_s32 dstScan = pDstInfo->scanStride;
824    mlib_s32 srcScan = pSrcInfo->scanStride;
825    mlib_s32 log_val[6];
826    mlib_s32 j;
827    mlib_s32 SrcOpAnd;
828    mlib_s32 SrcOpXor;
829    mlib_s32 SrcOpAdd;
830    mlib_s32 DstOpAnd;
831    mlib_s32 DstOpXor;
832    mlib_s32 DstOpAdd;
833    mlib_u8  *mul8_extra;
834
835    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
836
837    mul8_extra = mul8table[extraA];
838
839    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
840    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
841    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
842    SrcOpAdd -= SrcOpXor;
843
844    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
845    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
846    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
847    DstOpAdd -= DstOpXor;
848
849    log_val[0] = SrcOpAnd;
850    log_val[1] = SrcOpXor;
851    log_val[2] = SrcOpAdd;
852    log_val[3] = DstOpAnd;
853    log_val[4] = DstOpXor;
854    log_val[5] = DstOpAdd;
855
856    vis_write_gsr(7 << 3);
857
858    if (2*width > BUFF_SIZE) src_buff = mlib_malloc(2*width*sizeof(mlib_s32));
859    dst_buff = (mlib_s32*)src_buff + width;
860
861    if (pMask != NULL) {
862        pMask += maskOff;
863
864        for (j = 0; j < height; j++) {
865            IntArgbToIntAbgrConvert_line(srcBase, src_buff, width);
866            if (!((mlib_s32)dstBase & 3)) {
867                IntRgbToIntArgbAlphaMaskBlit_line(dstBase, src_buff, pMask,
868                                                  width, log_val, mul8_extra,
869                                                  (void*)mul8table);
870            } else {
871                mlib_ImageCopy_na(dstBase, dst_buff, width*sizeof(mlib_s32));
872                IntRgbToIntArgbAlphaMaskBlit_line(dst_buff, src_buff, pMask,
873                                                  width, log_val, mul8_extra,
874                                                  (void*)mul8table);
875                mlib_ImageCopy_na(dst_buff, dstBase, width*sizeof(mlib_s32));
876            }
877
878            PTR_ADD(dstBase, dstScan);
879            PTR_ADD(srcBase, srcScan);
880            PTR_ADD(pMask, maskScan);
881        }
882    } else {
883        for (j = 0; j < height; j++) {
884            IntArgbToIntAbgrConvert_line(srcBase, src_buff, width);
885            if (!((mlib_s32)dstBase & 3)) {
886                IntRgbToIntArgbAlphaMaskBlit_A1_line(dstBase, src_buff, pMask,
887                                                     width, log_val,
888                                                     mul8_extra,
889                                                     (void*)mul8table);
890            } else {
891                mlib_ImageCopy_na(dstBase, dst_buff, width*sizeof(mlib_s32));
892                IntRgbToIntArgbAlphaMaskBlit_A1_line(dst_buff, src_buff, pMask,
893                                                     width, log_val,
894                                                     mul8_extra,
895                                                     (void*)mul8table);
896                mlib_ImageCopy_na(dst_buff, dstBase, width*sizeof(mlib_s32));
897            }
898
899            PTR_ADD(dstBase, dstScan);
900            PTR_ADD(srcBase, srcScan);
901        }
902    }
903
904    if (src_buff != buff) {
905        mlib_free(src_buff);
906    }
907}
908
909/***************************************************************/
910
911/* ##############################################################
912 * IntArgbToIntBgrAlphaMaskBlit()
913 */
914
/*
 * MASK_FILL variant used by IntArgbToIntBgrAlphaMaskBlit_line (per-pixel
 * coverage mask present).
 *
 * Besides its arguments the expansion uses these names from the expanding
 * scope: mul8_extra, mul8_srcF, mul8_tbl, DstOpAnd, DstOpXor, DstOpAdd and
 * the scratch variables srcF and dstF.  MUL8_INT and BLEND_VIS_RGB are
 * defined outside this section.
 *
 *   rr      - passed to BLEND_VIS_RGB as its result argument
 *   pathA   - in:  coverage byte read from the mask
 *             out: dstA - 0xff - srcF; callers store the pixel only when
 *                  this is non-zero
 *   dstA    - out: mul8_tbl[(pathA << 8) + dstF] + (0xff - pathA)
 *   dstARGB - destination pixel
 *   srcA    - in:  source alpha byte
 *             out: mul8_extra[srcA] further multiplied by srcF via MUL8_INT
 *   srcARGB - source pixel
 *
 * The expansion is a bare statement sequence (no enclosing braces) that
 * assigns to pathA, dstA and srcA, so those arguments must be plain lvalues.
 */
915#undef  MASK_FILL
916#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
917    srcA = mul8_extra[srcA];                                   \
918    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
919                                                               \
920    srcF = mul8_srcF[pathA];                                   \
921    dstA = mul8_tbl[(pathA << 8) + dstF] + (0xff - pathA);     \
922                                                               \
923    pathA = dstA - 0xff - srcF;                                \
924    /* (pathA == 0) if (dstA == 0xFF && srcF == 0) */          \
925                                                               \
926    srcA = MUL8_INT(srcA, srcF);                               \
927                                                               \
928    BLEND_VIS_RGB(rr, dstARGB, srcARGB, dstA, srcA)
929
930/***************************************************************/
931
/*
 * Blends one line of `width` 32-bit source pixels into dst_ptr, using one
 * coverage byte per pixel from pMask.
 *
 * The line is processed as: an optional leading pixel when dst_ptr is not
 * 8-byte aligned, then pairs of pixels, then an optional trailing pixel.
 * Every source pixel goes through ARGB2ABGR_FL / ARGB2ABGR_DB before it is
 * blended by MASK_FILL.  A destination pixel is written only when MASK_FILL
 * leaves a non-zero value in the matching pathA variable.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - coverage bytes, one per pixel
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra, mul8_srcF, mul8_tbl
 *              - lookup tables referenced by name inside MASK_FILL
 */
932static void IntArgbToIntBgrAlphaMaskBlit_line(mlib_f32 *dst_ptr,
933                                              mlib_f32 *src_ptr,
934                                              mlib_u8  *pMask,
935                                              mlib_s32 width,
936                                              mlib_s32 *log_val,
937                                              mlib_u8  *mul8_extra,
938                                              mlib_u8  *mul8_srcF,
939                                              mlib_u8  *mul8_tbl)
940{
941    mlib_s32 i, i0;
942    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA0, srcA1, msk;
943    mlib_d64 res0, res1, dstARGB, srcARGB;
944    mlib_f32 dstARGB0, srcARGB0;
945    mlib_s32 DstOpAnd = log_val[3];
946    mlib_s32 DstOpXor = log_val[4];
947    mlib_s32 DstOpAdd = log_val[5];
    /* scratch variables assigned inside MASK_FILL */
948    mlib_s32 srcF, dstF;
949
    /* NOTE(review): presumably the byte-shuffle pattern needed by the
       ARGB2ABGR_* macros on VIS 2 - confirm in vis_AlphaMacros.h */
950#if VIS >= 0x200
951    vis_write_bmask(0x03214765, 0);
952#endif
953
954    i = i0 = 0;
955
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
956    if ((mlib_s32)dst_ptr & 7) {
957        pathA0 = pMask[i];
        /* first byte in memory of the source pixel is used as its alpha */
958        srcA0 = *(mlib_u8*)(src_ptr + i);
959        dstARGB0 = dst_ptr[i];
960        srcARGB0 = src_ptr[i];
961        ARGB2ABGR_FL(srcARGB0)
962        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
963        if (pathA0) {
964            dst_ptr[i] = vis_fpack16(res0);
965        }
966
967        i0 = 1;
968    }
969
    /* main loop: two pixels per iteration */
970#pragma pipeloop(0)
971    for (i = i0; i <= width - 2; i += 2) {
972        pathA0 = pMask[i];
973        pathA1 = pMask[i + 1];
974        dstARGB = *(mlib_d64*)(dst_ptr + i);
975        srcA0 = *(mlib_u8*)(src_ptr + i);
976        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
977        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
978        ARGB2ABGR_DB(srcARGB)
979
980        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB),
981                                srcA0, vis_read_hi(srcARGB));
982        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB),
983                                srcA1, vis_read_lo(srcARGB));
984
985        res0 = vis_fpack16_pair(res0, res1);
986
        /* bit 11 of pathA0 and bit 10 of pathA1 form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
987        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
988        vis_pst_32(res0, dst_ptr + i, msk);
989    }
990
    /* one pixel left over after the paired loop */
991    if (i < width) {
992        pathA0 = pMask[i];
993        srcA0 = *(mlib_u8*)(src_ptr + i);
994        dstARGB0 = dst_ptr[i];
995        srcARGB0 = src_ptr[i];
996        ARGB2ABGR_FL(srcARGB0)
997        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
998        if (pathA0) {
999            dst_ptr[i] = vis_fpack16(res0);
1000        }
1001    }
1002}
1003
1004/***************************************************************/
1005
/*
 * MASK_FILL variant used by IntArgbToIntBgrAlphaMaskBlit_A1_line (no
 * coverage mask).
 *
 * Besides its arguments the expansion uses these names from the expanding
 * scope: mul8_extra, mul8_srcF, DstOpAnd, DstOpXor, DstOpAdd and srcF_255.
 * BLEND_VIS is defined outside this section.
 *
 *   rr      - passed to BLEND_VIS as its result argument
 *   pathA   - out only: dstA - srcF_255; callers store the pixel only when
 *             this is non-zero
 *   dstA    - out: ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd, computed from
 *             the mul8_extra-scaled source alpha
 *   dstARGB - destination pixel
 *   srcA    - in:  source alpha byte
 *             out: mul8_srcF[mul8_extra[srcA]]
 *   srcARGB - source pixel
 *
 * The expansion is a bare statement sequence (no enclosing braces) that
 * assigns to pathA, dstA and srcA, so those arguments must be plain lvalues.
 */
1006#undef  MASK_FILL
1007#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)     \
1008    srcA = mul8_extra[srcA];                                   \
1009    dstA = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;          \
1010                                                               \
1011    srcA = mul8_srcF[srcA];                                    \
1012                                                               \
1013    pathA = dstA - srcF_255;                                   \
1014    /* (pathA == 0) if (dstA == 0xFF && srcF == 0) */          \
1015                                                               \
1016    BLEND_VIS(rr, dstARGB, srcARGB, dstA, srcA)
1017
1018/***************************************************************/
1019
/*
 * Blends one line of `width` 32-bit source pixels into dst_ptr when no
 * coverage mask is supplied.
 *
 * The line is processed as: an optional leading pixel when dst_ptr is not
 * 8-byte aligned, then pairs of pixels, then an optional trailing pixel.
 * Every source pixel goes through ARGB2ABGR_FL / ARGB2ABGR_DB before it is
 * blended by MASK_FILL.  A destination pixel is written only when MASK_FILL
 * leaves a non-zero value in the matching pathA variable.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - not read by this function
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra, mul8_srcF
 *              - lookup tables referenced by name inside MASK_FILL
 *   mul8_tbl   - not referenced directly in this function or in the
 *                MASK_FILL variant it uses
 */
1020static void IntArgbToIntBgrAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1021                                                 mlib_f32 *src_ptr,
1022                                                 mlib_u8  *pMask,
1023                                                 mlib_s32 width,
1024                                                 mlib_s32 *log_val,
1025                                                 mlib_u8  *mul8_extra,
1026                                                 mlib_u8  *mul8_srcF,
1027                                                 mlib_u8  *mul8_tbl)
1028{
1029    mlib_s32 i, i0;
1030    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA0, srcA1, msk;
1031    mlib_d64 res0, res1, dstARGB, srcARGB;
1032    mlib_f32 dstARGB0, srcARGB0;
1033    mlib_s32 DstOpAnd = log_val[3];
1034    mlib_s32 DstOpXor = log_val[4];
1035    mlib_s32 DstOpAdd = log_val[5];
    /* line-constant term subtracted from dstA inside MASK_FILL */
1036    mlib_s32 srcF_255 = mul8_srcF[0xff] + 0xff;
1037
    /* NOTE(review): presumably the byte-shuffle pattern needed by the
       ARGB2ABGR_* macros on VIS 2 - confirm in vis_AlphaMacros.h */
1038#if VIS >= 0x200
1039    vis_write_bmask(0x03214765, 0);
1040#endif
1041
1042    i = i0 = 0;
1043
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
1044    if ((mlib_s32)dst_ptr & 7) {
        /* first byte in memory of the source pixel is used as its alpha */
1045        srcA0 = *(mlib_u8*)(src_ptr + i);
1046        dstARGB0 = dst_ptr[i];
1047        srcARGB0 = src_ptr[i];
1048        ARGB2ABGR_FL(srcARGB0)
        /* pathA0 is written by MASK_FILL before it is tested below */
1049        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1050        if (pathA0) {
1051            dst_ptr[i] = vis_fpack16(res0);
1052        }
1053
1054        i0 = 1;
1055    }
1056
    /* main loop: two pixels per iteration */
1057#pragma pipeloop(0)
1058    for (i = i0; i <= width - 2; i += 2) {
1059        dstARGB = *(mlib_d64*)(dst_ptr + i);
1060        srcA0 = *(mlib_u8*)(src_ptr + i);
1061        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
1062        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
1063        ARGB2ABGR_DB(srcARGB)
1064
1065        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB),
1066                                srcA0, vis_read_hi(srcARGB));
1067        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB),
1068                                srcA1, vis_read_lo(srcARGB));
1069
1070        res0 = vis_fpack16_pair(res0, res1);
1071
        /* bit 11 of pathA0 and bit 10 of pathA1 form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
1072        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
1073        vis_pst_32(res0, dst_ptr + i, msk);
1074    }
1075
    /* one pixel left over after the paired loop */
1076    if (i < width) {
1077        srcA0 = *(mlib_u8*)(src_ptr + i);
1078        dstARGB0 = dst_ptr[i];
1079        srcARGB0 = src_ptr[i];
1080        ARGB2ABGR_FL(srcARGB0)
1081        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1082        if (pathA0) {
1083            dst_ptr[i] = vis_fpack16(res0);
1084        }
1085    }
1086}
1087
1088/***************************************************************/
1089
1090void ADD_SUFF(IntArgbToIntBgrAlphaMaskBlit)(MASKBLIT_PARAMS)
1091{
1092    mlib_s32 extraA, srcF;
1093    mlib_s32 dstScan = pDstInfo->scanStride;
1094    mlib_s32 srcScan = pSrcInfo->scanStride;
1095    mlib_s32 log_val[6];
1096    mlib_s32 j;
1097    mlib_s32 SrcOpAnd;
1098    mlib_s32 SrcOpXor;
1099    mlib_s32 SrcOpAdd;
1100    mlib_s32 DstOpAnd;
1101    mlib_s32 DstOpXor;
1102    mlib_s32 DstOpAdd;
1103    mlib_u8  *mul8_extra, *mul8_srcF;
1104
1105    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1106
1107    mul8_extra = mul8table[extraA];
1108
1109    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1110    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1111    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1112    SrcOpAdd -= SrcOpXor;
1113
1114    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1115    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1116    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1117    DstOpAdd -= DstOpXor;
1118
1119    log_val[3] = DstOpAnd;
1120    log_val[4] = DstOpXor;
1121    log_val[5] = DstOpAdd;
1122
1123    srcF = ((0xff & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;
1124
1125    mul8_srcF = mul8table[srcF];
1126
1127    vis_write_gsr(7 << 3);
1128
1129    if (pMask != NULL) {
1130        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1131            width *= height;
1132            height = 1;
1133        }
1134
1135        pMask += maskOff;
1136
1137        for (j = 0; j < height; j++) {
1138            IntArgbToIntBgrAlphaMaskBlit_line(dstBase, srcBase, pMask,
1139                                              width, log_val, mul8_extra,
1140                                              mul8_srcF, (void*)mul8table);
1141
1142            PTR_ADD(dstBase, dstScan);
1143            PTR_ADD(srcBase, srcScan);
1144            PTR_ADD(pMask, maskScan);
1145        }
1146    } else {
1147        if (dstScan == 4*width && srcScan == dstScan) {
1148            width *= height;
1149            height = 1;
1150        }
1151
1152        for (j = 0; j < height; j++) {
1153            IntArgbToIntBgrAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
1154                                                 width, log_val, mul8_extra,
1155                                                 mul8_srcF, (void*)mul8table);
1156
1157            PTR_ADD(dstBase, dstScan);
1158            PTR_ADD(srcBase, srcScan);
1159        }
1160    }
1161}
1162
1163/***************************************************************/
1164
1165/* ##############################################################
1166 * IntRgbToIntRgbAlphaMaskBlit()
1167 * IntRgbToIntBgrAlphaMaskBlit()
1168 * IntBgrToIntBgrAlphaMaskBlit()
1169 */
1170
/*
 * MASK_FILL variant used by IntRgbToIntRgbAlphaMaskBlit_line and
 * IntRgbToIntBgrAlphaMaskBlit_line (per-pixel coverage mask present, source
 * alpha constant for the whole line).
 *
 * Besides its arguments the expansion uses these names from the expanding
 * scope: mul8_srcF, mul8_tbl, srcA, dstF and the scratch variable srcF.
 * BLEND_VIS_RGB is defined outside this section.
 *
 *   rr      - passed to BLEND_VIS_RGB as its result argument
 *   pathA   - in:  coverage byte read from the mask
 *             out: dstA - 0xff - srcF; callers store the pixel only when
 *                  this is non-zero
 *   dstA    - out: mul8_tbl[(pathA << 8) + dstF] + (0xff - pathA)
 *   dstARGB - destination pixel
 *   srcAX   - out only: mul8_tbl[srcA + (srcF << 8)]
 *   srcARGB - source pixel
 *
 * The expansion is a bare statement sequence (no enclosing braces) that
 * assigns to pathA, dstA and srcAX, so those arguments must be plain
 * lvalues.
 */
1171#undef  MASK_FILL
1172#define MASK_FILL(rr, pathA, dstA, dstARGB, srcAX, srcARGB)    \
1173    srcF = mul8_srcF[pathA];                                   \
1174    dstA = mul8_tbl[(pathA << 8) + dstF] + (0xff - pathA);     \
1175    pathA = dstA - 0xff - srcF;                                \
1176    srcAX = mul8_tbl[srcA + (srcF << 8)];                      \
1177                                                               \
1178    BLEND_VIS_RGB(rr, dstARGB, srcARGB, dstA, srcAX)
1179
1180/***************************************************************/
1181
/*
 * Blends one line of `width` 32-bit source pixels into dst_ptr, using one
 * coverage byte per pixel from pMask.  The source alpha is not read from the
 * pixels: it is fixed at 0xFF and scaled through mul8_extra once per line.
 *
 * The line is processed as: an optional leading pixel when dst_ptr is not
 * 8-byte aligned, then pairs of pixels, then an optional trailing pixel.
 * A destination pixel is written only when MASK_FILL leaves a non-zero value
 * in the matching pathA variable.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - coverage bytes, one per pixel
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra - table applied to the constant source alpha
 *   mul8_srcF, mul8_tbl
 *              - lookup tables referenced by name inside MASK_FILL
 */
1182static void IntRgbToIntRgbAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1183                                              mlib_f32 *src_ptr,
1184                                              mlib_u8  *pMask,
1185                                              mlib_s32 width,
1186                                              mlib_s32 *log_val,
1187                                              mlib_u8  *mul8_extra,
1188                                              mlib_u8  *mul8_srcF,
1189                                              mlib_u8  *mul8_tbl)
1190{
1191    mlib_s32 i, i0;
1192    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA, srcA0, srcA1, msk;
1193    mlib_d64 res0, res1, dstARGB;
1194    mlib_f32 dstARGB0, srcARGB0, srcARGB1;
1195    mlib_s32 DstOpAnd = log_val[3];
1196    mlib_s32 DstOpXor = log_val[4];
1197    mlib_s32 DstOpAdd = log_val[5];
    /* srcF is scratch for MASK_FILL; dstF is computed once below */
1198    mlib_s32 srcF, dstF;
1199
1200    i = i0 = 0;
1201
    /* constant source alpha and the destination factor derived from it;
       both srcA and dstF are read by name inside MASK_FILL */
1202    srcA = 0xFF;
1203    srcA = mul8_extra[srcA];
1204    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1205
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
1206    if ((mlib_s32)dst_ptr & 7) {
1207        pathA0 = pMask[i];
1208        dstARGB0 = dst_ptr[i];
1209        srcARGB0 = src_ptr[i];
1210        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1211        if (pathA0) {
1212            dst_ptr[i] = vis_fpack16(res0);
1213        }
1214
1215        i0 = 1;
1216    }
1217
    /* main loop: two pixels per iteration */
1218#pragma pipeloop(0)
1219    for (i = i0; i <= width - 2; i += 2) {
1220        pathA0 = pMask[i];
1221        pathA1 = pMask[i + 1];
1222        dstARGB = *(mlib_d64*)(dst_ptr + i);
1223        srcARGB0 = src_ptr[i];
1224        srcARGB1 = src_ptr[i + 1];
1225
1226        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB), srcA0, srcARGB0);
1227        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB), srcA1, srcARGB1);
1228
1229        res0 = vis_fpack16_pair(res0, res1);
1230
        /* bit 11 of pathA0 and bit 10 of pathA1 form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
1231        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
1232        vis_pst_32(res0, dst_ptr + i, msk);
1233    }
1234
    /* one pixel left over after the paired loop */
1235    if (i < width) {
1236        pathA0 = pMask[i];
1237        dstARGB0 = dst_ptr[i];
1238        srcARGB0 = src_ptr[i];
1239        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1240        if (pathA0) {
1241            dst_ptr[i] = vis_fpack16(res0);
1242        }
1243    }
1244}
1245
1246/***************************************************************/
1247
/*
 * Same processing as IntRgbToIntRgbAlphaMaskBlit_line, except that every
 * source pixel is passed through ARGB2ABGR_FL / ARGB2ABGR_DB before it is
 * blended.
 *
 * Blends one line of `width` 32-bit source pixels into dst_ptr, using one
 * coverage byte per pixel from pMask.  The source alpha is fixed at 0xFF and
 * scaled through mul8_extra once per line.  A destination pixel is written
 * only when MASK_FILL leaves a non-zero value in the matching pathA
 * variable.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - coverage bytes, one per pixel
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra - table applied to the constant source alpha
 *   mul8_srcF, mul8_tbl
 *              - lookup tables referenced by name inside MASK_FILL
 */
1248static void IntRgbToIntBgrAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1249                                              mlib_f32 *src_ptr,
1250                                              mlib_u8  *pMask,
1251                                              mlib_s32 width,
1252                                              mlib_s32 *log_val,
1253                                              mlib_u8  *mul8_extra,
1254                                              mlib_u8  *mul8_srcF,
1255                                              mlib_u8  *mul8_tbl)
1256{
1257    mlib_s32 i, i0;
1258    mlib_s32 pathA0, pathA1, dstA0, dstA1, srcA, srcA0, srcA1, msk;
1259    mlib_d64 res0, res1, dstARGB, srcARGB;
1260    mlib_f32 dstARGB0, srcARGB0;
1261    mlib_s32 DstOpAnd = log_val[3];
1262    mlib_s32 DstOpXor = log_val[4];
1263    mlib_s32 DstOpAdd = log_val[5];
    /* srcF is scratch for MASK_FILL; dstF is computed once below */
1264    mlib_s32 srcF, dstF;
1265
    /* NOTE(review): presumably the byte-shuffle pattern needed by the
       ARGB2ABGR_* macros on VIS 2 - confirm in vis_AlphaMacros.h */
1266#if VIS >= 0x200
1267    vis_write_bmask(0x03214765, 0);
1268#endif
1269
1270    i = i0 = 0;
1271
    /* constant source alpha and the destination factor derived from it;
       both srcA and dstF are read by name inside MASK_FILL */
1272    srcA = 0xFF;
1273    srcA = mul8_extra[srcA];
1274    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1275
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
1276    if ((mlib_s32)dst_ptr & 7) {
1277        pathA0 = pMask[i];
1278        dstARGB0 = dst_ptr[i];
1279        srcARGB0 = src_ptr[i];
1280        ARGB2ABGR_FL(srcARGB0)
1281        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1282        if (pathA0) {
1283            dst_ptr[i] = vis_fpack16(res0);
1284        }
1285        i0 = 1;
1286    }
1287
    /* main loop: two pixels per iteration */
1288#pragma pipeloop(0)
1289    for (i = i0; i <= width - 2; i += 2) {
1290        pathA0 = pMask[i];
1291        pathA1 = pMask[i + 1];
1292        dstARGB = *(mlib_d64*)(dst_ptr + i);
1293        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
1294        ARGB2ABGR_DB(srcARGB)
1295
1296        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB),
1297                                srcA0, vis_read_hi(srcARGB));
1298        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB),
1299                                srcA1, vis_read_lo(srcARGB));
1300
1301        res0 = vis_fpack16_pair(res0, res1);
1302
        /* bit 11 of pathA0 and bit 10 of pathA1 form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
1303        msk = (((pathA0) & (1 << 11)) | ((pathA1) & (1 << 10))) >> 10;
1304        vis_pst_32(res0, dst_ptr + i, msk);
1305    }
1306
    /* one pixel left over after the paired loop */
1307    if (i < width) {
1308        pathA0 = pMask[i];
1309        dstARGB0 = dst_ptr[i];
1310        srcARGB0 = src_ptr[i];
1311        ARGB2ABGR_FL(srcARGB0)
1312        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1313        if (pathA0) {
1314            dst_ptr[i] = vis_fpack16(res0);
1315        }
1316    }
1317}
1318
1319/***************************************************************/
1320
/*
 * MASK_FILL variant used by IntRgbToIntRgbAlphaMaskBlit_A1_line and
 * IntRgbToIntBgrAlphaMaskBlit_A1_line, where every blend factor is constant
 * for the whole line.
 *
 * Besides its arguments the expansion uses these names from the expanding
 * scope: srcA_mul, dstA_mul, dstA_div and the scratch variables t0 and t1.
 *
 *   rr      - out: srcARGB scaled by srcA_mul plus dstARGB scaled by
 *             dstA_mul, then multiplied by dstA_div (the 16-bit multiply is
 *             assembled from vis_fmul8sux16 and vis_fmul8ulx16)
 *   dstARGB - destination pixel
 *   srcARGB - source pixel
 *
 * The expansion is a bare statement sequence (no enclosing braces).
 */
1321#undef  MASK_FILL
1322#define MASK_FILL(rr, dstARGB, srcARGB)                \
1323    t0 = vis_fmul8x16al(srcARGB, srcA_mul);            \
1324    t1 = vis_fmul8x16al(dstARGB, dstA_mul);            \
1325    rr = vis_fpadd16(t0, t1);                          \
1326    rr = vis_fpadd16(vis_fmul8sux16(rr, dstA_div),     \
1327                     vis_fmul8ulx16(rr, dstA_div))
1328
1329/***************************************************************/
1330
/*
 * Blends one line of `width` 32-bit source pixels into dst_ptr when no
 * coverage mask is supplied.  The source alpha is fixed at 0xFF, so all
 * blend factors (srcA_mul, dstA_mul, dstA_div) and the store condition
 * pathA are computed once before the pixel loops.
 *
 * The line is processed as: an optional leading pixel when dst_ptr is not
 * 8-byte aligned, then pairs of pixels, then an optional trailing pixel.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - not read by this function
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra - table applied to the constant source alpha
 *   mul8_srcF  - table applied to the scaled source alpha; also supplies
 *                srcF_255
 *   mul8_tbl   - not read by this function
 */
1331static void IntRgbToIntRgbAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1332                                                 mlib_f32 *src_ptr,
1333                                                 mlib_u8  *pMask,
1334                                                 mlib_s32 width,
1335                                                 mlib_s32 *log_val,
1336                                                 mlib_u8  *mul8_extra,
1337                                                 mlib_u8  *mul8_srcF,
1338                                                 mlib_u8  *mul8_tbl)
1339{
1340    mlib_s32 i, i0;
1341    mlib_s32 pathA, dstA, srcA, msk;
1342    mlib_d64 res0, res1, dstARGB;
1343    mlib_f32 dstARGB0, srcARGB0, srcARGB1, srcA_mul, dstA_mul;
1344    mlib_s32 DstOpAnd = log_val[3];
1345    mlib_s32 DstOpXor = log_val[4];
1346    mlib_s32 DstOpAdd = log_val[5];
1347    mlib_s32 srcF_255 = mul8_srcF[0xff] + 0xff;
    /* t0 and t1 are scratch for MASK_FILL */
1348    mlib_d64 t0, t1, dstA_div;
1349
1350    i = i0 = 0;
1351
    /* line-constant factors: scaled source alpha, destination factor,
       store condition, and the table entries consumed by MASK_FILL */
1352    srcA = 0xFF;
1353    srcA = mul8_extra[srcA];
1354    dstA = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1355    srcA = mul8_srcF[srcA];
1356    pathA = dstA - srcF_255;
1357    srcA_mul = ((mlib_f32*)vis_mul8s_tbl)[srcA];
1358    dstA_mul = ((mlib_f32*)vis_mul8s_tbl)[dstA];
    /* vis_div8_tbl is indexed by the sum of both factors */
1359    dstA += srcA;
1360    dstA_div = ((mlib_d64*)vis_div8_tbl)[dstA];
1361
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
1362    if ((mlib_s32)dst_ptr & 7) {
1363        dstARGB0 = dst_ptr[i];
1364        srcARGB0 = src_ptr[i];
1365        MASK_FILL(res0, dstARGB0, srcARGB0);
1366        if (pathA) {
1367            dst_ptr[i] = vis_fpack16(res0);
1368        }
1369        i0 = 1;
1370    }
1371
    /* main loop: two pixels per iteration */
1372#pragma pipeloop(0)
1373    for (i = i0; i <= width - 2; i += 2) {
1374        dstARGB = *(mlib_d64*)(dst_ptr + i);
1375        srcARGB0 = src_ptr[i];
1376        srcARGB1 = src_ptr[i + 1];
1377
1378        MASK_FILL(res0, vis_read_hi(dstARGB), srcARGB0);
1379        MASK_FILL(res1, vis_read_lo(dstARGB), srcARGB1);
1380
1381        res0 = vis_fpack16_pair(res0, res1);
1382
        /* bits 11 and 10 of the single pathA form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
1383        msk = (((pathA) & (1 << 11)) | ((pathA) & (1 << 10))) >> 10;
1384        vis_pst_32(res0, dst_ptr + i, msk);
1385    }
1386
    /* one pixel left over after the paired loop */
1387    if (i < width) {
1388        dstARGB0 = dst_ptr[i];
1389        srcARGB0 = src_ptr[i];
1390        MASK_FILL(res0, dstARGB0, srcARGB0);
1391        if (pathA) {
1392            dst_ptr[i] = vis_fpack16(res0);
1393        }
1394    }
1395}
1396
1397/***************************************************************/
1398
/*
 * Same processing as IntRgbToIntRgbAlphaMaskBlit_A1_line, except that every
 * source pixel is passed through ARGB2ABGR_FL / ARGB2ABGR_DB before it is
 * blended.
 *
 * Blends one line of `width` 32-bit source pixels into dst_ptr when no
 * coverage mask is supplied.  The source alpha is fixed at 0xFF, so all
 * blend factors (srcA_mul, dstA_mul, dstA_div) and the store condition
 * pathA are computed once before the pixel loops.
 *
 *   dst_ptr    - destination line (read and conditionally written)
 *   src_ptr    - source line
 *   pMask      - not read by this function
 *   width      - number of pixels
 *   log_val    - elements [3], [4], [5] are read as DstOpAnd, DstOpXor,
 *                DstOpAdd; the other elements are not read here
 *   mul8_extra - table applied to the constant source alpha
 *   mul8_srcF  - table applied to the scaled source alpha; also supplies
 *                srcF_255
 *   mul8_tbl   - not referenced directly in this function
 */
1399static void IntRgbToIntBgrAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1400                                                 mlib_f32 *src_ptr,
1401                                                 mlib_u8  *pMask,
1402                                                 mlib_s32 width,
1403                                                 mlib_s32 *log_val,
1404                                                 mlib_u8  *mul8_extra,
1405                                                 mlib_u8  *mul8_srcF,
1406                                                 mlib_u8  *mul8_tbl)
1407{
1408    mlib_s32 i, i0;
1409    mlib_s32 pathA, dstA, srcA, msk;
1410    mlib_d64 res0, res1, dstARGB, srcARGB;
1411    mlib_f32 dstARGB0, srcARGB0, srcA_mul, dstA_mul;
1412    mlib_s32 DstOpAnd = log_val[3];
1413    mlib_s32 DstOpXor = log_val[4];
1414    mlib_s32 DstOpAdd = log_val[5];
1415    mlib_s32 srcF_255 = mul8_srcF[0xff] + 0xff;
    /* t0 and t1 are scratch for MASK_FILL */
1416    mlib_d64 t0, t1, dstA_div;
1417
    /* NOTE(review): presumably the byte-shuffle pattern needed by the
       ARGB2ABGR_* macros on VIS 2 - confirm in vis_AlphaMacros.h */
1418#if VIS >= 0x200
1419    vis_write_bmask(0x03214765, 0);
1420#endif
1421
1422    i = i0 = 0;
1423
    /* line-constant factors: scaled source alpha, destination factor,
       store condition, and the table entries consumed by MASK_FILL */
1424    srcA = 0xFF;
1425    srcA = mul8_extra[srcA];
1426    dstA = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1427    srcA = mul8_srcF[srcA];
1428    pathA = dstA - srcF_255;
1429    srcA_mul = ((mlib_f32*)vis_mul8s_tbl)[srcA];
1430    dstA_mul = ((mlib_f32*)vis_mul8s_tbl)[dstA];
    /* vis_div8_tbl is indexed by the sum of both factors */
1431    dstA += srcA;
1432    dstA_div = ((mlib_d64*)vis_div8_tbl)[dstA];
1433
    /* dst not 8-byte aligned: do the first pixel alone so that the paired
       loop below can load the destination as mlib_d64 */
1434    if ((mlib_s32)dst_ptr & 7) {
1435        dstARGB0 = dst_ptr[i];
1436        srcARGB0 = src_ptr[i];
1437        ARGB2ABGR_FL(srcARGB0)
1438        MASK_FILL(res0, dstARGB0, srcARGB0);
1439        if (pathA) {
1440            dst_ptr[i] = vis_fpack16(res0);
1441        }
1442        i0 = 1;
1443    }
1444
    /* main loop: two pixels per iteration */
1445#pragma pipeloop(0)
1446    for (i = i0; i <= width - 2; i += 2) {
1447        dstARGB = *(mlib_d64*)(dst_ptr + i);
1448        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
1449        ARGB2ABGR_DB(srcARGB)
1450
1451        MASK_FILL(res0, vis_read_hi(dstARGB), vis_read_hi(srcARGB));
1452        MASK_FILL(res1, vis_read_lo(dstARGB), vis_read_lo(srcARGB));
1453
1454        res0 = vis_fpack16_pair(res0, res1);
1455
        /* bits 11 and 10 of the single pathA form the 2-bit store mask.
           NOTE(review): relies on a non-zero pathA being a small negative
           number so that both bits are set - confirm */
1456        msk = (((pathA) & (1 << 11)) | ((pathA) & (1 << 10))) >> 10;
1457        vis_pst_32(res0, dst_ptr + i, msk);
1458    }
1459
    /* one pixel left over after the paired loop */
1460    if (i < width) {
1461        dstARGB0 = dst_ptr[i];
1462        srcARGB0 = src_ptr[i];
1463        ARGB2ABGR_FL(srcARGB0)
1464        MASK_FILL(res0, dstARGB0, srcARGB0);
1465        if (pathA) {
1466            dst_ptr[i] = vis_fpack16(res0);
1467        }
1468    }
1469}
1470
1471/***************************************************************/
1472
1473void ADD_SUFF(IntRgbToIntRgbAlphaMaskBlit)(MASKBLIT_PARAMS)
1474{
1475    mlib_s32 extraA, srcF;
1476    mlib_s32 dstScan = pDstInfo->scanStride;
1477    mlib_s32 srcScan = pSrcInfo->scanStride;
1478    mlib_s32 log_val[6];
1479    mlib_s32 j;
1480    mlib_s32 SrcOpAnd;
1481    mlib_s32 SrcOpXor;
1482    mlib_s32 SrcOpAdd;
1483    mlib_s32 DstOpAnd;
1484    mlib_s32 DstOpXor;
1485    mlib_s32 DstOpAdd;
1486    mlib_u8  *mul8_extra, *mul8_srcF;
1487
1488    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1489
1490    mul8_extra = mul8table[extraA];
1491
1492    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1493    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1494    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1495    SrcOpAdd -= SrcOpXor;
1496
1497    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1498    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1499    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1500    DstOpAdd -= DstOpXor;
1501
1502    log_val[3] = DstOpAnd;
1503    log_val[4] = DstOpXor;
1504    log_val[5] = DstOpAdd;
1505
1506    srcF = ((0xff & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;
1507
1508    mul8_srcF = mul8table[srcF];
1509
1510    vis_write_gsr(7 << 3);
1511
1512    if (pMask != NULL) {
1513        pMask += maskOff;
1514
1515        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1516            width *= height;
1517            height = 1;
1518        }
1519
1520        for (j = 0; j < height; j++) {
1521            IntRgbToIntRgbAlphaMaskBlit_line(dstBase, srcBase, pMask,
1522                                              width, log_val, mul8_extra,
1523                                              mul8_srcF, (void*)mul8table);
1524
1525            PTR_ADD(dstBase, dstScan);
1526            PTR_ADD(srcBase, srcScan);
1527            PTR_ADD(pMask, maskScan);
1528        }
1529    } else {
1530        if (dstScan == 4*width && srcScan == dstScan) {
1531            width *= height;
1532            height = 1;
1533        }
1534
1535        for (j = 0; j < height; j++) {
1536            IntRgbToIntRgbAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
1537                                                 width, log_val, mul8_extra,
1538                                                 mul8_srcF, (void*)mul8table);
1539
1540            PTR_ADD(dstBase, dstScan);
1541            PTR_ADD(srcBase, srcScan);
1542        }
1543    }
1544}
1545
1546/***************************************************************/
1547
1548void ADD_SUFF(IntRgbToIntBgrAlphaMaskBlit)(MASKBLIT_PARAMS)
1549{
1550    mlib_s32 extraA, srcF;
1551    mlib_s32 dstScan = pDstInfo->scanStride;
1552    mlib_s32 srcScan = pSrcInfo->scanStride;
1553    mlib_s32 log_val[6];
1554    mlib_s32 j;
1555    mlib_s32 SrcOpAnd;
1556    mlib_s32 SrcOpXor;
1557    mlib_s32 SrcOpAdd;
1558    mlib_s32 DstOpAnd;
1559    mlib_s32 DstOpXor;
1560    mlib_s32 DstOpAdd;
1561    mlib_u8  *mul8_extra, *mul8_srcF;
1562
1563    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1564
1565    mul8_extra = mul8table[extraA];
1566
1567    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1568    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1569    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1570    SrcOpAdd -= SrcOpXor;
1571
1572    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1573    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1574    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1575    DstOpAdd -= DstOpXor;
1576
1577    log_val[3] = DstOpAnd;
1578    log_val[4] = DstOpXor;
1579    log_val[5] = DstOpAdd;
1580
1581    srcF = ((0xff & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;
1582
1583    mul8_srcF = mul8table[srcF];
1584
1585    vis_write_gsr(7 << 3);
1586
1587    if (pMask != NULL) {
1588        pMask += maskOff;
1589
1590        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1591            width *= height;
1592            height = 1;
1593        }
1594
1595        for (j = 0; j < height; j++) {
1596            IntRgbToIntBgrAlphaMaskBlit_line(dstBase, srcBase, pMask,
1597                                              width, log_val, mul8_extra,
1598                                              mul8_srcF, (void*)mul8table);
1599
1600            PTR_ADD(dstBase, dstScan);
1601            PTR_ADD(srcBase, srcScan);
1602            PTR_ADD(pMask, maskScan);
1603        }
1604    } else {
1605        if (dstScan == 4*width && srcScan == dstScan) {
1606            width *= height;
1607            height = 1;
1608        }
1609
1610        for (j = 0; j < height; j++) {
1611            IntRgbToIntBgrAlphaMaskBlit_A1_line(dstBase, srcBase, pMask,
1612                                                 width, log_val, mul8_extra,
1613                                                 mul8_srcF, (void*)mul8table);
1614
1615            PTR_ADD(dstBase, dstScan);
1616            PTR_ADD(srcBase, srcScan);
1617        }
1618    }
1619}
1620
1621/***************************************************************/
1622
/*
 * IntBgrToIntBgrAlphaMaskBlit has no code of its own: the symbol is declared
 * as a weak alias of IntRgbToIntRgbAlphaMaskBlit (both names carry the _F
 * suffix when MLIB_ADD_SUFF is defined).
 * NOTE(review): presumably valid because the Rgb -> Rgb blend treats the
 * three colour channels identically, so channel order does not matter -
 * confirm.
 */
1623#ifdef MLIB_ADD_SUFF
1624#pragma weak IntBgrToIntBgrAlphaMaskBlit_F = IntRgbToIntRgbAlphaMaskBlit_F
1625#else
1626#pragma weak IntBgrToIntBgrAlphaMaskBlit   = IntRgbToIntRgbAlphaMaskBlit
1627#endif
1628
1629/***************************************************************/
1630
1631/*
1632    mlib_d64 buff[BUFF_SIZE/2];
1633    void     *pbuff = buff;
1634
1635    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
1636
1637        ADD_SUFF(ThreeByteBgrToIntArgbConvert)(rasBase, pbuff, width, 1,
1638                                               pRasInfo, pRasInfo,
1639                                               pPrim, pCompInfo);
1640
1641        ADD_SUFF(IntArgbToThreeByteBgrConvert)(pbuff, rasBase, width, 1,
1642                                               pRasInfo, pRasInfo,
1643                                               pPrim, pCompInfo);
1644
1645
1646    if (pbuff != buff) {
1647        mlib_free(pbuff);
1648    }
1649*/
1650
1651#endif
1652