1/*
2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
32/* ##############################################################
33 * IntArgbPreAlphaMaskFill()
34 */
35
#undef  MASK_FILL
/*
 * Per-pixel kernel for the masked alpha-rule fill.
 *
 *   rr      - receives vis_fpadd16(source part, destination part);
 *             callers pack it with vis_fpack16 / vis_fpack16_pair
 *   pathA   - coverage value for the pixel
 *   dstA    - alpha byte read from the destination pixel
 *   dstARGB - destination pixel as mlib_f32
 *
 * The source factor is derived from dstA through ConstAnd/ConstXor/ConstAdd
 * and then scaled by the coverage; the destination factor is
 * mul8_cnstF[pathA] + (255 - pathA).  ConstAnd, ConstXor, ConstAdd,
 * cnstARGB0 and mul8_cnstF are taken from the enclosing scope.
 */
#define MASK_FILL(rr, pathA, dstA, dstARGB)                         \
do {                                                                \
    mlib_s32 mf_srcFactor, mf_dstFactor;                            \
    mlib_d64 mf_srcPart, mf_dstPart;                                \
                                                                    \
    mf_dstFactor = mul8_cnstF[pathA] + (255 - pathA);               \
    mf_srcFactor = ((dstA & ConstAnd) ^ ConstXor) + ConstAdd;       \
    mf_srcFactor = MUL8_INT(mf_srcFactor, pathA);                   \
                                                                    \
    mf_srcPart = MUL8_VIS(cnstARGB0, mf_srcFactor);                 \
    mf_dstPart = MUL8_VIS(dstARGB, mf_dstFactor);                   \
    rr = vis_fpadd16(mf_srcPart, mf_dstPart);                       \
} while (0)
50
51/***************************************************************/
52
53void IntArgbPreAlphaMaskFill_line(mlib_f32 *dst_ptr,
54                                  mlib_u8  *pMask,
55                                  mlib_s32 width,
56                                  mlib_f32 cnstARGB0,
57                                  mlib_s32 *log_val,
58                                  mlib_u8  *mul8_cnstF,
59                                  mlib_u8  *mul8_tbl);
60
61#pragma no_inline(IntArgbPreAlphaMaskFill_line)
62
63void IntArgbPreAlphaMaskFill_line(mlib_f32 *dst_ptr,
64                                  mlib_u8  *pMask,
65                                  mlib_s32 width,
66                                  mlib_f32 cnstARGB0,
67                                  mlib_s32 *log_val,
68                                  mlib_u8  *mul8_cnstF,
69                                  mlib_u8  *mul8_tbl)
70{
71    mlib_s32 i, i0;
72    mlib_s32 pathA0, pathA1, dstA0, dstA1, msk;
73    mlib_d64 res0, res1, dstARGB;
74    mlib_f32 dstARGB0;
75    mlib_s32 ConstAnd, ConstXor, ConstAdd;
76
77    ConstAnd = log_val[0];
78    ConstXor = log_val[1];
79    ConstAdd = log_val[2];
80
81    i = i0 = 0;
82
83    if ((mlib_s32)dst_ptr & 7) {
84        pathA0 = pMask[i];
85
86        if (pathA0) {
87            dstA0 = *(mlib_u8*)(dst_ptr + i);
88            dstARGB0 = dst_ptr[i];
89            MASK_FILL(res0, pathA0, dstA0, dstARGB0);
90            dst_ptr[i] = vis_fpack16(res0);
91        }
92
93        i0 = 1;
94    }
95
96#pragma pipeloop(0)
97    for (i = i0; i <= width - 2; i += 2) {
98        pathA0 = pMask[i];
99        pathA1 = pMask[i + 1];
100        dstA0 = *(mlib_u8*)(dst_ptr + i);
101        dstA1 = *(mlib_u8*)(dst_ptr + i + 1);
102        dstARGB = *(mlib_d64*)(dst_ptr + i);
103
104        MASK_FILL(res0, pathA0, dstA0, vis_read_hi(dstARGB));
105        MASK_FILL(res1, pathA1, dstA1, vis_read_lo(dstARGB));
106
107        res0 = vis_fpack16_pair(res0, res1);
108
109        msk = (((-pathA0) & (1 << 11)) | ((-pathA1) & (1 << 10))) >> 10;
110        vis_pst_32(res0, dst_ptr + i, msk);
111    }
112
113    if (i < width) {
114        pathA0 = pMask[i];
115
116        if (pathA0) {
117            dstA0 = *(mlib_u8*)(dst_ptr + i);
118            dstARGB0 = dst_ptr[i];
119            MASK_FILL(res0, pathA0, dstA0, dstARGB0);
120            dst_ptr[i] = vis_fpack16(res0);
121        }
122    }
123}
124
125/***************************************************************/
126
#undef  MASK_FILL
/*
 * Per-pixel kernel for the alpha-rule fill without a coverage mask.
 *
 *   rr      - receives vis_fpadd16(source part, destination part)
 *   cnstF   - destination factor, used as passed in
 *   dstA    - alpha byte read from the destination pixel
 *   dstARGB - destination pixel as mlib_f32
 *
 * ConstAnd, ConstXor, ConstAdd and cnstARGB0 come from the enclosing scope.
 */
#define MASK_FILL(rr, cnstF, dstA, dstARGB)                                  \
do {                                                                         \
    mlib_s32 mf_srcFactor = ((dstA & ConstAnd) ^ ConstXor) + ConstAdd;       \
    mlib_s32 mf_dstFactor = cnstF;                                           \
    mlib_d64 mf_srcPart   = MUL8_VIS(cnstARGB0, mf_srcFactor);               \
    mlib_d64 mf_dstPart   = MUL8_VIS(dstARGB, mf_dstFactor);                 \
                                                                             \
    rr = vis_fpadd16(mf_srcPart, mf_dstPart);                                \
} while (0)
140
141/***************************************************************/
142
143void IntArgPrebAlphaMaskFill_A1_line(mlib_f32 *dst_ptr,
144                                     mlib_s32 width,
145                                     mlib_f32 cnstARGB0,
146                                     mlib_s32 *log_val,
147                                     mlib_s32 cnstF);
148
149#pragma no_inline(IntArgPrebAlphaMaskFill_A1_line)
150
151void IntArgPrebAlphaMaskFill_A1_line(mlib_f32 *dst_ptr,
152                                     mlib_s32 width,
153                                     mlib_f32 cnstARGB0,
154                                     mlib_s32 *log_val,
155                                     mlib_s32 cnstF)
156{
157    mlib_s32 i, i0;
158    mlib_s32 dstA0, dstA1;
159    mlib_d64 res0, res1, dstARGB;
160    mlib_f32 dstARGB0;
161    mlib_s32 ConstAnd, ConstXor, ConstAdd;
162
163    ConstAnd = log_val[0];
164    ConstXor = log_val[1];
165    ConstAdd = log_val[2];
166
167    i = i0 = 0;
168
169    if ((mlib_s32)dst_ptr & 7) {
170        {
171            dstA0 = *(mlib_u8*)(dst_ptr + i);
172            dstARGB0 = dst_ptr[i];
173            MASK_FILL(res0, cnstF, dstA0, dstARGB0);
174            dst_ptr[i] = vis_fpack16(res0);
175        }
176
177        i0 = 1;
178    }
179
180#pragma pipeloop(0)
181    for (i = i0; i <= width - 2; i += 2) {
182        dstA0 = *(mlib_u8*)(dst_ptr + i);
183        dstA1 = *(mlib_u8*)(dst_ptr + i + 1);
184        dstARGB = *(mlib_d64*)(dst_ptr + i);
185
186        MASK_FILL(res0, cnstF, dstA0, vis_read_hi(dstARGB));
187        MASK_FILL(res1, cnstF, dstA1, vis_read_lo(dstARGB));
188
189        res0 = vis_fpack16_pair(res0, res1);
190
191        *(mlib_d64*)(dst_ptr + i) = res0;
192    }
193
194    if (i < width) {
195        {
196            dstA0 = *(mlib_u8*)(dst_ptr + i);
197            dstARGB0 = dst_ptr[i];
198            MASK_FILL(res0, cnstF, dstA0, dstARGB0);
199            dst_ptr[i] = vis_fpack16(res0);
200        }
201    }
202}
203
204/***************************************************************/
205
206void ADD_SUFF(IntArgbPreAlphaMaskFill)(void *rasBase,
207                                       jubyte *pMask,
208                                       jint maskOff,
209                                       jint maskScan,
210                                       jint width,
211                                       jint height,
212                                       jint fgColor,
213                                       SurfaceDataRasInfo *pRasInfo,
214                                       NativePrimitive *pPrim,
215                                       CompositeInfo *pCompInfo)
216{
217    mlib_s32 cnstA, cnstR, cnstG, cnstB;
218    mlib_s32 rasScan = pRasInfo->scanStride;
219    mlib_f32 cnstARGB0;
220    mlib_u8  *mul8_cnstF;
221    mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd;
222    mlib_s32 DstOpAnd, DstOpXor, DstOpAdd;
223    mlib_s32 dstFbase;
224    mlib_s32 log_val[3];
225    mlib_s32 j;
226
227    cnstA = (fgColor >> 24) & 0xff;
228    cnstR = (fgColor >> 16) & 0xff;
229    cnstG = (fgColor >>  8) & 0xff;
230    cnstB = (fgColor      ) & 0xff;
231
232    if (cnstA != 0xff) {
233        cnstR = mul8table[cnstA][cnstR];
234        cnstG = mul8table[cnstA][cnstG];
235        cnstB = mul8table[cnstA][cnstB];
236    }
237
238    cnstARGB0 = F32_FROM_U8x4(cnstA, cnstR, cnstG, cnstB);
239
240    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
241    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
242    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
243    SrcOpAdd -= SrcOpXor;
244
245    log_val[0] = SrcOpAnd;
246    log_val[1] = SrcOpXor;
247    log_val[2] = SrcOpAdd;
248
249    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
250    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
251    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
252    DstOpAdd -= DstOpXor;
253
254    dstFbase = (((cnstA) & DstOpAnd) ^ DstOpXor) + DstOpAdd;
255
256    mul8_cnstF = mul8table[dstFbase];
257
258    vis_write_gsr(0 << 3);
259
260    if (pMask != NULL) {
261        pMask += maskOff;
262
263        if (rasScan == 4*width && maskScan == width) {
264            width *= height;
265            height = 1;
266        }
267
268        for (j = 0; j < height; j++) {
269            IntArgbPreAlphaMaskFill_line(rasBase, pMask, width, cnstARGB0,
270                                         log_val, mul8_cnstF,
271                                         (void*)mul8table);
272
273            PTR_ADD(rasBase, rasScan);
274            PTR_ADD(pMask, maskScan);
275        }
276    } else {
277        if (rasScan == 4*width) {
278            width *= height;
279            height = 1;
280        }
281
282        for (j = 0; j < height; j++) {
283            IntArgPrebAlphaMaskFill_A1_line(rasBase, width, cnstARGB0,
284                                            log_val, dstFbase);
285
286            PTR_ADD(rasBase, rasScan);
287        }
288    }
289}
290
291/***************************************************************/
292
293void ADD_SUFF(FourByteAbgrPreAlphaMaskFill)(void *rasBase,
294                                            jubyte *pMask,
295                                            jint maskOff,
296                                            jint maskScan,
297                                            jint width,
298                                            jint height,
299                                            jint fgColor,
300                                            SurfaceDataRasInfo *pRasInfo,
301                                            NativePrimitive *pPrim,
302                                            CompositeInfo *pCompInfo)
303{
304    mlib_d64 buff[BUFF_SIZE/2];
305    void     *pbuff = buff, *p_dst;
306    mlib_s32 cnstA, cnstR, cnstG, cnstB;
307    mlib_s32 rasScan = pRasInfo->scanStride;
308    mlib_f32 cnstARGB0;
309    mlib_u8  *mul8_cnstF;
310    mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd;
311    mlib_s32 DstOpAnd, DstOpXor, DstOpAdd;
312    mlib_s32 dstFbase;
313    mlib_s32 log_val[3];
314    mlib_s32 j;
315
316    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
317
318    cnstA = (fgColor >> 24) & 0xff;
319    cnstR = (fgColor >> 16) & 0xff;
320    cnstG = (fgColor >>  8) & 0xff;
321    cnstB = (fgColor      ) & 0xff;
322
323    if (cnstA != 0xff) {
324        cnstR = mul8table[cnstA][cnstR];
325        cnstG = mul8table[cnstA][cnstG];
326        cnstB = mul8table[cnstA][cnstB];
327    }
328
329    cnstARGB0 = F32_FROM_U8x4(cnstA, cnstB, cnstG, cnstR);
330
331    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
332    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
333    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
334    SrcOpAdd -= SrcOpXor;
335
336    log_val[0] = SrcOpAnd;
337    log_val[1] = SrcOpXor;
338    log_val[2] = SrcOpAdd;
339
340    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
341    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
342    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
343    DstOpAdd -= DstOpXor;
344
345    dstFbase = (((cnstA) & DstOpAnd) ^ DstOpXor) + DstOpAdd;
346
347    mul8_cnstF = mul8table[dstFbase];
348
349    vis_write_gsr(0 << 3);
350
351    if (pMask != NULL) {
352        pMask += maskOff;
353
354        for (j = 0; j < height; j++) {
355            if ((mlib_s32)rasBase & 3) {
356                mlib_ImageCopy_na(rasBase, pbuff, width*sizeof(mlib_s32));
357                p_dst = pbuff;
358            } else {
359                p_dst = rasBase;
360            }
361
362            IntArgbPreAlphaMaskFill_line(p_dst, pMask, width, cnstARGB0,
363                                         log_val, mul8_cnstF,
364                                         (void*)mul8table);
365
366            if (p_dst != rasBase) {
367                mlib_ImageCopy_na(p_dst, rasBase, width*sizeof(mlib_s32));
368            }
369
370            PTR_ADD(rasBase, rasScan);
371            PTR_ADD(pMask, maskScan);
372        }
373    } else {
374        for (j = 0; j < height; j++) {
375            if ((mlib_s32)rasBase & 3) {
376                mlib_ImageCopy_na(rasBase, pbuff, width*sizeof(mlib_s32));
377                p_dst = pbuff;
378            } else {
379                p_dst = rasBase;
380            }
381
382            IntArgPrebAlphaMaskFill_A1_line(p_dst, width, cnstARGB0,
383                                            log_val, dstFbase);
384
385            if (p_dst != rasBase) {
386                mlib_ImageCopy_na(p_dst, rasBase, width*sizeof(mlib_s32));
387            }
388
389            PTR_ADD(rasBase, rasScan);
390        }
391    }
392
393    if (pbuff != buff) {
394        mlib_free(pbuff);
395    }
396}
397
398/***************************************************************/
399
400/* ##############################################################
401 * IntArgbPreSrcMaskFill()
402 */
403
#undef MASK_FILL
/*
 * Per-pixel kernel for the masked Src fill: weights the fill colour by
 * the coverage and the destination by (0xff - coverage).
 *
 *   rr      - receives vis_fpadd16(source part, destination part)
 *   pathA   - coverage value for the pixel
 *   dstARGB - destination pixel as mlib_f32
 *
 * cnstARGB0 comes from the enclosing scope.
 */
#define MASK_FILL(rr, pathA, dstARGB)                               \
do {                                                                \
    mlib_d64 mf_srcPart = MUL8_VIS(cnstARGB0, pathA);               \
    mlib_d64 mf_dstPart = MUL8_VIS(dstARGB, (0xff - pathA));        \
                                                                    \
    rr = vis_fpadd16(mf_srcPart, mf_dstPart);                       \
} while (0)
413
414/***************************************************************/
415
416void IntArgbPreSrcMaskFill_line(mlib_f32 *dst_ptr,
417                                mlib_u8  *pMask,
418                                mlib_s32 width,
419                                mlib_d64 fgARGB,
420                                mlib_f32 cnstARGB0);
421
422#pragma no_inline(IntArgbPreSrcMaskFill_line)
423
424void IntArgbPreSrcMaskFill_line(mlib_f32 *dst_ptr,
425                                mlib_u8  *pMask,
426                                mlib_s32 width,
427                                mlib_d64 fgARGB,
428                                mlib_f32 cnstARGB0)
429{
430    mlib_s32 i, i0;
431    mlib_s32 pathA0, pathA1, msk;
432    mlib_d64 res0, res1, dstARGB;
433    mlib_f32 dstARGB0;
434
435    i = i0 = 0;
436
437    if ((mlib_s32)dst_ptr & 7) {
438        pathA0 = pMask[i];
439        if (pathA0 == 0xff) {
440            dst_ptr[i] = vis_read_hi(fgARGB);
441        } else if (pathA0) {
442            dstARGB0 = dst_ptr[i];
443            MASK_FILL(res0, pathA0, dstARGB0);
444            dst_ptr[i] = vis_fpack16(res0);
445        }
446
447        i0 = 1;
448    }
449
450#pragma pipeloop(0)
451    for (i = i0; i <= width - 2; i += 2) {
452        pathA0 = pMask[i];
453        pathA1 = pMask[i + 1];
454
455        dstARGB = *(mlib_d64*)(dst_ptr + i);
456
457        msk = (((254 - pathA0) & (1 << 11)) |
458               ((254 - pathA1) & (1 << 10))) >> 10;
459
460        MASK_FILL(res0, pathA0, vis_read_hi(dstARGB));
461        MASK_FILL(res1, pathA1, vis_read_lo(dstARGB));
462
463        res0 = vis_fpack16_pair(res0, res1);
464
465        *(mlib_d64*)(dst_ptr + i) = res0;
466
467        vis_pst_32(fgARGB, dst_ptr + i, msk);
468    }
469
470    if (i < width) {
471        pathA0 = pMask[i];
472        if (pathA0 == 0xff) {
473            dst_ptr[i] = vis_read_hi(fgARGB);
474        } else if (pathA0) {
475            dstARGB0 = dst_ptr[i];
476            MASK_FILL(res0, pathA0, dstARGB0);
477            dst_ptr[i] = vis_fpack16(res0);
478        }
479    }
480}
481
482/***************************************************************/
483
484void ADD_SUFF(IntArgbPreSrcMaskFill)(void *rasBase,
485                                     jubyte *pMask,
486                                     jint maskOff,
487                                     jint maskScan,
488                                     jint width,
489                                     jint height,
490                                     jint fgColor,
491                                     SurfaceDataRasInfo *pRasInfo,
492                                     NativePrimitive *pPrim,
493                                     CompositeInfo *pCompInfo)
494{
495    mlib_s32 cnstA, cnstR, cnstG, cnstB;
496    mlib_s32 rasScan = pRasInfo->scanStride;
497    mlib_f32 cnstARGB0;
498    mlib_d64 fgARGB;
499    mlib_s32 j;
500
501    cnstA = (fgColor >> 24) & 0xff;
502    cnstR = (fgColor >> 16) & 0xff;
503    cnstG = (fgColor >>  8) & 0xff;
504    cnstB = (fgColor      ) & 0xff;
505
506    if (cnstA != 0xff) {
507#ifdef LOOPS_OLD_VERSION
508        if (cnstA == 0) return;
509#endif
510        cnstR = mul8table[cnstA][cnstR];
511        cnstG = mul8table[cnstA][cnstG];
512        cnstB = mul8table[cnstA][cnstB];
513    }
514
515    if (pMask == NULL) {
516#ifdef LOOPS_OLD_VERSION
517        ADD_SUFF(AnyIntSetRect)(pRasInfo, 0, 0, width, height,
518                                fgColor, pPrim, pCompInfo);
519#else
520        void *pBase = pRasInfo->rasBase;
521        pRasInfo->rasBase = rasBase;
522        if (cnstA != 0xff) {
523            fgColor = (cnstA << 24) | (cnstR << 16) | (cnstG << 8) | cnstB;
524        }
525        ADD_SUFF(AnyIntSetRect)(pRasInfo,
526                                0, 0, width, height,
527                                fgColor, pPrim, pCompInfo);
528        pRasInfo->rasBase = pBase;
529#endif
530        return;
531    }
532
533    cnstARGB0 = F32_FROM_U8x4(cnstA, cnstR, cnstG, cnstB);
534
535    fgARGB = vis_to_double_dup(fgColor);
536
537    pMask += maskOff;
538
539    if (rasScan == 4*width && maskScan == width) {
540        width *= height;
541        height = 1;
542    }
543
544    vis_write_gsr(0 << 3);
545
546    for (j = 0; j < height; j++) {
547        IntArgbPreSrcMaskFill_line(rasBase, pMask, width, fgARGB, cnstARGB0);
548
549        PTR_ADD(rasBase, rasScan);
550        PTR_ADD(pMask, maskScan);
551    }
552}
553
554/***************************************************************/
555
556void ADD_SUFF(FourByteAbgrPreSrcMaskFill)(void *rasBase,
557                                          jubyte *pMask,
558                                          jint maskOff,
559                                          jint maskScan,
560                                          jint width,
561                                          jint height,
562                                          jint fgColor,
563                                          SurfaceDataRasInfo *pRasInfo,
564                                          NativePrimitive *pPrim,
565                                          CompositeInfo *pCompInfo)
566{
567    mlib_d64 buff[BUFF_SIZE/2];
568    void     *pbuff = buff, *p_dst;
569    mlib_s32 cnstA, cnstR, cnstG, cnstB;
570    mlib_s32 rasScan = pRasInfo->scanStride;
571    mlib_f32 cnstARGB0;
572    mlib_d64 fgARGB;
573    mlib_s32 j;
574
575    cnstA = (fgColor >> 24) & 0xff;
576    cnstR = (fgColor >> 16) & 0xff;
577    cnstG = (fgColor >>  8) & 0xff;
578    cnstB = (fgColor      ) & 0xff;
579
580    if (cnstA != 0xff) {
581        cnstR = mul8table[cnstA][cnstR];
582        cnstG = mul8table[cnstA][cnstG];
583        cnstB = mul8table[cnstA][cnstB];
584    }
585
586    if (pMask == NULL) {
587        void *pBase = pRasInfo->rasBase;
588        pRasInfo->rasBase = rasBase;
589        fgColor = (cnstR << 24) | (cnstG << 16) | (cnstB << 8) | cnstA;
590        ADD_SUFF(Any4ByteSetRect)(pRasInfo,
591                                  0, 0, width, height,
592                                  fgColor, pPrim, pCompInfo);
593        pRasInfo->rasBase = pBase;
594        return;
595    }
596
597    fgColor = (cnstA << 24) | (cnstB << 16) | (cnstG << 8) | cnstR;
598    cnstARGB0 = F32_FROM_U8x4(cnstA, cnstB, cnstG, cnstR);
599
600    fgARGB = vis_to_double_dup(fgColor);
601
602    pMask += maskOff;
603
604    vis_write_gsr(0 << 3);
605
606    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
607
608    for (j = 0; j < height; j++) {
609        if ((mlib_s32)rasBase & 3) {
610            mlib_ImageCopy_na(rasBase, pbuff, width*sizeof(mlib_s32));
611            p_dst = pbuff;
612        } else {
613            p_dst = rasBase;
614        }
615
616        IntArgbPreSrcMaskFill_line(p_dst, pMask, width, fgARGB, cnstARGB0);
617
618        if (p_dst != rasBase) {
619            mlib_ImageCopy_na(p_dst, rasBase, width*sizeof(mlib_s32));
620        }
621
622        PTR_ADD(rasBase, rasScan);
623        PTR_ADD(pMask, maskScan);
624    }
625
626    if (pbuff != buff) {
627        mlib_free(pbuff);
628    }
629}
630
631/***************************************************************/
632
633/* ##############################################################
634 * IntArgbPreSrcOverMaskFill()
635 */
636
#undef  MASK_FILL
/*
 * Per-pixel kernel for the masked SrcOver fill: the fill colour is
 * weighted by the coverage and the destination by
 * 0xff - mul8_cnstA[coverage].
 *
 *   rr      - receives vis_fpadd16(source part, destination part)
 *   pathA   - coverage value for the pixel
 *   dstARGB - destination pixel as mlib_f32
 *
 * cnstARGB0 and mul8_cnstA come from the enclosing scope.
 */
#define MASK_FILL(rr, pathA, dstARGB)                               \
do {                                                                \
    mlib_s32 mf_dstFactor = 0xff - mul8_cnstA[pathA];               \
    mlib_d64 mf_srcPart   = MUL8_VIS(cnstARGB0, pathA);             \
    mlib_d64 mf_dstPart   = MUL8_VIS(dstARGB, mf_dstFactor);        \
                                                                    \
    rr = vis_fpadd16(mf_srcPart, mf_dstPart);                       \
} while (0)
649
650/***************************************************************/
651
652static void IntArgbPreSrcOverMaskFill_line(mlib_f32 *dst_ptr,
653                                           mlib_u8  *pMask,
654                                           mlib_s32 width,
655                                           mlib_f32 cnstARGB0,
656                                           mlib_u8  *mul8_cnstA);
657
658#pragma no_inline(IntArgbPreSrcOverMaskFill_line)
659
660static void IntArgbPreSrcOverMaskFill_line(mlib_f32 *dst_ptr,
661                                           mlib_u8  *pMask,
662                                           mlib_s32 width,
663                                           mlib_f32 cnstARGB0,
664                                           mlib_u8  *mul8_cnstA)
665{
666    mlib_s32 i, i0;
667    mlib_s32 pathA0, pathA1;
668    mlib_d64 res0, res1, dstARGB;
669    mlib_f32 dstARGB0;
670
671    i = i0 = 0;
672
673    if ((mlib_s32)dst_ptr & 7) {
674        pathA0 = pMask[i];
675
676        if (pathA0) {
677            dstARGB0 = dst_ptr[i];
678            MASK_FILL(res0, pathA0, dstARGB0);
679            dst_ptr[i] = vis_fpack16(res0);
680        }
681
682        i0 = 1;
683    }
684
685#pragma pipeloop(0)
686    for (i = i0; i <= width - 2; i += 2) {
687        pathA0 = pMask[i];
688        pathA1 = pMask[i + 1];
689        dstARGB = *(mlib_d64*)(dst_ptr + i);
690
691        MASK_FILL(res0, pathA0, vis_read_hi(dstARGB));
692        MASK_FILL(res1, pathA1, vis_read_lo(dstARGB));
693
694        res0 = vis_fpack16_pair(res0, res1);
695
696        *(mlib_d64 *)(dst_ptr + i) = res0;
697    }
698
699    if (i < width) {
700        pathA0 = pMask[i];
701
702        if (pathA0) {
703            dstARGB0 = dst_ptr[i];
704            MASK_FILL(res0, pathA0, dstARGB0);
705            dst_ptr[i] = vis_fpack16(res0);
706        }
707    }
708}
709
710/***************************************************************/
711
#undef  MASK_FILL
/*
 * Per-pixel kernel for the SrcOver fill without a mask: scales the
 * destination pixel by cnstA and adds the constant cnstARGB.
 *
 *   rr      - receives the vis_fpadd16 result
 *   dstARGB - destination pixel as mlib_f32
 *
 * cnstA and cnstARGB come from the enclosing scope.
 */
#define MASK_FILL(rr, dstARGB)                                  \
do {                                                            \
    mlib_d64 mf_scaledDst = MUL8_VIS(dstARGB, cnstA);           \
                                                                \
    rr = vis_fpadd16(mf_scaledDst, cnstARGB);                   \
} while (0)
718
719/***************************************************************/
720
721static void IntArgbPreSrcOverMaskFill_A1_line(mlib_f32 *dst_ptr,
722                                              mlib_s32 width,
723                                              mlib_d64 cnstARGB,
724                                              mlib_s32 cnstA);
725
726#pragma no_inline(IntArgbPreSrcOverMaskFill_A1_line)
727
728static void IntArgbPreSrcOverMaskFill_A1_line(mlib_f32 *dst_ptr,
729                                              mlib_s32 width,
730                                              mlib_d64 cnstARGB,
731                                              mlib_s32 cnstA)
732{
733    mlib_s32 i, i0;
734    mlib_d64 res0, res1, dstARGB;
735    mlib_f32 dstARGB0;
736
737    cnstA = 0xff - cnstA;
738
739    i = i0 = 0;
740
741    if ((mlib_s32)dst_ptr & 7) {
742        dstARGB0 = dst_ptr[i];
743        MASK_FILL(res0, dstARGB0);
744        dst_ptr[i] = vis_fpack16(res0);
745        i0 = 1;
746    }
747
748#pragma pipeloop(0)
749    for (i = i0; i <= width - 2; i += 2) {
750        dstARGB = *(mlib_d64*)(dst_ptr + i);
751
752        MASK_FILL(res0, vis_read_hi(dstARGB));
753        MASK_FILL(res1, vis_read_lo(dstARGB));
754
755        res0 = vis_fpack16_pair(res0, res1);
756
757        *(mlib_d64*)(dst_ptr + i) = res0;
758    }
759
760    if (i < width) {
761        dstARGB0 = dst_ptr[i];
762        MASK_FILL(res0, dstARGB0);
763        dst_ptr[i] = vis_fpack16(res0);
764    }
765}
766
767/***************************************************************/
768
769void ADD_SUFF(IntArgbPreSrcOverMaskFill)(void *rasBase,
770                                         jubyte *pMask,
771                                         jint maskOff,
772                                         jint maskScan,
773                                         jint width,
774                                         jint height,
775                                         jint fgColor,
776                                         SurfaceDataRasInfo *pRasInfo,
777                                         NativePrimitive *pPrim,
778                                         CompositeInfo *pCompInfo)
779{
780    mlib_s32 cnstA, cnstR, cnstG, cnstB;
781    mlib_s32 rasScan = pRasInfo->scanStride;
782    mlib_f32 cnstARGB0;
783    mlib_d64 cnstARGB;
784    mlib_u8  *mul8_cnstA;
785    mlib_s32 j;
786
787    cnstA = (fgColor >> 24) & 0xff;
788    cnstR = (fgColor >> 16) & 0xff;
789    cnstG = (fgColor >>  8) & 0xff;
790    cnstB = (fgColor      ) & 0xff;
791
792    if (cnstA != 0xff) {
793        if (cnstA == 0) return;
794
795        cnstR = mul8table[cnstA][cnstR];
796        cnstG = mul8table[cnstA][cnstG];
797        cnstB = mul8table[cnstA][cnstB];
798    }
799
800    vis_write_gsr(0 << 3);
801
802    if (pMask != NULL) {
803        pMask += maskOff;
804
805        if (rasScan == 4*width && maskScan == width) {
806            width *= height;
807            height = 1;
808        }
809
810        mul8_cnstA = mul8table[cnstA];
811
812        cnstARGB0 = F32_FROM_U8x4(cnstA, cnstR, cnstG, cnstB);
813
814        for (j = 0; j < height; j++) {
815            IntArgbPreSrcOverMaskFill_line(rasBase, pMask, width, cnstARGB0,
816                                           mul8_cnstA);
817
818            PTR_ADD(rasBase, rasScan);
819            PTR_ADD(pMask, maskScan);
820        }
821    } else {
822        if (rasScan == 4*width) {
823            width *= height;
824            height = 1;
825        }
826
827        cnstARGB = vis_to_double((cnstA << 23) | (cnstR << 7),
828                                 (cnstG << 23) | (cnstB << 7));
829
830        for (j = 0; j < height; j++) {
831            IntArgbPreSrcOverMaskFill_A1_line(rasBase, width, cnstARGB, cnstA);
832
833            PTR_ADD(rasBase, rasScan);
834        }
835    }
836}
837
838/***************************************************************/
839
840void ADD_SUFF(FourByteAbgrPreSrcOverMaskFill)(void *rasBase,
841                                              jubyte *pMask,
842                                              jint maskOff,
843                                              jint maskScan,
844                                              jint width,
845                                              jint height,
846                                              jint fgColor,
847                                              SurfaceDataRasInfo *pRasInfo,
848                                              NativePrimitive *pPrim,
849                                              CompositeInfo *pCompInfo)
850{
851    mlib_d64 buff[BUFF_SIZE/2];
852    void     *pbuff = buff, *p_dst;
853    mlib_s32 cnstA, cnstR, cnstG, cnstB;
854    mlib_s32 rasScan = pRasInfo->scanStride;
855    mlib_f32 cnstARGB0;
856    mlib_d64 cnstARGB;
857    mlib_u8  *mul8_cnstA;
858    mlib_s32 j;
859
860    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
861
862    cnstA = (fgColor >> 24) & 0xff;
863    cnstR = (fgColor >> 16) & 0xff;
864    cnstG = (fgColor >>  8) & 0xff;
865    cnstB = (fgColor      ) & 0xff;
866
867    if (cnstA != 0xff) {
868        if (cnstA == 0) return;
869
870        cnstR = mul8table[cnstA][cnstR];
871        cnstG = mul8table[cnstA][cnstG];
872        cnstB = mul8table[cnstA][cnstB];
873    }
874
875    vis_write_gsr(0 << 3);
876
877    if (pMask != NULL) {
878        pMask += maskOff;
879
880        mul8_cnstA = mul8table[cnstA];
881
882        cnstARGB0 = F32_FROM_U8x4(cnstA, cnstB, cnstG, cnstR);
883
884        for (j = 0; j < height; j++) {
885            if ((mlib_s32)rasBase & 3) {
886                mlib_ImageCopy_na(rasBase, pbuff, width*sizeof(mlib_s32));
887                p_dst = pbuff;
888            } else {
889                p_dst = rasBase;
890            }
891
892            IntArgbPreSrcOverMaskFill_line(p_dst, pMask, width, cnstARGB0,
893                                           mul8_cnstA);
894
895            if (p_dst != rasBase) {
896                mlib_ImageCopy_na(p_dst, rasBase, width*sizeof(mlib_s32));
897            }
898
899            PTR_ADD(rasBase, rasScan);
900            PTR_ADD(pMask, maskScan);
901        }
902    } else {
903        cnstARGB = vis_to_double((cnstA << 23) | (cnstB << 7),
904                                 (cnstG << 23) | (cnstR << 7));
905
906        for (j = 0; j < height; j++) {
907            if ((mlib_s32)rasBase & 3) {
908                mlib_ImageCopy_na(rasBase, pbuff, width*sizeof(mlib_s32));
909                p_dst = pbuff;
910            } else {
911                p_dst = rasBase;
912            }
913
914            IntArgbPreSrcOverMaskFill_A1_line(p_dst, width, cnstARGB, cnstA);
915
916            if (p_dst != rasBase) {
917                mlib_ImageCopy_na(p_dst, rasBase, width*sizeof(mlib_s32));
918            }
919
920            PTR_ADD(rasBase, rasScan);
921        }
922    }
923
924    if (pbuff != buff) {
925        mlib_free(pbuff);
926    }
927}
928
929/***************************************************************/
930
931/* ##############################################################
932 * IntArgbToIntArgbPreSrcOverMaskBlit()
933 */
934
/*
 * MASK_FILL(rr, pathA, dstARGB, srcA, srcARGB)
 *
 * Per-pixel blend step used by the masked SrcOver line routine below.
 *   - srcA is an lvalue: it is overwritten with
 *     MUL8_INT(mul8_extra[pathA], srcA) before being used as the source
 *     factor, so the caller sees the scaled value afterwards.
 *   - the destination factor is 0xff minus that scaled alpha.
 *   - rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_extra must be visible at the expansion site; MUL8_INT and MUL8_VIS
 * are defined outside this section of the file.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstARGB, srcA, srcARGB)      \
{                                                         \
    mlib_s32 dstF;                                        \
    mlib_d64 srcTerm, dstTerm;                            \
                                                          \
    srcA = MUL8_INT(mul8_extra[pathA], srcA);             \
    dstF = 0xff - srcA;                                   \
                                                          \
    dstTerm = MUL8_VIS(dstARGB, dstF);                    \
    srcTerm = MUL8_VIS(srcARGB, srcA);                    \
    rr = vis_fpadd16(srcTerm, dstTerm);                   \
}
948
949/***************************************************************/
950
951static void IntArgbToIntArgbPreSrcOverMaskBlit_line(mlib_f32 *dst_ptr,
952                                                    mlib_f32 *src_ptr,
953                                                    mlib_u8  *pMask,
954                                                    mlib_s32 width,
955                                                    mlib_u8  *mul8_extra,
956                                                    mlib_u8  *mul8_tbl);
957
958#pragma no_inline(IntArgbToIntArgbPreSrcOverMaskBlit_line)
959
960static void IntArgbToIntArgbPreSrcOverMaskBlit_line(mlib_f32 *dst_ptr,
961                                                    mlib_f32 *src_ptr,
962                                                    mlib_u8  *pMask,
963                                                    mlib_s32 width,
964                                                    mlib_u8  *mul8_extra,
965                                                    mlib_u8  *mul8_tbl)
966{
967    mlib_s32 i, i0;
968    mlib_s32 pathA0, pathA1, srcA0, srcA1;
969    mlib_d64 res0, res1, dstARGB, srcARGB;
970    mlib_f32 dstARGB0, srcARGB0;
971    mlib_d64 or_alpha = vis_to_double_dup(0xff000000);
972
973    i = i0 = 0;
974
975    if ((mlib_s32)dst_ptr & 7) {
976        pathA0 = pMask[i];
977        srcA0 = *(mlib_u8*)(src_ptr + i);
978        dstARGB0 = dst_ptr[i];
979        srcARGB0 = src_ptr[i];
980        srcARGB0 = vis_fors(vis_read_hi(or_alpha), srcARGB0);
981        MASK_FILL(res0, pathA0, dstARGB0, srcA0, srcARGB0);
982        if (srcA0) {
983            dst_ptr[i] = vis_fpack16(res0);
984        }
985
986        i0 = 1;
987    }
988
989#pragma pipeloop(0)
990    for (i = i0; i <= width - 2; i += 2) {
991        pathA0 = pMask[i];
992        pathA1 = pMask[i + 1];
993        dstARGB = *(mlib_d64*)(dst_ptr + i);
994        srcA0 = *(mlib_u8*)(src_ptr + i);
995        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
996        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
997        srcARGB = vis_for(or_alpha, srcARGB);
998
999        MASK_FILL(res0, pathA0, vis_read_hi(dstARGB),
1000                  srcA0, vis_read_hi(srcARGB));
1001        MASK_FILL(res1, pathA1, vis_read_lo(dstARGB),
1002                  srcA1, vis_read_lo(srcARGB));
1003
1004        res0 = vis_fpack16_pair(res0, res1);
1005
1006        *(mlib_d64*)(dst_ptr + i) = res0;
1007    }
1008
1009    if (i < width) {
1010        pathA0 = pMask[i];
1011        srcA0 = *(mlib_u8*)(src_ptr + i);
1012        dstARGB0 = dst_ptr[i];
1013        srcARGB0 = src_ptr[i];
1014        srcARGB0 = vis_fors(vis_read_hi(or_alpha), srcARGB0);
1015        MASK_FILL(res0, pathA0, dstARGB0, srcA0, srcARGB0);
1016        if (srcA0) {
1017            dst_ptr[i] = vis_fpack16(res0);
1018        }
1019    }
1020}
1021
1022/***************************************************************/
1023
/*
 * MASK_FILL(rr, dstARGB, srcA, srcARGB)
 *
 * Blend step for the SrcOver line routine that has no coverage mask.
 *   - srcA is an lvalue: it is replaced by mul8_extra[srcA].
 *   - the destination factor is 0xff minus that value.
 *   - rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_extra must be visible at the expansion site.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, dstARGB, srcA, srcARGB)      \
{                                                  \
    mlib_s32 dstF;                                 \
    mlib_d64 srcTerm, dstTerm;                     \
                                                   \
    srcA = mul8_extra[srcA];                       \
    dstF = 0xff - srcA;                            \
                                                   \
    dstTerm = MUL8_VIS(dstARGB, dstF);             \
    srcTerm = MUL8_VIS(srcARGB, srcA);             \
    rr = vis_fpadd16(srcTerm, dstTerm);            \
}
1037
1038/***************************************************************/
1039
1040static void IntArgbToIntArgbPreSrcOverMaskBlit_A1_line(mlib_f32 *dst_ptr,
1041                                                       mlib_f32 *src_ptr,
1042                                                       mlib_s32 width,
1043                                                       mlib_u8  *mul8_extra);
1044
1045#pragma no_inline(IntArgbToIntArgbPreSrcOverMaskBlit_A1_line)
1046
1047static void IntArgbToIntArgbPreSrcOverMaskBlit_A1_line(mlib_f32 *dst_ptr,
1048                                                       mlib_f32 *src_ptr,
1049                                                       mlib_s32 width,
1050                                                       mlib_u8  *mul8_extra)
1051{
1052    mlib_s32 i, i0;
1053    mlib_s32 srcA0, srcA1;
1054    mlib_d64 res0, res1, dstARGB, srcARGB;
1055    mlib_f32 dstARGB0, srcARGB0;
1056    mlib_d64 or_alpha = vis_to_double_dup(0xff000000);
1057
1058    i = i0 = 0;
1059
1060    if ((mlib_s32)dst_ptr & 7) {
1061        srcA0 = *(mlib_u8*)(src_ptr + i);
1062        dstARGB0 = dst_ptr[i];
1063        srcARGB0 = src_ptr[i];
1064        srcARGB0 = vis_fors(vis_read_hi(or_alpha), srcARGB0);
1065        MASK_FILL(res0, dstARGB0, srcA0, srcARGB0);
1066        if (srcA0) {
1067            dst_ptr[i] = vis_fpack16(res0);
1068        }
1069
1070        i0 = 1;
1071    }
1072
1073#pragma pipeloop(0)
1074    for (i = i0; i <= width - 2; i += 2) {
1075        dstARGB = *(mlib_d64*)(dst_ptr + i);
1076        srcA0 = *(mlib_u8*)(src_ptr + i);
1077        srcA1 = *(mlib_u8*)(src_ptr + i + 1);
1078        srcARGB = vis_freg_pair(src_ptr[i], src_ptr[i + 1]);
1079        srcARGB = vis_for(or_alpha, srcARGB);
1080
1081        MASK_FILL(res0, vis_read_hi(dstARGB), srcA0, vis_read_hi(srcARGB));
1082        MASK_FILL(res1, vis_read_lo(dstARGB), srcA1, vis_read_lo(srcARGB));
1083
1084        res0 = vis_fpack16_pair(res0, res1);
1085        *(mlib_d64*)(dst_ptr + i) = res0;
1086    }
1087
1088    if (i < width) {
1089        srcA0 = *(mlib_u8*)(src_ptr + i);
1090        dstARGB0 = dst_ptr[i];
1091        srcARGB0 = src_ptr[i];
1092        srcARGB0 = vis_fors(vis_read_hi(or_alpha), srcARGB0);
1093        MASK_FILL(res0, dstARGB0, srcA0, srcARGB0);
1094        if (srcA0) {
1095            dst_ptr[i] = vis_fpack16(res0);
1096        }
1097    }
1098}
1099
1100/***************************************************************/
1101
1102void ADD_SUFF(IntArgbToIntArgbPreSrcOverMaskBlit)(MASKBLIT_PARAMS)
1103{
1104    mlib_s32 extraA;
1105    mlib_s32 dstScan = pDstInfo->scanStride;
1106    mlib_s32 srcScan = pSrcInfo->scanStride;
1107    mlib_u8  *mul8_extra;
1108    mlib_s32 j;
1109
1110    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1111
1112    mul8_extra = mul8table[extraA];
1113
1114    vis_write_gsr(0 << 3);
1115
1116    if (pMask != NULL) {
1117        pMask += maskOff;
1118
1119        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1120            width *= height;
1121            height = 1;
1122        }
1123
1124        for (j = 0; j < height; j++) {
1125            IntArgbToIntArgbPreSrcOverMaskBlit_line(dstBase, srcBase, pMask,
1126                                                    width, mul8_extra,
1127                                                    (void*)mul8table);
1128
1129            PTR_ADD(dstBase, dstScan);
1130            PTR_ADD(srcBase, srcScan);
1131            PTR_ADD(pMask, maskScan);
1132        }
1133    } else {
1134        if (dstScan == 4*width && srcScan == dstScan) {
1135            width *= height;
1136            height = 1;
1137        }
1138
1139        for (j = 0; j < height; j++) {
1140            IntArgbToIntArgbPreSrcOverMaskBlit_A1_line(dstBase, srcBase, width,
1141                                                       mul8_extra);
1142
1143            PTR_ADD(dstBase, dstScan);
1144            PTR_ADD(srcBase, srcScan);
1145        }
1146    }
1147}
1148
1149/***************************************************************/
1150
1151void ADD_SUFF(IntArgbToFourByteAbgrPreSrcOverMaskBlit)(MASKBLIT_PARAMS)
1152{
1153    mlib_d64 buff[BUFF_SIZE/2];
1154    void     *pbuff = buff;
1155    mlib_s32 extraA;
1156    mlib_s32 dstScan = pDstInfo->scanStride;
1157    mlib_s32 srcScan = pSrcInfo->scanStride;
1158    mlib_u8  *mul8_extra;
1159    mlib_s32 j;
1160
1161    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
1162
1163    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1164
1165    mul8_extra = mul8table[extraA];
1166
1167    vis_write_gsr(0 << 3);
1168
1169    if (pMask != NULL) {
1170        pMask += maskOff;
1171
1172        for (j = 0; j < height; j++) {
1173            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1174                                                   pSrcInfo, pDstInfo,
1175                                                   pPrim, pCompInfo);
1176
1177            IntArgbToIntArgbPreSrcOverMaskBlit_line(pbuff, srcBase, pMask,
1178                                                    width, mul8_extra,
1179                                                    (void*)mul8table);
1180
1181            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1182                                                   pSrcInfo, pDstInfo,
1183                                                   pPrim, pCompInfo);
1184
1185            PTR_ADD(dstBase, dstScan);
1186            PTR_ADD(srcBase, srcScan);
1187            PTR_ADD(pMask, maskScan);
1188        }
1189    } else {
1190        for (j = 0; j < height; j++) {
1191            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1192                                                   pSrcInfo, pDstInfo,
1193                                                   pPrim, pCompInfo);
1194
1195            IntArgbToIntArgbPreSrcOverMaskBlit_A1_line(pbuff, srcBase, width,
1196                                                       mul8_extra);
1197
1198            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1199                                                   pSrcInfo, pDstInfo,
1200                                                   pPrim, pCompInfo);
1201
1202            PTR_ADD(dstBase, dstScan);
1203            PTR_ADD(srcBase, srcScan);
1204        }
1205    }
1206
1207    if (pbuff != buff) {
1208        mlib_free(pbuff);
1209    }
1210}
1211
1212/***************************************************************/
1213
1214/* ##############################################################
1215 * IntArgbToIntArgbPreAlphaMaskBlit()
1216 */
1217
/*
 * MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)
 *
 * Blend step for the masked AlphaMaskBlit line routine.
 *   1. srcA (an lvalue) is replaced by mul8_extra[srcA].
 *   2. srcF is derived from dstA and dstF from the new srcA, each as
 *      ((alpha & And) ^ Xor) + Add using the SrcOp and DstOp triples.
 *   3. Both factors are scaled by pathA through MUL8_INT; dstF additionally
 *      gets (0xff - pathA) added.
 *   4. srcA is overwritten with MUL8_INT(srcF, srcA) and used as the source
 *      multiplier; rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_extra and the six SrcOp and DstOp values must be visible at the
 * expansion site.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)      \
{                                                               \
    mlib_s32 srcF, dstF;                                        \
    mlib_d64 srcTerm, dstTerm;                                  \
                                                                \
    srcA = mul8_extra[srcA];                                    \
                                                                \
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;           \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;           \
                                                                \
    dstF = MUL8_INT(pathA, dstF) + (0xff - pathA);              \
    srcF = MUL8_INT(pathA, srcF);                               \
                                                                \
    srcA = MUL8_INT(srcF, srcA);                                \
                                                                \
    dstTerm = MUL8_VIS(dstARGB, dstF);                          \
    srcTerm = MUL8_VIS(srcARGB, srcA);                          \
    rr = vis_fpadd16(srcTerm, dstTerm);                         \
}
1238
1239/**************************************************************/
1240
1241static void IntArgbToIntArgbPreAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1242                                                  mlib_f32 *src_ptr,
1243                                                  mlib_u8  *pMask,
1244                                                  mlib_s32 width,
1245                                                  mlib_s32 *log_val,
1246                                                  mlib_u8  *mul8_extra,
1247                                                  mlib_u8  *mul8_tbl);
1248
1249#pragma no_inline(IntArgbToIntArgbPreAlphaMaskBlit_line)
1250
1251static void IntArgbToIntArgbPreAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1252                                                  mlib_f32 *src_ptr,
1253                                                  mlib_u8  *pMask,
1254                                                  mlib_s32 width,
1255                                                  mlib_s32 *log_val,
1256                                                  mlib_u8  *mul8_extra,
1257                                                  mlib_u8  *mul8_tbl)
1258{
1259    mlib_s32 i;
1260    mlib_s32 pathA0, dstA0, srcA0;
1261    mlib_d64 res0;
1262    mlib_f32 dstARGB0, srcARGB0;
1263    mlib_s32 SrcOpAnd = log_val[0];
1264    mlib_s32 SrcOpXor = log_val[1];
1265    mlib_s32 SrcOpAdd = log_val[2];
1266    mlib_s32 DstOpAnd = log_val[3];
1267    mlib_s32 DstOpXor = log_val[4];
1268    mlib_s32 DstOpAdd = log_val[5];
1269    mlib_f32 or_alpha = vis_to_float(0xff000000);
1270
1271#pragma pipeloop(0)
1272    for (i = 0; i < width; i++) {
1273
1274        pathA0 = pMask[i];
1275
1276        dstA0 = *(mlib_u8*)dst_ptr;
1277
1278        dstARGB0 = *dst_ptr;
1279        srcA0 = *(mlib_u8*)src_ptr;
1280        srcARGB0 = *src_ptr;
1281        srcARGB0 = vis_fors(or_alpha, srcARGB0);
1282
1283        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1284
1285        *dst_ptr = vis_fpack16(res0);
1286        dst_ptr++;
1287        src_ptr++;
1288    }
1289
1290}
1291
1292/***************************************************************/
1293
/*
 * MASK_FILL(rr, dstA, dstARGB, srcA, srcARGB)
 *
 * Blend step for the AlphaMaskBlit line routine that has no coverage mask.
 *   1. srcA (an lvalue) is replaced by mul8_extra[srcA].
 *   2. srcF is derived from dstA and dstF from the new srcA, each as
 *      ((alpha & And) ^ Xor) + Add using the SrcOp and DstOp triples.
 *   3. srcA is overwritten with MUL8_INT(srcF, srcA) and used as the source
 *      multiplier; rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_extra and the six SrcOp and DstOp values must be visible at the
 * expansion site.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, dstA, dstARGB, srcA, srcARGB)      \
{                                                        \
    mlib_s32 srcF, dstF;                                 \
    mlib_d64 srcTerm, dstTerm;                           \
                                                         \
    srcA = mul8_extra[srcA];                             \
                                                         \
    dstF = ((srcA & DstOpAnd) ^ DstOpXor) + DstOpAdd;    \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;    \
                                                         \
    srcA = MUL8_INT(srcF, srcA);                         \
                                                         \
    dstTerm = MUL8_VIS(dstARGB, dstF);                   \
    srcTerm = MUL8_VIS(srcARGB, srcA);                   \
    rr = vis_fpadd16(srcTerm, dstTerm);                  \
}
1311
1312/***************************************************************/
1313
1314static void IntArgbToIntArgbPreAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1315                                                     mlib_f32 *src_ptr,
1316                                                     mlib_s32 width,
1317                                                     mlib_s32 *log_val,
1318                                                     mlib_u8  *mul8_extra,
1319                                                     mlib_u8  *mul8_tbl);
1320
1321#pragma no_inline(IntArgbToIntArgbPreAlphaMaskBlit_A1_line)
1322
1323static void IntArgbToIntArgbPreAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1324                                                     mlib_f32 *src_ptr,
1325                                                     mlib_s32 width,
1326                                                     mlib_s32 *log_val,
1327                                                     mlib_u8  *mul8_extra,
1328                                                     mlib_u8  *mul8_tbl)
1329{
1330    mlib_s32 i;
1331    mlib_s32 dstA0, srcA0;
1332    mlib_d64 res0;
1333    mlib_f32 dstARGB0, srcARGB0;
1334    mlib_s32 SrcOpAnd = log_val[0];
1335    mlib_s32 SrcOpXor = log_val[1];
1336    mlib_s32 SrcOpAdd = log_val[2];
1337    mlib_s32 DstOpAnd = log_val[3];
1338    mlib_s32 DstOpXor = log_val[4];
1339    mlib_s32 DstOpAdd = log_val[5];
1340    mlib_f32 or_alpha = vis_to_float(0xff000000);
1341
1342#pragma pipeloop(0)
1343    for (i = 0; i < width; i++) {
1344        dstA0 = *(mlib_u8*)(dst_ptr + i);
1345        srcA0 = *(mlib_u8*)(src_ptr + i);
1346        dstARGB0 = dst_ptr[i];
1347        srcARGB0 = src_ptr[i];
1348        srcARGB0 = vis_fors(or_alpha, srcARGB0);
1349
1350        MASK_FILL(res0, dstA0, dstARGB0, srcA0, srcARGB0);
1351
1352        dst_ptr[i] = vis_fpack16(res0);
1353    }
1354}
1355
1356/***************************************************************/
1357
1358void ADD_SUFF(IntArgbToIntArgbPreAlphaMaskBlit)(MASKBLIT_PARAMS)
1359{
1360    mlib_s32 extraA;
1361    mlib_s32 dstScan = pDstInfo->scanStride;
1362    mlib_s32 srcScan = pSrcInfo->scanStride;
1363    mlib_s32 log_val[6];
1364    mlib_s32 j;
1365    mlib_s32 SrcOpAnd;
1366    mlib_s32 SrcOpXor;
1367    mlib_s32 SrcOpAdd;
1368    mlib_s32 DstOpAnd;
1369    mlib_s32 DstOpXor;
1370    mlib_s32 DstOpAdd;
1371    mlib_u8  *mul8_extra;
1372
1373    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1374
1375    mul8_extra = mul8table[extraA];
1376
1377    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1378    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1379    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1380    SrcOpAdd -= SrcOpXor;
1381
1382    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1383    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1384    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1385    DstOpAdd -= DstOpXor;
1386
1387    log_val[0] = SrcOpAnd;
1388    log_val[1] = SrcOpXor;
1389    log_val[2] = SrcOpAdd;
1390    log_val[3] = DstOpAnd;
1391    log_val[4] = DstOpXor;
1392    log_val[5] = DstOpAdd;
1393
1394    vis_write_gsr(0 << 3);
1395
1396    if (pMask != NULL) {
1397        pMask += maskOff;
1398
1399        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1400            width *= height;
1401            height = 1;
1402        }
1403
1404        for (j = 0; j < height; j++) {
1405            IntArgbToIntArgbPreAlphaMaskBlit_line(dstBase, srcBase, pMask,
1406                                                  width, log_val, mul8_extra,
1407                                                  (void*)mul8table);
1408
1409            PTR_ADD(dstBase, dstScan);
1410            PTR_ADD(srcBase, srcScan);
1411            PTR_ADD(pMask, maskScan);
1412        }
1413    } else {
1414        if (dstScan == 4*width && srcScan == dstScan) {
1415            width *= height;
1416            height = 1;
1417        }
1418
1419        for (j = 0; j < height; j++) {
1420            IntArgbToIntArgbPreAlphaMaskBlit_A1_line(dstBase, srcBase,
1421                                                     width, log_val, mul8_extra,
1422                                                     (void*)mul8table);
1423
1424            PTR_ADD(dstBase, dstScan);
1425            PTR_ADD(srcBase, srcScan);
1426        }
1427    }
1428}
1429
1430/***************************************************************/
1431
1432void ADD_SUFF(IntArgbToFourByteAbgrPreAlphaMaskBlit)(MASKBLIT_PARAMS)
1433{
1434    mlib_d64 buff[BUFF_SIZE/2];
1435    void     *pbuff = buff;
1436    mlib_s32 extraA;
1437    mlib_s32 dstScan = pDstInfo->scanStride;
1438    mlib_s32 srcScan = pSrcInfo->scanStride;
1439    mlib_s32 log_val[6];
1440    mlib_s32 j;
1441    mlib_s32 SrcOpAnd;
1442    mlib_s32 SrcOpXor;
1443    mlib_s32 SrcOpAdd;
1444    mlib_s32 DstOpAnd;
1445    mlib_s32 DstOpXor;
1446    mlib_s32 DstOpAdd;
1447    mlib_u8  *mul8_extra;
1448
1449    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
1450
1451    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1452
1453    mul8_extra = mul8table[extraA];
1454
1455    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1456    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1457    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1458    SrcOpAdd -= SrcOpXor;
1459
1460    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1461    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1462    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1463    DstOpAdd -= DstOpXor;
1464
1465    log_val[0] = SrcOpAnd;
1466    log_val[1] = SrcOpXor;
1467    log_val[2] = SrcOpAdd;
1468    log_val[3] = DstOpAnd;
1469    log_val[4] = DstOpXor;
1470    log_val[5] = DstOpAdd;
1471
1472    vis_write_gsr(0 << 3);
1473
1474    if (pMask != NULL) {
1475        pMask += maskOff;
1476
1477        for (j = 0; j < height; j++) {
1478            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1479                                                   pSrcInfo, pDstInfo,
1480                                                   pPrim, pCompInfo);
1481
1482            IntArgbToIntArgbPreAlphaMaskBlit_line(pbuff, srcBase, pMask,
1483                                                  width, log_val, mul8_extra,
1484                                                  (void*)mul8table);
1485
1486            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1487                                                   pSrcInfo, pDstInfo,
1488                                                   pPrim, pCompInfo);
1489
1490            PTR_ADD(dstBase, dstScan);
1491            PTR_ADD(srcBase, srcScan);
1492            PTR_ADD(pMask, maskScan);
1493        }
1494    } else {
1495        for (j = 0; j < height; j++) {
1496            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1497                                                   pSrcInfo, pDstInfo,
1498                                                   pPrim, pCompInfo);
1499
1500            IntArgbToIntArgbPreAlphaMaskBlit_A1_line(pbuff, srcBase,
1501                                                     width, log_val, mul8_extra,
1502                                                     (void*)mul8table);
1503
1504            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1505                                                   pSrcInfo, pDstInfo,
1506                                                   pPrim, pCompInfo);
1507
1508            PTR_ADD(dstBase, dstScan);
1509            PTR_ADD(srcBase, srcScan);
1510        }
1511    }
1512
1513    if (pbuff != buff) {
1514        mlib_free(pbuff);
1515    }
1516}
1517
1518/***************************************************************/
1519
1520/* ##############################################################
1521 * IntRgbToIntArgbPreAlphaMaskBlit()
1522 */
1523
/*
 * MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)
 *
 * Blend step for the masked IntRgb AlphaMaskBlit line routine.
 *   - srcF is ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd, scaled by pathA via
 *     MUL8_INT, then looked up as mul8_tbl[srcF + srcA]; srcA is only read
 *     here and is used as an offset into mul8_tbl.
 *   - dstF is mul8_tbl[pathA + dstF_0] + (0xff - pathA); dstF_0 is likewise
 *     used as an offset into mul8_tbl.
 *   - rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_tbl, dstF_0 and the three SrcOp values must be visible at the
 * expansion site.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, pathA, dstA, dstARGB, srcA, srcARGB)      \
{                                                               \
    mlib_s32 srcF, dstF;                                        \
    mlib_d64 srcTerm, dstTerm;                                  \
                                                                \
    dstF = mul8_tbl[pathA + dstF_0] + (0xff - pathA);           \
                                                                \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;           \
    srcF = MUL8_INT(pathA, srcF);                               \
    srcF = mul8_tbl[srcF + srcA];                               \
                                                                \
    dstTerm = MUL8_VIS(dstARGB, dstF);                          \
    srcTerm = MUL8_VIS(srcARGB, srcF);                          \
    rr = vis_fpadd16(srcTerm, dstTerm);                         \
}
1541
1542/**************************************************************/
1543
1544static void IntRgbToIntArgbPreAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1545                                                 mlib_f32 *src_ptr,
1546                                                 mlib_u8  *pMask,
1547                                                 mlib_s32 width,
1548                                                 mlib_s32 *log_val,
1549                                                 mlib_s32 extraA,
1550                                                 mlib_s32 dstF_0,
1551                                                 mlib_u8  *mul8_tbl);
1552
1553#pragma no_inline(IntRgbToIntArgbPreAlphaMaskBlit_line)
1554
1555static void IntRgbToIntArgbPreAlphaMaskBlit_line(mlib_f32 *dst_ptr,
1556                                                 mlib_f32 *src_ptr,
1557                                                 mlib_u8  *pMask,
1558                                                 mlib_s32 width,
1559                                                 mlib_s32 *log_val,
1560                                                 mlib_s32 extraA,
1561                                                 mlib_s32 dstF_0,
1562                                                 mlib_u8  *mul8_tbl)
1563{
1564    mlib_s32 i;
1565    mlib_s32 pathA0, dstA0, srcA0;
1566    mlib_d64 res0;
1567    mlib_f32 dstARGB0, srcARGB0;
1568    mlib_s32 SrcOpAnd = log_val[0];
1569    mlib_s32 SrcOpXor = log_val[1];
1570    mlib_s32 SrcOpAdd = log_val[2];
1571    mlib_f32 or_alpha = vis_to_float(0xff000000);
1572
1573    srcA0 = extraA*256;
1574    dstF_0 *= 256;
1575
1576#pragma pipeloop(0)
1577    for (i = 0; i < width; i++) {
1578        pathA0 = pMask[i];
1579
1580        dstA0 = *(mlib_u8*)dst_ptr;
1581        dstARGB0 = *dst_ptr;
1582        srcARGB0 = *src_ptr;
1583
1584        srcARGB0 = vis_fors(or_alpha, srcARGB0);
1585
1586        MASK_FILL(res0, pathA0, dstA0, dstARGB0, srcA0, srcARGB0);
1587
1588        *dst_ptr = vis_fpack16(res0);
1589        dst_ptr++;
1590        src_ptr++;
1591    }
1592}
1593
1594/***************************************************************/
1595
/*
 * MASK_FILL(rr, dstA, dstARGB, srcA, srcARGB)
 *
 * Blend step for the IntRgb AlphaMaskBlit line routine that has no coverage
 * mask.
 *   - srcF is ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd, then looked up as
 *     mul8_tbl[srcF + srcA]; srcA is only read here and is used as an
 *     offset into mul8_tbl.
 *   - the destination multiplier is dstF_0, used as is.
 *   - rr receives vis_fpadd16() of the two MUL8_VIS products.
 * mul8_tbl, dstF_0 and the three SrcOp values must be visible at the
 * expansion site.
 */
#undef  MASK_FILL
#define MASK_FILL(rr, dstA, dstARGB, srcA, srcARGB)      \
{                                                        \
    mlib_s32 srcF;                                       \
    mlib_d64 srcTerm, dstTerm;                           \
                                                         \
    srcF = ((dstA & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd;    \
    srcF = mul8_tbl[srcF + srcA];                        \
                                                         \
    dstTerm = MUL8_VIS(dstARGB, dstF_0);                 \
    srcTerm = MUL8_VIS(srcARGB, srcF);                   \
    rr = vis_fpadd16(srcTerm, dstTerm);                  \
}
1610
1611/***************************************************************/
1612
1613static void IntRgbToIntArgbPreAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1614                                                    mlib_f32 *src_ptr,
1615                                                    mlib_s32 width,
1616                                                    mlib_s32 *log_val,
1617                                                    mlib_s32 extraA,
1618                                                    mlib_s32 dstF_0,
1619                                                    mlib_u8  *mul8_tbl);
1620
1621#pragma no_inline(IntRgbToIntArgbPreAlphaMaskBlit_A1_line)
1622
1623static void IntRgbToIntArgbPreAlphaMaskBlit_A1_line(mlib_f32 *dst_ptr,
1624                                                    mlib_f32 *src_ptr,
1625                                                    mlib_s32 width,
1626                                                    mlib_s32 *log_val,
1627                                                    mlib_s32 extraA,
1628                                                    mlib_s32 dstF_0,
1629                                                    mlib_u8  *mul8_tbl)
1630{
1631    mlib_s32 i;
1632    mlib_s32 dstA0, srcA0;
1633    mlib_d64 res0;
1634    mlib_f32 dstARGB0, srcARGB0;
1635    mlib_s32 SrcOpAnd = log_val[0];
1636    mlib_s32 SrcOpXor = log_val[1];
1637    mlib_s32 SrcOpAdd = log_val[2];
1638    mlib_f32 or_alpha = vis_to_float(0xff000000);
1639
1640    srcA0 = extraA*256;
1641
1642#pragma pipeloop(0)
1643    for (i = 0; i < width; i++) {
1644        dstA0 = *(mlib_u8*)dst_ptr;
1645
1646        dstARGB0 = *dst_ptr;
1647        srcARGB0 = *src_ptr;
1648        srcARGB0 = vis_fors(or_alpha, srcARGB0);
1649
1650        MASK_FILL(res0, dstA0, dstARGB0, srcA0, srcARGB0);
1651
1652        *dst_ptr = vis_fpack16(res0);
1653
1654        dst_ptr++;
1655        src_ptr++;
1656    }
1657}
1658
1659/***************************************************************/
1660
1661void ADD_SUFF(IntRgbToIntArgbPreAlphaMaskBlit)(MASKBLIT_PARAMS)
1662{
1663    mlib_s32 extraA;
1664    mlib_s32 dstScan = pDstInfo->scanStride;
1665    mlib_s32 srcScan = pSrcInfo->scanStride;
1666    mlib_s32 log_val[3];
1667    mlib_s32 j;
1668    mlib_s32 SrcOpAnd;
1669    mlib_s32 SrcOpXor;
1670    mlib_s32 SrcOpAdd;
1671    mlib_s32 DstOpAnd;
1672    mlib_s32 DstOpXor;
1673    mlib_s32 DstOpAdd;
1674    mlib_s32 dstF_0;
1675
1676    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1677
1678    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1679    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1680    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1681    SrcOpAdd -= SrcOpXor;
1682
1683    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1684    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1685    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1686    DstOpAdd -= DstOpXor;
1687
1688    log_val[0] = SrcOpAnd;
1689    log_val[1] = SrcOpXor;
1690    log_val[2] = SrcOpAdd;
1691
1692    vis_write_gsr(0 << 3);
1693
1694    dstF_0 = ((extraA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1695
1696    if (pMask != NULL) {
1697        pMask += maskOff;
1698
1699        if (dstScan == 4*width && srcScan == dstScan && maskScan == width) {
1700            width *= height;
1701            height = 1;
1702        }
1703
1704        for (j = 0; j < height; j++) {
1705            IntRgbToIntArgbPreAlphaMaskBlit_line(dstBase, srcBase, pMask,
1706                                                 width, log_val, extraA, dstF_0,
1707                                                 (void*)mul8table);
1708
1709            PTR_ADD(dstBase, dstScan);
1710            PTR_ADD(srcBase, srcScan);
1711            PTR_ADD(pMask, maskScan);
1712        }
1713    } else {
1714        if (dstScan == 4*width && srcScan == dstScan) {
1715            width *= height;
1716            height = 1;
1717        }
1718
1719        for (j = 0; j < height; j++) {
1720            IntRgbToIntArgbPreAlphaMaskBlit_A1_line(dstBase, srcBase, width,
1721                                                    log_val, extraA, dstF_0,
1722                                                    (void*)mul8table);
1723
1724            PTR_ADD(dstBase, dstScan);
1725            PTR_ADD(srcBase, srcScan);
1726        }
1727    }
1728}
1729
1730/***************************************************************/
1731
1732void ADD_SUFF(IntRgbToFourByteAbgrPreAlphaMaskBlit)(MASKBLIT_PARAMS)
1733{
1734    mlib_d64 buff[BUFF_SIZE/2];
1735    void     *pbuff = buff;
1736    mlib_s32 extraA;
1737    mlib_s32 dstScan = pDstInfo->scanStride;
1738    mlib_s32 srcScan = pSrcInfo->scanStride;
1739    mlib_s32 log_val[3];
1740    mlib_s32 j;
1741    mlib_s32 SrcOpAnd;
1742    mlib_s32 SrcOpXor;
1743    mlib_s32 SrcOpAdd;
1744    mlib_s32 DstOpAnd;
1745    mlib_s32 DstOpXor;
1746    mlib_s32 DstOpAdd;
1747    mlib_s32 dstF_0;
1748
1749    if (width > BUFF_SIZE) pbuff = mlib_malloc(width*sizeof(mlib_s32));
1750
1751    extraA = (mlib_s32)(pCompInfo->details.extraAlpha * 255.0 + 0.5);
1752
1753    SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
1754    SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
1755    SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval;
1756    SrcOpAdd -= SrcOpXor;
1757
1758    DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
1759    DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
1760    DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval;
1761    DstOpAdd -= DstOpXor;
1762
1763    log_val[0] = SrcOpAnd;
1764    log_val[1] = SrcOpXor;
1765    log_val[2] = SrcOpAdd;
1766
1767    vis_write_gsr(0 << 3);
1768
1769    dstF_0 = ((extraA & DstOpAnd) ^ DstOpXor) + DstOpAdd;
1770
1771    if (pMask != NULL) {
1772        pMask += maskOff;
1773
1774        for (j = 0; j < height; j++) {
1775            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1776                                                   pSrcInfo, pDstInfo,
1777                                                   pPrim, pCompInfo);
1778
1779            IntRgbToIntArgbPreAlphaMaskBlit_line(pbuff, srcBase, pMask, width,
1780                                                 log_val, extraA, dstF_0,
1781                                                 (void*)mul8table);
1782
1783            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1784                                                   pSrcInfo, pDstInfo,
1785                                                   pPrim, pCompInfo);
1786
1787            PTR_ADD(dstBase, dstScan);
1788            PTR_ADD(srcBase, srcScan);
1789            PTR_ADD(pMask, maskScan);
1790        }
1791    } else {
1792        for (j = 0; j < height; j++) {
1793            ADD_SUFF(FourByteAbgrToIntArgbConvert)(dstBase, pbuff, width, 1,
1794                                                   pSrcInfo, pDstInfo,
1795                                                   pPrim, pCompInfo);
1796
1797            IntRgbToIntArgbPreAlphaMaskBlit_A1_line(pbuff, srcBase, width,
1798                                                    log_val, extraA, dstF_0,
1799                                                    (void*)mul8table);
1800
1801            ADD_SUFF(IntArgbToFourByteAbgrConvert)(pbuff, dstBase, width, 1,
1802                                                   pSrcInfo, pDstInfo,
1803                                                   pPrim, pCompInfo);
1804
1805            PTR_ADD(dstBase, dstScan);
1806            PTR_ADD(srcBase, srcScan);
1807        }
1808    }
1809
1810    if (pbuff != buff) {
1811        mlib_free(pbuff);
1812    }
1813}
1814
1815/***************************************************************/
1816
1817#endif /* JAVA2D_NO_MLIB */
1818