1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
/*
 * Scalar channel swap used for IntArgb -> IntBgr style conversions.
 *
 * For a 32-bit value laid out as 0xAARRGGBB the result is 0xGGBBGGRR:
 * bits 0..7 and 16..23 are exchanged, bits 8..15 stay in place, and the
 * top byte is NOT cleared (it receives whatever "x << 16" leaves there).
 * Callers either OR in 0xff000000 afterwards or do not care about the
 * top byte.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be combined with other operators safely.  The argument is evaluated
 * three times and must therefore be free of side effects.
 */
#define ARGB_to_GBGR(x)        \
    (((x) << 16) | ((x) & 0xff00) | (((x) >> 16) & 0xff))
34
35/***************************************************************/
36
/*
 * Scalar channel swap that also clears the top byte.
 *
 * For a 32-bit value laid out as 0xAARRGGBB the result is 0x00BBGGRR:
 * bits 0..7 and 16..23 are exchanged, bits 8..15 stay in place, and
 * bits 24..31 of the result are zero.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be combined with other operators safely.  The argument is evaluated
 * three times and must therefore be free of side effects.
 */
#define ARGB_to_BGR(x)         \
    ((((x) << 16) & 0xff0000) | ((x) & 0xff00) | (((x) >> 16) & 0xff))
39
40/***************************************************************/
41
/*
 * Assembles pixel number i of a packed 3-bytes-per-pixel row into one
 * integer: (byte 0 << 16) | (byte 1 << 8) | byte 2.
 *
 * NOTE: this macro reads from a variable named "src" that must be in
 * scope at the point of use (a byte pointer to the start of the row).
 * The index argument is parenthesized so expressions such as
 * READ_Bgr(k + 1) address the intended pixel; it is evaluated three
 * times and must be free of side effects.
 */
#define READ_Bgr(i)    \
    ((src[3*(i)] << 16) | (src[3*(i) + 1] << 8) | src[3*(i) + 2])
44
45/***************************************************************/
46
/*
 * Two-pixel vector form of ARGB_to_GBGR.
 *
 * src0 and src1 are two 32-bit pixels held in mlib_f32 registers; dst
 * receives the pair of converted pixels as one mlib_d64.
 *
 * Assuming vis_fpmerge() interleaves the bytes of its two operands
 * (a0 b0 a1 b1 a2 b2 a3 b3), the sequence below turns the input bytes
 * [A0 R0 G0 B0] / [A1 R1 G1 B1] into [G0 B0 G0 R0  G1 B1 G1 R1],
 * i.e. the top byte of each result pixel holds a copy of G, matching
 * the scalar macro.
 */
#define ARGB_to_GBGR_FL2(dst, src0, src1) {                    \
    mlib_d64 fl2_mix, fl2_dup, fl2_swp;                        \
    fl2_mix = vis_fpmerge(src0, src1);                         \
    fl2_dup = vis_fpmerge(vis_read_lo(fl2_mix),                \
                          vis_read_lo(fl2_mix));               \
    fl2_swp = vis_fpmerge(vis_read_lo(fl2_mix),                \
                          vis_read_hi(fl2_mix));               \
    dst = vis_fpmerge(vis_read_hi(fl2_dup),                    \
                      vis_read_lo(fl2_swp));                   \
}
54
55/***************************************************************/
56
/*
 * Two-pixel vector form of ARGB_to_BGR.
 *
 * src0 and src1 are two 32-bit pixels held in mlib_f32 registers; dst
 * receives the pair of converted pixels as one mlib_d64.
 *
 * Assuming vis_fpmerge() interleaves the bytes of its two operands
 * (a0 b0 a1 b1 a2 b2 a3 b3), the sequence below turns the input bytes
 * [A0 R0 G0 B0] / [A1 R1 G1 B1] into [00 B0 G0 R0  00 B1 G1 R1]:
 * the zero operand supplies the cleared top byte of each result pixel.
 */
#define ARGB_to_BGR_FL2(dst, src0, src1) {                     \
    mlib_d64 fl2_mix, fl2_zg, fl2_swp;                         \
    fl2_mix = vis_fpmerge(src0, src1);                         \
    fl2_zg  = vis_fpmerge(vis_fzeros(),                        \
                          vis_read_lo(fl2_mix));               \
    fl2_swp = vis_fpmerge(vis_read_lo(fl2_mix),                \
                          vis_read_hi(fl2_mix));               \
    dst = vis_fpmerge(vis_read_hi(fl2_zg),                     \
                      vis_read_lo(fl2_swp));                   \
}
64
65/***************************************************************/
66
67void ADD_SUFF(IntBgrToIntArgbConvert)(BLIT_PARAMS)
68{
69    mlib_s32 dstScan = pDstInfo->scanStride;
70    mlib_s32 srcScan = pSrcInfo->scanStride;
71    mlib_d64 dd, amask;
72    mlib_s32 i, i0, j, x;
73
74    if (dstScan == 4*width && srcScan == 4*width) {
75        width *= height;
76        height = 1;
77    }
78
79    amask = vis_to_double_dup(0xFF000000);
80    vis_alignaddr(NULL, 7);
81
82    for (j = 0; j < height; j++) {
83        mlib_u32 *src = srcBase;
84        mlib_u32 *dst = dstBase;
85
86        i = i0 = 0;
87
88        if ((mlib_s32)dst & 7) {
89            x = src[i];
90            dst[i] = 0xff000000 | ARGB_to_GBGR(x);
91            i0 = 1;
92        }
93
94#pragma pipeloop(0)
95        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
96            ARGB2ABGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
97            *(mlib_d64*)(dst + i) = vis_for(dd, amask);
98        }
99
100        if (i < width) {
101            x = src[i];
102            dst[i] = 0xff000000 | ARGB_to_GBGR(x);
103        }
104
105        PTR_ADD(dstBase, dstScan);
106        PTR_ADD(srcBase, srcScan);
107    }
108}
109
110/***************************************************************/
111
112void ADD_SUFF(IntBgrToIntArgbScaleConvert)(SCALE_PARAMS)
113{
114    mlib_s32 dstScan = pDstInfo->scanStride;
115    mlib_s32 srcScan = pSrcInfo->scanStride;
116    mlib_d64 dd, amask;
117    mlib_s32 j, x;
118
119    amask = vis_to_double_dup(0xFF000000);
120    vis_alignaddr(NULL, 7);
121
122    for (j = 0; j < height; j++) {
123        mlib_u32 *src = srcBase;
124        mlib_u32 *dst = dstBase;
125        mlib_u32 *dst_end = dst + width;
126        mlib_s32 tmpsxloc = sxloc;
127
128        PTR_ADD(src, (syloc >> shift) * srcScan);
129
130        if ((mlib_s32)dst & 7) {
131            x = src[tmpsxloc >> shift];
132            *dst++ = 0xff000000 | ARGB_to_GBGR(x);
133            tmpsxloc += sxinc;
134        }
135
136#pragma pipeloop(0)
137        for (; dst <= dst_end - 2; dst += 2) {
138            ARGB2ABGR_FL2(dd, ((mlib_f32*)src)[tmpsxloc >> shift],
139                              ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
140            *(mlib_d64*)dst = vis_for(dd, amask);
141            tmpsxloc += 2*sxinc;
142        }
143
144        for (; dst < dst_end; dst++) {
145            x = src[tmpsxloc >> shift];
146            *dst++ = 0xff000000 | ARGB_to_GBGR(x);
147            tmpsxloc += sxinc;
148        }
149
150        PTR_ADD(dstBase, dstScan);
151        syloc += syinc;
152    }
153}
154
155/***************************************************************/
156
157void ADD_SUFF(IntArgbToIntBgrConvert)(BLIT_PARAMS)
158{
159    mlib_s32 dstScan = pDstInfo->scanStride;
160    mlib_s32 srcScan = pSrcInfo->scanStride;
161    mlib_d64 dd;
162    mlib_s32 i, i0, j, x;
163
164    if (dstScan == 4*width && srcScan == 4*width) {
165        width *= height;
166        height = 1;
167    }
168
169    for (j = 0; j < height; j++) {
170        mlib_u32 *src = srcBase;
171        mlib_u32 *dst = dstBase;
172
173        i = i0 = 0;
174
175        if ((mlib_s32)dst & 7) {
176            x = src[i];
177            dst[i] = ARGB_to_GBGR(x);
178            i0 = 1;
179        }
180
181#pragma pipeloop(0)
182        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
183            ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
184            *(mlib_d64*)(dst + i) = dd;
185        }
186
187        if (i < width) {
188            x = src[i];
189            dst[i] = ARGB_to_GBGR(x);
190        }
191
192        PTR_ADD(dstBase, dstScan);
193        PTR_ADD(srcBase, srcScan);
194    }
195}
196
197/***************************************************************/
198
199void ADD_SUFF(IntArgbToIntBgrScaleConvert)(SCALE_PARAMS)
200{
201    mlib_s32 dstScan = pDstInfo->scanStride;
202    mlib_s32 srcScan = pSrcInfo->scanStride;
203    mlib_d64 dd;
204    mlib_s32 j, x;
205
206    for (j = 0; j < height; j++) {
207        mlib_u32 *src = srcBase;
208        mlib_u32 *dst = dstBase;
209        mlib_u32 *dst_end = dst + width;
210        mlib_s32 tmpsxloc = sxloc;
211
212        PTR_ADD(src, (syloc >> shift) * srcScan);
213
214        if ((mlib_s32)dst & 7) {
215            x = src[tmpsxloc >> shift];
216            *dst++ = ARGB_to_GBGR(x);
217            tmpsxloc += sxinc;
218        }
219
220#pragma pipeloop(0)
221        for (; dst <= dst_end - 2; dst += 2) {
222            ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[tmpsxloc >> shift],
223                                 ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
224            *(mlib_d64*)dst = dd;
225            tmpsxloc += 2*sxinc;
226        }
227
228        for (; dst < dst_end; dst++) {
229            x = src[tmpsxloc >> shift];
230            *dst++ = ARGB_to_GBGR(x);
231            tmpsxloc += sxinc;
232        }
233
234        PTR_ADD(dstBase, dstScan);
235        syloc += syinc;
236    }
237}
238
239/***************************************************************/
240
/*
 * Expands 24 packed bytes (eight 3-byte pixels) into eight 4-byte pixels.
 *
 * Inputs  (must be in scope at the point of use):
 *   sd0, sd1, sd2 - three consecutive 8-byte vectors of source bytes
 *   sFF           - vector supplying the extra byte of every pixel
 * Outputs (must be in scope at the point of use):
 *   dd0..dd3      - four 8-byte vectors, two result pixels each
 *
 * Assuming vis_fpmerge() interleaves the bytes of its two 4-byte operands
 * (a0 b0 a1 b1 a2 b2 a3 b3): the first three merge rounds de-interleave
 * the input into per-channel vectors (sdg = bytes 0,3,6,...; sdh = bytes
 * 1,4,7,...; sdi = bytes 2,5,8,...), and the remaining rounds re-interleave
 * them together with sFF so that each result pixel is
 * [sFF byte, byte 3k, byte 3k+1, byte 3k+2].
 *
 * The macro is a bare brace block and is used without a trailing
 * semicolon.
 */
#define INSERT_U8_34R {                                        \
    mlib_d64 sda, sdb, sdc, sdd;                               \
    mlib_d64 sde, sdf, sdg, sdh;                               \
    mlib_d64 sdi, sdj, sdk, sdl;                               \
    mlib_d64 sdm;                                              \
                                                               \
    sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));     \
    sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));     \
    sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));     \
    sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));     \
    sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));     \
    sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));     \
    sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));     \
    sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));     \
    sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));     \
    sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));     \
    sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));     \
    sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh));     \
    sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh));     \
    dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj));     \
    dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj));     \
    dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk));     \
    dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk));     \
}
265
266/***************************************************************/
267
/*
 * Unscaled blit from a packed 3-bytes-per-pixel source to a 32-bit
 * destination.  Each result pixel is
 * (byte 0 << 16) | (byte 1 << 8) | byte 2 with a zero top byte.
 *
 * Rows narrower than 16 pixels are converted with the scalar READ_Bgr
 * macro only.  Wider rows use: an optional scalar pixel to align dst to
 * 8 bytes, a VIS loop converting 8 pixels (24 source bytes) per
 * iteration through INSERT_U8_34R, and a scalar tail.
 *
 * The names src, sd0..sd2, sFF and dd0..dd3 are referenced implicitly by
 * READ_Bgr / INSERT_U8_34R and must not be renamed.
 */
void ADD_SUFF(ThreeByteBgrToIntBgrConvert)(BLIT_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 *sp;
    mlib_d64 sFF;
    mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
    mlib_s32 i, i0, j;

    /* Narrow rows: plain scalar conversion, no alignment handling. */
    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8  *src = srcBase;
            mlib_u32 *dst = dstBase;

            for (i = 0; i < width; i++) {
                dst[i] = READ_Bgr(i);
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /* Both rasters are gap-free: process them as one long row. */
    if (srcScan == 3*width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    /* Despite its name sFF is all zeros here: the inserted byte is 0. */
    sFF = vis_fzero();

    for (j = 0; j < height; j++) {
        mlib_u8  *src = srcBase;
        mlib_f32 *dst = dstBase;

        i = i0 = 0;

        /* Leading pixel when dst is not 8-byte aligned. */
        if ((mlib_s32)dst & 7) {
            ((mlib_s32*)dst)[i] = READ_Bgr(i);
            i0 = 1;
        }

        /*
         * Set up aligned reads starting at source byte 3*i0 and preload
         * the first vector; each iteration reuses the last vector of the
         * previous one (s3 -> s0).
         * NOTE(review): the loop loads whole 8-byte words, so it can read
         * a few bytes past the last source byte actually needed - confirm
         * the source buffers tolerate that.
         */
        sp = vis_alignaddr(src, 3*i0);
        s3 = *sp++;

#pragma pipeloop(0)
        for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
            s0 = s3;
            s1 = *sp++;
            s2 = *sp++;
            s3 = *sp++;
            /* Realign to obtain the 24 bytes of pixels i .. i+7. */
            sd0 = vis_faligndata(s0, s1);
            sd1 = vis_faligndata(s1, s2);
            sd2 = vis_faligndata(s2, s3);

            /* sd0..sd2 (+ sFF) -> dd0..dd3, two pixels per vector. */
            INSERT_U8_34R

            *(mlib_d64*)(dst + i    ) = dd0;
            *(mlib_d64*)(dst + i + 2) = dd1;
            *(mlib_d64*)(dst + i + 4) = dd2;
            *(mlib_d64*)(dst + i + 6) = dd3;
        }

        /* Up to 7 remaining pixels are converted one by one. */
        for (; i < width; i++) {
            ((mlib_s32*)dst)[i] = READ_Bgr(i);
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}
339
340/***************************************************************/
341
342void ADD_SUFF(ThreeByteBgrToIntBgrScaleConvert)(SCALE_PARAMS)
343{
344    mlib_s32 dstScan = pDstInfo->scanStride;
345    mlib_s32 srcScan = pSrcInfo->scanStride;
346    mlib_d64 dd, dzero;
347    mlib_s32 i, i0, i1, j;
348
349    if (width < 16) {
350        for (j = 0; j < height; j++) {
351            mlib_u8  *src = srcBase;
352            mlib_s32 *dst = dstBase;
353            mlib_s32 *dst_end = dst + width;
354            mlib_s32 tmpsxloc = sxloc;
355
356            PTR_ADD(src, (syloc >> shift) * srcScan);
357
358            for (; dst < dst_end; dst++) {
359                i = tmpsxloc >> shift;
360                tmpsxloc += sxinc;
361                *(mlib_s32*)dst = READ_Bgr(i);
362            }
363
364            PTR_ADD(dstBase, dstScan);
365            syloc += syinc;
366        }
367        return;
368    }
369
370    dzero = vis_fzero();
371
372    vis_alignaddr(NULL, 7);
373
374    for (j = 0; j < height; j++) {
375        mlib_u8  *src = srcBase;
376        mlib_f32 *dst = dstBase;
377        mlib_f32 *dst_end = dst + width;
378        mlib_s32 tmpsxloc = sxloc;
379
380        PTR_ADD(src, (syloc >> shift) * srcScan);
381
382        if ((mlib_s32)dst & 7) {
383            i = tmpsxloc >> shift;
384            tmpsxloc += sxinc;
385            *(mlib_s32*)dst = READ_Bgr(i);
386            dst++;
387        }
388
389#pragma pipeloop(0)
390        for (; dst <= dst_end - 2; dst += 2) {
391            i0 = tmpsxloc >> shift;
392            i1 = (tmpsxloc + sxinc) >> shift;
393            tmpsxloc += 2*sxinc;
394
395            dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
396            dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
397            dd = vis_faligndata(vis_ld_u8(src + 3*i1    ), dd);
398            dd = vis_faligndata(dzero, dd);
399            dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
400            dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
401            dd = vis_faligndata(vis_ld_u8(src + 3*i0    ), dd);
402            dd = vis_faligndata(dzero, dd);
403
404            *(mlib_d64*)dst = dd;
405        }
406
407        for (; dst < dst_end; dst++) {
408            i = tmpsxloc >> shift;
409            tmpsxloc += sxinc;
410            *(mlib_s32*)dst = READ_Bgr(i);
411        }
412
413        PTR_ADD(dstBase, dstScan);
414        syloc += syinc;
415    }
416}
417
418/***************************************************************/
419
420void ADD_SUFF(IntArgbBmToIntBgrXparOver)(BLIT_PARAMS)
421{
422    mlib_s32 dstScan = pDstInfo->scanStride;
423    mlib_s32 srcScan = pSrcInfo->scanStride;
424    mlib_d64 dd;
425    mlib_s32 i, i0, j, mask, x;
426
427    if (dstScan == 4*width && srcScan == 4*width) {
428        width *= height;
429        height = 1;
430    }
431
432    for (j = 0; j < height; j++) {
433        mlib_s32 *src = srcBase;
434        mlib_s32 *dst = dstBase;
435
436        i = i0 = 0;
437
438        if ((mlib_s32)dst & 7) {
439            if (*(mlib_u8*)(src + i)) {
440                x = src[i];
441                dst[i] = ARGB_to_GBGR(x);
442            }
443            i0 = 1;
444        }
445
446#pragma pipeloop(0)
447        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
448            ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
449            mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
450                   (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
451            vis_pst_32(dd, dst + i, mask);
452        }
453
454        if (i < width) {
455            if (*(mlib_u8*)(src + i)) {
456                x = src[i];
457                dst[i] = ARGB_to_GBGR(x);
458            }
459        }
460
461        PTR_ADD(dstBase, dstScan);
462        PTR_ADD(srcBase, srcScan);
463    }
464}
465
466/***************************************************************/
467
468void ADD_SUFF(IntArgbBmToIntBgrScaleXparOver)(SCALE_PARAMS)
469{
470    mlib_s32 dstScan = pDstInfo->scanStride;
471    mlib_s32 srcScan = pSrcInfo->scanStride;
472    mlib_d64 dd;
473    mlib_s32 j, mask;
474
475    for (j = 0; j < height; j++) {
476        mlib_s32 *src = srcBase;
477        mlib_s32 *dst = dstBase;
478        mlib_s32 *dst_end = dst + width;
479        mlib_s32 tmpsxloc = sxloc;
480
481        PTR_ADD(src, (syloc >> shift) * srcScan);
482
483        if ((mlib_s32)dst & 7) {
484            mlib_s32 *pp = src + (tmpsxloc >> shift);
485            if (*(mlib_u8*)pp) {
486                *dst = ARGB_to_GBGR(*pp);
487            }
488            dst++;
489            tmpsxloc += sxinc;
490        }
491
492#pragma pipeloop(0)
493        for (; dst <= dst_end - 2; dst += 2) {
494            mlib_s32 *pp0 = src + (tmpsxloc >> shift);
495            mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
496            ARGB_to_GBGR_FL2(dd, *(mlib_f32*)pp0, *(mlib_f32*)pp1);
497            mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
498                   ((mlib_u32)(-*(mlib_u8*)pp1) >> 31);
499            vis_pst_32(dd, dst, mask);
500            tmpsxloc += 2*sxinc;
501        }
502
503        for (; dst < dst_end; dst++) {
504            mlib_s32 *pp = src + (tmpsxloc >> shift);
505            if (*(mlib_u8*)pp) {
506                *dst = ARGB_to_GBGR(*pp);
507            }
508        }
509
510        PTR_ADD(dstBase, dstScan);
511        syloc += syinc;
512    }
513}
514
515/***************************************************************/
516
517void ADD_SUFF(IntArgbBmToIntBgrXparBgCopy)(BCOPY_PARAMS)
518{
519    mlib_s32 dstScan = pDstInfo->scanStride;
520    mlib_s32 srcScan = pSrcInfo->scanStride;
521    mlib_d64 dd, d_bgpixel;
522    mlib_s32 i, i0, j, mask;
523
524    if (dstScan == 4*width && srcScan == 4*width) {
525        width *= height;
526        height = 1;
527    }
528
529    vis_alignaddr(NULL, 1);
530    d_bgpixel = vis_to_double_dup(bgpixel);
531
532    for (j = 0; j < height; j++) {
533        mlib_s32 *src = srcBase;
534        mlib_s32 *dst = dstBase;
535
536        i = i0 = 0;
537
538        if ((mlib_s32)dst & 7) {
539            if (*(mlib_u8*)(src + i)) {
540                dst[i] = ARGB_to_GBGR(src[i]);
541            } else {
542                dst[i] = bgpixel;
543            }
544            i0 = 1;
545        }
546
547#pragma pipeloop(0)
548        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
549            ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
550            mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
551                   (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
552            *(mlib_d64*)(dst + i) = d_bgpixel;
553            vis_pst_32(dd, dst + i, mask);
554        }
555
556        if (i < width) {
557            if (*(mlib_u8*)(src + i)) {
558                dst[i] = ARGB_to_GBGR(src[i]);
559            } else {
560                dst[i] = bgpixel;
561            }
562        }
563
564        PTR_ADD(dstBase, dstScan);
565        PTR_ADD(srcBase, srcScan);
566    }
567}
568
569/***************************************************************/
570
571void ADD_SUFF(ByteIndexedToIntBgrConvert)(BLIT_PARAMS)
572{
573    jint *pixLut = pSrcInfo->lutBase;
574    mlib_s32 dstScan = pDstInfo->scanStride;
575    mlib_s32 srcScan = pSrcInfo->scanStride;
576    mlib_d64 dd;
577    mlib_s32 i, i0, j, x;
578
579    if (srcScan == width && dstScan == 4*width) {
580        width *= height;
581        height = 1;
582    }
583
584    for (j = 0; j < height; j++) {
585        mlib_u8  *src = srcBase;
586        mlib_s32 *dst = dstBase;
587
588        i = i0 = 0;
589
590        if ((mlib_s32)dst & 7) {
591            x = pixLut[src[i]];
592            dst[i] = ARGB_to_GBGR(x);
593            i0 = 1;
594        }
595
596#pragma pipeloop(0)
597        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
598            ARGB_to_GBGR_FL2(dd, ((mlib_f32*)pixLut)[src[i]],
599                                 ((mlib_f32*)pixLut)[src[i + 1]]);
600            *(mlib_d64*)(dst + i) = dd;
601        }
602
603        for (; i < width; i++) {
604            x = pixLut[src[i]];
605            dst[i] = ARGB_to_GBGR(x);
606        }
607
608        PTR_ADD(dstBase, dstScan);
609        PTR_ADD(srcBase, srcScan);
610    }
611}
612
613/***************************************************************/
614
615void ADD_SUFF(ByteIndexedToIntBgrScaleConvert)(SCALE_PARAMS)
616{
617    jint *pixLut = pSrcInfo->lutBase;
618    mlib_s32 dstScan = pDstInfo->scanStride;
619    mlib_s32 srcScan = pSrcInfo->scanStride;
620    mlib_d64 dd;
621    mlib_s32 j, x;
622
623    for (j = 0; j < height; j++) {
624        mlib_u8  *src = srcBase;
625        mlib_s32 *dst = dstBase;
626        mlib_s32 *dst_end = dst + width;
627        mlib_s32 tmpsxloc = sxloc;
628
629        PTR_ADD(src, (syloc >> shift) * srcScan);
630
631        if ((mlib_s32)dst & 7) {
632            x = pixLut[src[tmpsxloc >> shift]];
633            *dst++ = ARGB_to_GBGR(x);
634            tmpsxloc += sxinc;
635        }
636
637#pragma pipeloop(0)
638        for (; dst <= dst_end - 2; dst += 2) {
639            mlib_f32 f0 = ((mlib_f32*)pixLut)[src[tmpsxloc >> shift]];
640            mlib_f32 f1 = ((mlib_f32*)pixLut)[src[(tmpsxloc + sxinc) >> shift]];
641            ARGB_to_GBGR_FL2(dd, f0, f1);
642            *(mlib_d64*)dst = dd;
643            tmpsxloc += 2*sxinc;
644        }
645
646        for (; dst < dst_end; dst++) {
647            x = pixLut[src[tmpsxloc >> shift]];
648            *dst++ = ARGB_to_GBGR(x);
649            tmpsxloc += sxinc;
650        }
651
652        PTR_ADD(dstBase, dstScan);
653        syloc += syinc;
654    }
655}
656
657/***************************************************************/
658
659void ADD_SUFF(ByteIndexedBmToIntBgrXparOver)(BLIT_PARAMS)
660{
661    jint *pixLut = pSrcInfo->lutBase;
662    mlib_s32 dstScan = pDstInfo->scanStride;
663    mlib_s32 srcScan = pSrcInfo->scanStride;
664    mlib_d64 dd;
665    mlib_s32 i, i0, j, x, mask;
666
667    if (srcScan == width && dstScan == 4*width) {
668        width *= height;
669        height = 1;
670    }
671
672    for (j = 0; j < height; j++) {
673        mlib_u8  *src = srcBase;
674        mlib_s32 *dst = dstBase;
675
676        i = i0 = 0;
677
678        if ((mlib_s32)dst & 7) {
679            x = pixLut[src[i]];
680            if (x < 0) {
681                dst[i] = ARGB_to_BGR(x);
682            }
683            i0 = 1;
684        }
685
686#pragma pipeloop(0)
687        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
688            mlib_f32 *pp0 = (mlib_f32*)pixLut + src[i];
689            mlib_f32 *pp1 = (mlib_f32*)pixLut + src[i + 1];
690            ARGB_to_BGR_FL2(dd, *pp0, *pp1);
691            mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
692            vis_pst_32(dd, dst + i, mask);
693        }
694
695        for (; i < width; i++) {
696            x = pixLut[src[i]];
697            if (x < 0) {
698                dst[i] = ARGB_to_BGR(x);
699            }
700        }
701
702        PTR_ADD(dstBase, dstScan);
703        PTR_ADD(srcBase, srcScan);
704    }
705}
706
707/***************************************************************/
708
709void ADD_SUFF(ByteIndexedBmToIntBgrScaleXparOver)(SCALE_PARAMS)
710{
711    jint *pixLut = pSrcInfo->lutBase;
712    mlib_s32 dstScan = pDstInfo->scanStride;
713    mlib_s32 srcScan = pSrcInfo->scanStride;
714    mlib_d64 dd;
715    mlib_s32 j, x, mask;
716
717    for (j = 0; j < height; j++) {
718        mlib_u8  *src = srcBase;
719        mlib_s32 *dst = dstBase;
720        mlib_s32 *dst_end = dst + width;
721        mlib_s32 tmpsxloc = sxloc;
722
723        PTR_ADD(src, (syloc >> shift) * srcScan);
724
725        if ((mlib_s32)dst & 7) {
726            x = pixLut[src[tmpsxloc >> shift]];
727            tmpsxloc += sxinc;
728            if (x < 0) {
729                *dst = ARGB_to_BGR(x);
730            }
731            dst++;
732        }
733
734#pragma pipeloop(0)
735        for (; dst <= dst_end - 2; dst += 2) {
736            mlib_f32 *p0 = (mlib_f32*)pixLut + src[tmpsxloc >> shift];
737            mlib_f32 *p1 = (mlib_f32*)pixLut + src[(tmpsxloc + sxinc) >> shift];
738            ARGB_to_BGR_FL2(dd, *p0, *p1);
739            mask = (((*(mlib_u8*)p0) >> 6) & 2) | ((*(mlib_u8*)p1) >> 7);
740            tmpsxloc += 2*sxinc;
741            vis_pst_32(dd, dst, mask);
742        }
743
744        for (; dst < dst_end; dst++) {
745            x = pixLut[src[tmpsxloc >> shift]];
746            tmpsxloc += sxinc;
747            if (x < 0) {
748                *dst = ARGB_to_BGR(x);
749            }
750        }
751
752        PTR_ADD(dstBase, dstScan);
753        syloc += syinc;
754    }
755}
756
757/***************************************************************/
758
759void ADD_SUFF(ByteIndexedBmToIntBgrXparBgCopy)(BCOPY_PARAMS)
760{
761    jint *pixLut = pSrcInfo->lutBase;
762    mlib_s32 dstScan = pDstInfo->scanStride;
763    mlib_s32 srcScan = pSrcInfo->scanStride;
764    mlib_d64 dd, d_bgpixel;
765    mlib_s32 j, x, mask;
766
767    if (srcScan == width && dstScan == 4*width) {
768        width *= height;
769        height = 1;
770    }
771
772    d_bgpixel = vis_to_double_dup(bgpixel);
773
774    for (j = 0; j < height; j++) {
775        mlib_u8  *src = srcBase;
776        mlib_s32 *dst = dstBase;
777        mlib_s32 *dst_end;
778
779        dst_end = dst + width;
780
781        if ((mlib_s32)dst & 7) {
782            x = pixLut[*src++];
783            if (x < 0) {
784                *dst = ARGB_to_GBGR(x);
785            } else {
786                *dst = bgpixel;
787            }
788            dst++;
789        }
790
791#pragma pipeloop(0)
792        for (; dst <= (dst_end - 2); dst += 2) {
793            mlib_f32 *pp0 = (mlib_f32*)pixLut + src[0];
794            mlib_f32 *pp1 = (mlib_f32*)pixLut + src[1];
795            ARGB_to_GBGR_FL2(dd, *pp0, *pp1);
796            mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
797            *(mlib_d64*)dst = d_bgpixel;
798            vis_pst_32(dd, dst, mask);
799            src += 2;
800        }
801
802        while (dst < dst_end) {
803            x = pixLut[*src++];
804            if (x < 0) {
805                *dst = ARGB_to_GBGR(x);
806            } else {
807                *dst = bgpixel;
808            }
809            dst++;
810        }
811
812        PTR_ADD(dstBase, dstScan);
813        PTR_ADD(srcBase, srcScan);
814    }
815}
816
817/***************************************************************/
818
/*
 * Draws a list of anti-aliased glyphs into a 32-bit raster.
 *
 * For every glyph the 8-bit coverage image (glyphs[n].pixels) is clipped
 * against clipLeft/clipTop/clipRight/clipBottom and then combined with
 * the destination, one coverage byte per destination pixel:
 *   coverage == 0    destination left untouched
 *   coverage == 255  destination = fgpixel
 *   otherwise        destination = blend of the colour derived from
 *                    argbcolor and the old destination pixel, masked
 *                    with 0x00ffffff
 *
 * Each row is processed as: optional scalar pixel to align dst to
 * 8 bytes, a two-pixel VIS loop, then a scalar tail.
 */
void ADD_SUFF(IntBgrDrawGlyphListAA)(GLYPH_LIST_PARAMS)
{
    mlib_s32 glyphCounter;
    mlib_s32 scan = pRasInfo->scanStride;
    mlib_u8  *dstBase;
    mlib_s32 j;
    mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
    mlib_d64 done, done16, d_half, maskRGB, dzero;
    mlib_s32 pix, mask, mask_z;
    mlib_f32 srcG_f;

    /* Constants replicated into both 32-bit halves of a 64-bit vector. */
    done = vis_to_double_dup(0x7fff7fff);
    done16 = vis_to_double_dup(0x7fff);
    /* 1 << 6 in every 16-bit lane; presumably the rounding term. */
    d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));

    fgpixel_d = vis_to_double_dup(fgpixel);
    srcG_f = vis_to_float(argbcolor);
    maskRGB = vis_to_double_dup(0xffffff);
    dzero = vis_fzero();

    /*
     * Reorder the channels of the source colour in place (ARGB -> ABGR,
     * judging by the macro name - defined outside this file).
     */
    ARGB2ABGR_FL(srcG_f)

    /* NOTE(review): presumably selects scale factor 0 for vis_fpack16. */
    vis_write_gsr(0 << 3);

    for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
        const jubyte *pixels;
        unsigned int rowBytes;
        int left, top;
        int width, height;
        int right, bottom;

        pixels = (const jubyte *) glyphs[glyphCounter].pixels;

        /* Glyphs without a coverage image are skipped. */
        if (!pixels) continue;

        left = glyphs[glyphCounter].x;
        top = glyphs[glyphCounter].y;
        width = glyphs[glyphCounter].width;
        height = glyphs[glyphCounter].height;
        /* Row stride of the coverage image = unclipped glyph width. */
        rowBytes = width;
        right = left + width;
        bottom = top + height;
        /* Clip; skip the coverage bytes that fall left of / above it. */
        if (left < clipLeft) {
            pixels += clipLeft - left;
            left = clipLeft;
        }
        if (top < clipTop) {
            pixels += (clipTop - top) * rowBytes;
            top = clipTop;
        }
        if (right > clipRight) {
            right = clipRight;
        }
        if (bottom > clipBottom) {
            bottom = clipBottom;
        }
        /* Nothing visible after clipping. */
        if (right <= left || bottom <= top) {
            continue;
        }
        width = right - left;
        height = bottom - top;

        /* First destination pixel of the clipped glyph (4 bytes/pixel). */
        dstBase = pRasInfo->rasBase;
        PTR_ADD(dstBase, top*scan + 4*left);

        for (j = 0; j < height; j++) {
            mlib_u8  *src = (void*)pixels;
            mlib_s32 *dst, *dst_end;

            dst = (void*)dstBase;
            dst_end = dst + width;

            /* Leading pixel when dst is not 8-byte aligned. */
            if ((mlib_s32)dst & 7) {
                pix = *src++;
                if (pix) {
                    /* source * pix + dest * (255 - pix), then pack. */
                    dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
                    dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
                    *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
                                                vis_read_hi(maskRGB));
                    /* Full coverage: use the foreground pixel as is. */
                    if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
                }
                dst++;
            }

#pragma pipeloop(0)
            for (; dst <= (dst_end - 2); dst += 2) {
                /* Per-pixel mix factors from the table, one per half. */
                dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
                                      ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
                /* mask:   halves whose factor is below done16. */
                mask = vis_fcmplt32(dmix0, done16);
                /* mask_z: halves whose factor is non-zero. */
                mask_z = vis_fcmpne32(dmix0, dzero);
                /* Complementary factor applied to the destination. */
                dmix1 = vis_fpsub16(done, dmix0);
                src += 2;

                dd = *(mlib_d64*)dst;
                d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
                d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
                e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
                e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
                d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
                d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
                dd = vis_fpack16_pair(d0, d1);
                dd = vis_fand(dd, maskRGB);

                /*
                 * Order matters: first fgpixel to every pixel with
                 * non-zero coverage, then the blended value on top for
                 * those that are also selected by mask.
                 */
                vis_pst_32(fgpixel_d, dst, mask_z);
                vis_pst_32(dd, dst, mask & mask_z);
            }

            /* Scalar tail, same per-pixel logic as the leading pixel. */
            while (dst < dst_end) {
                pix = *src++;
                if (pix) {
                    dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
                    dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
                    *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
                                                vis_read_hi(maskRGB));
                    if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
                }
                dst++;
            }

            PTR_ADD(dstBase, scan);
            pixels += rowBytes;
        }
    }
}
943
944/***************************************************************/
945
946#endif /* JAVA2D_NO_MLIB */
947