1/*
2 * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
/*
 * Expand a gray level x into a 32-bit value with 0xff in the top byte and
 * x copied into each of the three lower bytes.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be combined with other operators (e.g. Gray2Argb(a | b), Gray2Argb(x) & m).
 * x is evaluated three times, so it must not have side effects.
 */
#define Gray2Argb(x)   \
    (0xff000000 | ((x) << 16) | ((x) << 8) | (x))
34
35/***************************************************************/
36
/*
 * BMASK_FOR_ARGB is written once near the top of a conversion routine,
 * without a trailing semicolon.  When VIS is below 0x200 it expands to
 * nothing; otherwise it expands to the statement
 * vis_write_bmask(0x03214765, 0); (the semicolon is part of the macro).
 */
#if VIS < 0x200

#define BMASK_FOR_ARGB

#else

#define BMASK_FOR_ARGB         \
    vis_write_bmask(0x03214765, 0);

#endif
47
48/***************************************************************/
49
/*
 * RGB2ABGR_DB(x): combine x with `amask` through vis_for, store the result
 * back into x, then apply ARGB2ABGR_DB to x.
 *
 * `amask` is not a macro parameter; it must be an mlib_d64 visible at the
 * point of use (the caller in this file initialises it with
 * vis_to_double_dup(0xFF000000)).
 *
 * NOTE(review): ARGB2ABGR_DB is not defined in this file (presumably in
 * vis_AlphaMacros.h).  Every call site here omits the trailing semicolon,
 * so it presumably terminates itself -- confirm against the header before
 * wrapping this macro in do { } while (0).
 */
#define RGB2ABGR_DB(x)         \
    x = vis_for(x, amask);     \
    ARGB2ABGR_DB(x)
53
54/***************************************************************/
55
/*
 * INSERT_U8_34R: turn three 8-byte inputs into four 8-byte outputs using
 * only vis_fpmerge on register halves.
 *
 * Inputs  (caller variables): sd0, sd1, sd2 -- 24 consecutive source bytes
 *                             sFF           -- filler value (the caller
 *                                              sets it with vis_fone())
 * Outputs (caller variables): dd0, dd1, dd2, dd3
 * Scratch (caller variables): sda .. sdm
 *
 * The caller in this file loads sd0..sd2 from 8 three-byte pixels and
 * stores dd0..dd3 as 8 four-byte pixels.  Given vis_fpmerge's byte
 * interleaving (see the VIS instruction set documentation), sdg, sdh and
 * sdi collect bytes 0, 1 and 2 of each of the 8 pixels, and each output
 * pixel is one sFF byte followed by its three source bytes in their
 * original order -- the same layout the caller's scalar path writes.
 *
 * The expansion has no trailing semicolon; the call site supplies it.
 */
#define INSERT_U8_34R                                          \
    sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));     \
    sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));     \
    sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));     \
    sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));     \
    sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));     \
    sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));     \
    sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));     \
    sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));     \
    sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));     \
    sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));     \
    sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));     \
    sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh));     \
    sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh));     \
    dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj));     \
    dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj));     \
    dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk));     \
    dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk))
74
75/***************************************************************/
76
77void IntArgbToIntAbgrConvert_line(mlib_s32 *srcBase,
78                                  mlib_s32 *dstBase,
79                                  mlib_s32 width)
80{
81    mlib_s32 *dst_end = dstBase + width;
82    mlib_d64 dd;
83    mlib_f32 ff;
84
85    BMASK_FOR_ARGB
86
87    if ((mlib_s32)srcBase & 7) {
88        ff = *(mlib_f32*)srcBase;
89        ARGB2ABGR_FL(ff)
90        *(mlib_f32*)dstBase = ff;
91        srcBase++;
92        dstBase++;
93    }
94
95    if ((mlib_s32)dstBase & 7) {
96#pragma pipeloop(0)
97        for (; dstBase <= (dst_end - 2); dstBase += 2) {
98            dd = *(mlib_d64*)srcBase;
99            ARGB2ABGR_DB(dd)
100            ((mlib_f32*)dstBase)[0] = vis_read_hi(dd);
101            ((mlib_f32*)dstBase)[1] = vis_read_lo(dd);
102            srcBase += 2;
103        }
104    } else {
105#pragma pipeloop(0)
106        for (; dstBase <= (dst_end - 2); dstBase += 2) {
107            dd = *(mlib_d64*)srcBase;
108            ARGB2ABGR_DB(dd)
109            *(mlib_d64*)dstBase = dd;
110            srcBase += 2;
111        }
112    }
113
114    if (dstBase < dst_end) {
115        ff = *(mlib_f32*)srcBase;
116        ARGB2ABGR_FL(ff)
117        *(mlib_f32*)dstBase = ff;
118    }
119}
120
121/***************************************************************/
122
123void ADD_SUFF(FourByteAbgrToIntArgbConvert)(BLIT_PARAMS)
124{
125    mlib_u32 *argb = (mlib_u32 *)dstBase;
126    mlib_u8  *pabgr = (mlib_u8 *)srcBase;
127    mlib_s32 dstScan = (pDstInfo)->scanStride;
128    mlib_s32 srcScan = (pSrcInfo)->scanStride;
129    mlib_s32 i, j, count, left;
130    mlib_d64 w_abgr;
131
132    if (width < 16) {
133        for (j = 0; j < height; j++) {
134            mlib_u8  *src = srcBase;
135            mlib_s32 *dst = dstBase;
136
137            for (i = 0; i < width; i++) {
138                *dst++ = (src[0] << 24) | (src[3] << 16) |
139                         (src[2] << 8) | (src[1]);
140                src += 4;
141            }
142
143            PTR_ADD(dstBase, dstScan);
144            PTR_ADD(srcBase, srcScan);
145        }
146        return;
147    }
148
149    if (dstScan == 4*width && srcScan == dstScan) {
150        width *= height;
151        height = 1;
152    }
153    count = width >> 1;
154    left = width & 1;
155
156    BMASK_FOR_ARGB
157
158    if ((((mlib_addr)pabgr & 3) == 0) && ((srcScan & 3) == 0)) {
159        mlib_u32 *abgr = (mlib_u32 *)pabgr;
160
161        dstScan >>= 2;
162        srcScan >>= 2;
163
164        for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
165            if ((((mlib_addr) argb | (mlib_addr) abgr) & 7) == 0) {
166                mlib_d64 *d_abgr = (mlib_d64 *) abgr;
167                mlib_d64 *d_argb = (mlib_d64 *) argb;
168
169#pragma pipeloop(0)
170                for (j = 0; j < count; j++) {
171                    w_abgr = d_abgr[j];
172                    ARGB2ABGR_DB(w_abgr)
173                    d_argb[j] = w_abgr;
174                }
175
176                if (left) {
177                    w_abgr = d_abgr[count];
178                    ARGB2ABGR_DB(w_abgr)
179                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
180                }
181            } else {
182                mlib_f32 v_abgr0, v_abgr1;
183
184#pragma pipeloop(0)
185                for (j = 0; j < count; j++) {
186                    v_abgr0 = ((mlib_f32 *) abgr)[2 * j];
187                    v_abgr1 = ((mlib_f32 *) abgr)[2 * j + 1];
188                    w_abgr = vis_freg_pair(v_abgr0, v_abgr1);
189                    ARGB2ABGR_DB(w_abgr)
190                    ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
191                    ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
192                }
193
194                if (left) {
195                    v_abgr0 = ((mlib_f32 *) abgr)[2 * count];
196                    w_abgr = vis_freg_pair(v_abgr0, 0);
197                    ARGB2ABGR_DB(w_abgr)
198                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
199                }
200            }
201        }
202    } else {      /* abgr is not aligned */
203        mlib_u8 *abgr = pabgr;
204        mlib_d64 *d_abgr, db0, db1;
205
206        dstScan >>= 2;
207
208        for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
209            d_abgr = vis_alignaddr(abgr, 0);
210            db0 = *d_abgr++;
211
212            if (((mlib_addr) argb & 7) == 0) {
213                mlib_d64 *d_argb = (mlib_d64 *) argb;
214
215#pragma pipeloop(0)
216                for (j = 0; j < count; j++) {
217                    db1 = d_abgr[j];
218                    w_abgr = vis_faligndata(db0, db1);
219                    db0 = db1;
220                    ARGB2ABGR_DB(w_abgr)
221                    d_argb[j] = w_abgr;
222                }
223
224                if (left) {
225                    db1 = d_abgr[j];
226                    w_abgr = vis_faligndata(db0, db1);
227                    ARGB2ABGR_DB(w_abgr)
228                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
229                }
230            } else {
231                mlib_d64 w_abgr;
232
233                db1 = *d_abgr++;
234                w_abgr = vis_faligndata(db0, db1);
235                db0 = db1;
236#pragma pipeloop(0)
237                for (j = 0; j < count; j++) {
238                    ARGB2ABGR_DB(w_abgr)
239                    ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
240                    ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
241                    db1 = d_abgr[j];
242                    w_abgr = vis_faligndata(db0, db1);
243                    db0 = db1;
244                }
245
246                if (left) {
247                    ARGB2ABGR_DB(w_abgr)
248                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
249                }
250            }
251        }
252    }
253}
254
255/***************************************************************/
256
257void ADD_SUFF(IntArgbToFourByteAbgrConvert)(BLIT_PARAMS)
258{
259    mlib_u32 *argb = (mlib_u32 *)srcBase;
260    mlib_u8 *abgr = (mlib_u8 *)dstBase;
261    mlib_s32 dstScan = (pDstInfo)->scanStride;
262    mlib_s32 srcScan = (pSrcInfo)->scanStride;
263    mlib_s32 i, j, count, left;
264    mlib_d64 w_abgr;
265
266    if (width < 16) {
267        for (j = 0; j < height; j++) {
268            mlib_s32 *src = srcBase;
269            mlib_u8  *dst = dstBase;
270
271            for (i = 0; i < width; i++) {
272                mlib_u32 x = *src++;
273                dst[0] = x >> 24;
274                dst[1] = x;
275                dst[2] = x >> 8;
276                dst[3] = x >> 16;
277                dst += 4;
278            }
279
280            PTR_ADD(dstBase, dstScan);
281            PTR_ADD(srcBase, srcScan);
282        }
283        return;
284    }
285
286    if (dstScan == 4*width && srcScan == dstScan) {
287        width *= height;
288        height = 1;
289    }
290    count = width >> 1;
291    left = width & 1;
292
293    BMASK_FOR_ARGB
294
295    srcScan >>= 2;
296
297    for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
298
299        if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
300            mlib_d64 *d_argb = (mlib_d64 *) argb;
301            mlib_d64 *d_abgr = (mlib_d64 *) abgr;
302
303#pragma pipeloop(0)
304            for (j = 0; j < count; j++) {
305                w_abgr = d_argb[j];
306                ARGB2ABGR_DB(w_abgr)
307                d_abgr[j] = w_abgr;
308            }
309
310            if (left) {
311                w_abgr = d_argb[count];
312                ARGB2ABGR_DB(w_abgr)
313                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
314            }
315
316        } else if (((mlib_addr) abgr & 3) == 0) {
317            mlib_f32 v_argb0, v_argb1;
318
319#pragma pipeloop(0)
320            for (j = 0; j < count; j++) {
321                v_argb0 = ((mlib_f32 *) argb)[2 * j];
322                v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
323                w_abgr = vis_freg_pair(v_argb0, v_argb1);
324
325                ARGB2ABGR_DB(w_abgr)
326                ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
327                ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
328            }
329
330            if (left) {
331                v_argb0 = ((mlib_f32 *) argb)[2 * count];
332                w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
333
334                ARGB2ABGR_DB(w_abgr)
335                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
336            }
337
338        } else {      /* abgr is not aligned */
339
340            mlib_u8 *pend = abgr + (width << 2) - 1;
341            mlib_d64 *d_abgr, db0, db1;
342            mlib_s32 emask, off;
343            mlib_f32 *f_argb = (mlib_f32 *) argb;
344
345            off = (mlib_addr)abgr & 7;
346            vis_alignaddr((void *)(8 - off), 0);
347            d_abgr = (mlib_d64 *) (abgr - off);
348
349            db1 = vis_freg_pair(*f_argb++, *f_argb++);
350            ARGB2ABGR_DB(db1)
351            w_abgr = vis_faligndata(db1, db1);
352            emask = vis_edge8(abgr, pend);
353            vis_pst_8(w_abgr, d_abgr++, emask);
354            db0 = db1;
355
356            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
357#pragma pipeloop(0)
358            for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
359                ARGB2ABGR_DB(db1)
360                w_abgr = vis_faligndata(db0, db1);
361                *d_abgr++ = w_abgr;
362                db0 = db1;
363                f_argb += 2;
364                db1 = vis_freg_pair(f_argb[0], f_argb[1]);
365            }
366
367            if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
368                ARGB2ABGR_DB(db1)
369                w_abgr = vis_faligndata(db0, db1);
370                emask = vis_edge8(d_abgr, pend);
371                vis_pst_8(w_abgr, d_abgr, emask);
372            }
373        }
374    }
375}
376
377/***************************************************************/
378
379void ADD_SUFF(IntRgbToFourByteAbgrConvert)(BLIT_PARAMS)
380{
381    mlib_u32 *argb = (mlib_u32 *)srcBase;
382    mlib_u8  *abgr = (mlib_u8 *)dstBase;
383    mlib_s32 dstScan = (pDstInfo)->scanStride;
384    mlib_s32 srcScan = (pSrcInfo)->scanStride;
385    mlib_s32 i, j, count, left;
386    mlib_d64 w_abgr;
387    mlib_d64 amask = vis_to_double_dup(0xFF000000);
388
389    if (width < 16) {
390        for (j = 0; j < height; j++) {
391            mlib_s32 *src = srcBase;
392            mlib_u8  *dst = dstBase;
393
394            for (i = 0; i < width; i++) {
395                mlib_u32 x = *src++;
396                dst[0] = 0xFF;
397                dst[1] = x;
398                dst[2] = x >> 8;
399                dst[3] = x >> 16;
400                dst += 4;
401            }
402
403            PTR_ADD(dstBase, dstScan);
404            PTR_ADD(srcBase, srcScan);
405        }
406        return;
407    }
408
409    if (dstScan == 4*width && srcScan == dstScan) {
410        width *= height;
411        height = 1;
412    }
413    count = width >> 1;
414    left = width & 1;
415
416    BMASK_FOR_ARGB
417
418    srcScan >>= 2;
419
420    for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
421
422        if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
423            mlib_d64 *d_argb = (mlib_d64 *) argb;
424            mlib_d64 *d_abgr = (mlib_d64 *) abgr;
425
426#pragma pipeloop(0)
427            for (j = 0; j < count; j++) {
428                w_abgr = d_argb[j];
429                RGB2ABGR_DB(w_abgr)
430                d_abgr[j] = w_abgr;
431            }
432
433            if (left) {
434                w_abgr = d_argb[count];
435                RGB2ABGR_DB(w_abgr)
436                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
437            }
438
439        } else if (((mlib_addr) abgr & 3) == 0) {
440            mlib_f32 v_argb0, v_argb1;
441
442#pragma pipeloop(0)
443            for (j = 0; j < count; j++) {
444                v_argb0 = ((mlib_f32 *) argb)[2 * j];
445                v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
446                w_abgr = vis_freg_pair(v_argb0, v_argb1);
447
448                RGB2ABGR_DB(w_abgr)
449                ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
450                ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
451            }
452
453            if (left) {
454                v_argb0 = ((mlib_f32 *) argb)[2 * count];
455                w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
456
457                RGB2ABGR_DB(w_abgr)
458                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
459            }
460
461        } else {      /* abgr is not aligned */
462
463            mlib_u8 *pend = abgr + (width << 2) - 1;
464            mlib_d64 *d_abgr, db0, db1;
465            mlib_s32 emask, off;
466            mlib_f32 *f_argb = (mlib_f32 *) argb;
467
468            off = (mlib_addr)abgr & 7;
469            vis_alignaddr((void *)(8 - off), 0);
470            d_abgr = (mlib_d64 *) (abgr - off);
471
472            db1 = vis_freg_pair(*f_argb++, *f_argb++);
473            RGB2ABGR_DB(db1)
474            w_abgr = vis_faligndata(db1, db1);
475            emask = vis_edge8(abgr, pend);
476            vis_pst_8(w_abgr, d_abgr++, emask);
477            db0 = db1;
478
479            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
480#pragma pipeloop(0)
481            for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
482                RGB2ABGR_DB(db1)
483                w_abgr = vis_faligndata(db0, db1);
484                *d_abgr++ = w_abgr;
485                db0 = db1;
486                f_argb += 2;
487                db1 = vis_freg_pair(f_argb[0], f_argb[1]);
488            }
489
490            if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
491                RGB2ABGR_DB(db1)
492                w_abgr = vis_faligndata(db0, db1);
493                emask = vis_edge8(d_abgr, pend);
494                vis_pst_8(w_abgr, d_abgr, emask);
495            }
496        }
497    }
498}
499
500/***************************************************************/
501
502void ADD_SUFF(ThreeByteBgrToFourByteAbgrConvert)(BLIT_PARAMS)
503{
504    mlib_s32 dstScan = pDstInfo->scanStride;
505    mlib_s32 srcScan = pSrcInfo->scanStride;
506    mlib_d64 sd0, sd1, sd2;
507    mlib_d64 dd0, dd1, dd2, dd3;
508    mlib_d64 sda, sdb, sdc, sdd;
509    mlib_d64 sde, sdf, sdg, sdh;
510    mlib_d64 sdi, sdj, sdk, sdl;
511    mlib_d64 sdm;
512    mlib_d64 sFF;
513    mlib_s32 r, g, b;
514    mlib_s32 i, j;
515
516    if (width < 16) {
517        for (j = 0; j < height; j++) {
518            mlib_u8 *src = srcBase;
519            mlib_u8 *dst = dstBase;
520
521#pragma pipeloop(0)
522            for (i = 0; i < width; i++) {
523                dst[0] = 0xFF;
524                dst[1] = src[0];
525                dst[2] = src[1];
526                dst[3] = src[2];
527                src += 3;
528                dst += 4;
529            }
530
531            PTR_ADD(dstBase, dstScan);
532            PTR_ADD(srcBase, srcScan);
533        }
534        return;
535    }
536
537    if (dstScan == 4*width && srcScan == 3*width) {
538        width *= height;
539        height = 1;
540    }
541
542    sFF = vis_fone();
543
544    for (j = 0; j < height; j++) {
545        mlib_u8 *pSrc = srcBase;
546        mlib_u8 *pDst = dstBase;
547
548        if (!(((mlib_s32)pSrc | (mlib_s32)pDst) & 7)) {
549#pragma pipeloop(0)
550            for (i = 0; i <= ((mlib_s32)width - 8); i += 8) {
551                sd0 = ((mlib_d64*)pSrc)[0];
552                sd1 = ((mlib_d64*)pSrc)[1];
553                sd2 = ((mlib_d64*)pSrc)[2];
554                pSrc += 3*8;
555                INSERT_U8_34R;
556                ((mlib_d64*)pDst)[0] = dd0;
557                ((mlib_d64*)pDst)[1] = dd1;
558                ((mlib_d64*)pDst)[2] = dd2;
559                ((mlib_d64*)pDst)[3] = dd3;
560                pDst += 4*8;
561            }
562
563            for (; i < width; i++) {
564                b = pSrc[0];
565                g = pSrc[1];
566                r = pSrc[2];
567                ((mlib_u16*)pDst)[0] = 0xff00 | b;
568                ((mlib_u16*)pDst)[1] = (g << 8) | r;
569                pSrc += 3;
570                pDst += 4;
571            }
572        } else if (!((mlib_s32)pDst & 1)) {
573#pragma pipeloop(0)
574            for (i = 0; i < width; i++) {
575                b = pSrc[0];
576                g = pSrc[1];
577                r = pSrc[2];
578                ((mlib_u16*)pDst)[0] = 0xff00 | b;
579                ((mlib_u16*)pDst)[1] = (g << 8) | r;
580                pSrc += 3;
581                pDst += 4;
582            }
583        } else {
584            *pDst++ = 0xff;
585#pragma pipeloop(0)
586            for (i = 0; i < (mlib_s32)width - 1; i++) {
587                b = pSrc[0];
588                g = pSrc[1];
589                r = pSrc[2];
590                ((mlib_u16*)pDst)[0] = (b << 8) | g;
591                ((mlib_u16*)pDst)[1] = (r << 8) | 0xff;
592                pSrc += 3;
593                pDst += 4;
594            }
595            if (width) {
596                pDst[0] = pSrc[0];
597                pDst[1] = pSrc[1];
598                pDst[2] = pSrc[2];
599            }
600        }
601
602        PTR_ADD(dstBase, dstScan);
603        PTR_ADD(srcBase, srcScan);
604    }
605}
606
607/***************************************************************/
608
/*
 * LOAD_BGR(dd): fetch the source pixel selected by tmpsxloc into dd and
 * advance tmpsxloc by sxinc.  Besides its argument the macro uses pSrc,
 * tmpsxloc, shift and sxinc from the enclosing scope (and amask in the
 * disabled variant).
 *
 * Active variant (#if 1): sp points one byte before the 3-byte pixel at
 * pSrc + 3*(tmpsxloc >> shift).  The two aligned 8-byte words starting at
 * sp rounded down to a multiple of 8 are loaded and combined with
 * vis_faligndata after vis_alignaddr(sp, 0).  The caller in this file ORs
 * the upper half of dd with amask and stores those 4 bytes.
 * NOTE(review): the two 8-byte loads touch bytes before and after the
 * pixel itself -- confirm the source buffer tolerates that.
 *
 * Disabled variant (#else): shifts the three pixel bytes and amask into dd
 * one vis_faligndata at a time; presumably relies on an alignment offset
 * set up by the caller beforehand -- confirm before enabling.
 */
#if 1

#define LOAD_BGR(dd) {                                 \
    mlib_u8  *sp = pSrc - 1 + 3*(tmpsxloc >> shift);   \
    mlib_d64 *ap = (void*)((mlib_addr)sp &~ 7);        \
    vis_alignaddr(sp, 0);                              \
    dd = vis_faligndata(ap[0], ap[1]);                 \
    tmpsxloc += sxinc;                                 \
}

#else

#define LOAD_BGR(dd) {                                 \
    mlib_u8 *sp = pSrc + 3*(tmpsxloc >> shift);        \
    dd = vis_faligndata(vis_ld_u8(sp + 2), dd);        \
    dd = vis_faligndata(vis_ld_u8(sp + 1), dd);        \
    dd = vis_faligndata(vis_ld_u8(sp    ), dd);        \
    dd = vis_faligndata(amask, dd);                    \
    tmpsxloc += sxinc;                                 \
}

#endif
631
632/***************************************************************/
633
634void ADD_SUFF(ThreeByteBgrToFourByteAbgrScaleConvert)(SCALE_PARAMS)
635{
636    mlib_s32 dstScan = pDstInfo->scanStride;
637    mlib_s32 srcScan = pSrcInfo->scanStride;
638    mlib_d64 d0;
639    mlib_d64 amask;
640    mlib_s32 r, g, b;
641    mlib_s32 i, j;
642
643    if (width < 16 /*|| (((mlib_s32)dstBase | dstScan) & 3)*/) {
644        for (j = 0; j < height; j++) {
645            mlib_u8  *pSrc = srcBase;
646            mlib_u8  *pDst = dstBase;
647            mlib_s32 tmpsxloc = sxloc;
648
649            PTR_ADD(pSrc, (syloc >> shift) * srcScan);
650
651#pragma pipeloop(0)
652            for (i = 0; i < width; i++) {
653                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
654                pDst[0] = 0xff;
655                pDst[1] = pp[0];
656                pDst[2] = pp[1];
657                pDst[3] = pp[2];
658                tmpsxloc += sxinc;
659                pDst += 4;
660            }
661
662            PTR_ADD(dstBase, dstScan);
663            syloc += syinc;
664        }
665        return;
666    }
667
668    vis_alignaddr(NULL, 7);
669    amask = vis_to_double_dup(0xFF000000);
670
671    for (j = 0; j < height; j++) {
672        mlib_u8 *pSrc = srcBase;
673        mlib_u8 *pDst = dstBase;
674        mlib_s32 tmpsxloc = sxloc;
675
676        PTR_ADD(pSrc, (syloc >> shift) * srcScan);
677
678        if (!((mlib_s32)pDst & 3)) {
679#pragma pipeloop(0)
680            for (i = 0; i < width; i++) {
681                LOAD_BGR(d0);
682                ((mlib_f32*)pDst)[0] = vis_fors(vis_read_hi(d0),
683                                                vis_read_hi(amask));
684                pDst += 4;
685            }
686        } else if (!((mlib_s32)pDst & 1)) {
687#pragma pipeloop(0)
688            for (i = 0; i < width; i++) {
689                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
690                tmpsxloc += sxinc;
691                b = pp[0];
692                g = pp[1];
693                r = pp[2];
694                ((mlib_u16*)pDst)[2*i    ] = 0xff00 | b;
695                ((mlib_u16*)pDst)[2*i + 1] = (g << 8) | r;
696            }
697        } else {
698            *pDst++ = 0xff;
699#pragma pipeloop(0)
700            for (i = 0; i < (mlib_s32)width - 1; i++) {
701                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
702                tmpsxloc += sxinc;
703                b = pp[0];
704                g = pp[1];
705                r = pp[2];
706                ((mlib_u16*)pDst)[2*i    ] = (b << 8) | g;
707                ((mlib_u16*)pDst)[2*i + 1] = (r << 8) | 0xff;
708            }
709            if (width) {
710                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
711                tmpsxloc += sxinc;
712                pDst[4*i  ] = pp[0];
713                pDst[4*i+1] = pp[1];
714                pDst[4*i+2] = pp[2];
715            }
716        }
717
718        PTR_ADD(dstBase, dstScan);
719        syloc += syinc;
720    }
721}
722
723/***************************************************************/
724
725void ADD_SUFF(ByteGrayToFourByteAbgrConvert)(BLIT_PARAMS)
726{
727    mlib_s32 dstScan = pDstInfo->scanStride;
728    mlib_s32 srcScan = pSrcInfo->scanStride;
729    mlib_d64 d0, d1, d2, d3;
730    mlib_f32 ff, aa = vis_fones();
731    mlib_s32 i, j, x;
732
733    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
734        ADD_SUFF(ByteGrayToIntArgbConvert)(BLIT_CALL_PARAMS);
735        return;
736    }
737
738    if (width < 16) {
739        for (j = 0; j < height; j++) {
740            mlib_u8 *src = srcBase;
741            mlib_u8 *dst = dstBase;
742
743            for (i = 0; i < width; i++) {
744                x = *src++;
745                dst[0] = 0xff;
746                dst[1] = x;
747                dst[2] = x;
748                dst[3] = x;
749                dst += 4;
750            }
751
752            PTR_ADD(dstBase, dstScan);
753            PTR_ADD(srcBase, srcScan);
754        }
755        return;
756    }
757
758    if (srcScan == width && dstScan == 4*width) {
759        width *= height;
760        height = 1;
761    }
762
763    for (j = 0; j < height; j++) {
764        mlib_u8 *src = srcBase;
765        mlib_u8 *dst = dstBase;
766        mlib_u8 *dst_end;
767
768        dst_end = dst + 4*width;
769
770        while (((mlib_s32)src & 3) && dst < dst_end) {
771            x = *src++;
772            dst[0] = 0xff;
773            dst[1] = x;
774            dst[2] = x;
775            dst[3] = x;
776            dst += 4;
777        }
778
779        if (!((mlib_s32)dst & 3)) {
780#pragma pipeloop(0)
781            for (; dst <= (dst_end - 4*4); dst += 4*4) {
782                ff = *(mlib_f32*)src;
783                d0 = vis_fpmerge(aa, ff);
784                d1 = vis_fpmerge(ff, ff);
785                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
786                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
787                ((mlib_f32*)dst)[0] = vis_read_hi(d2);
788                ((mlib_f32*)dst)[1] = vis_read_lo(d2);
789                ((mlib_f32*)dst)[2] = vis_read_hi(d3);
790                ((mlib_f32*)dst)[3] = vis_read_lo(d3);
791                src += 4;
792            }
793        } else {
794            mlib_d64 *dp;
795
796            dp = vis_alignaddr(dst, 0);
797            d3 = vis_faligndata(dp[0], dp[0]);
798            vis_alignaddrl(dst, 0);
799
800#pragma pipeloop(0)
801            for (; dst <= (dst_end - 4*4); dst += 4*4) {
802                ff = *(mlib_f32*)src;
803                d0 = vis_fpmerge(aa, ff);
804                d1 = vis_fpmerge(ff, ff);
805                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
806                *dp++ = vis_faligndata(d3, d2);
807                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
808                *dp++ = vis_faligndata(d2, d3);
809                src += 4;
810            }
811
812            vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
813        }
814
815        while (dst < dst_end) {
816            x = *src++;
817            dst[0] = 0xff;
818            dst[1] = x;
819            dst[2] = x;
820            dst[3] = x;
821            dst += 4;
822        }
823
824        PTR_ADD(dstBase, dstScan);
825        PTR_ADD(srcBase, srcScan);
826    }
827}
828
829/***************************************************************/
830
831void ADD_SUFF(IntArgbToFourByteAbgrXorBlit)(BLIT_PARAMS)
832{
833    mlib_s32 dstScan = pDstInfo->scanStride;
834    mlib_s32 srcScan = pSrcInfo->scanStride;
835    mlib_u32 xorpixel = pCompInfo->details.xorPixel;
836    mlib_u32 alphamask = pCompInfo->alphaMask;
837    mlib_d64 dd, d_xorpixel, d_alphamask, d_zero;
838    mlib_s32 i, j, x, neg_mask;
839
840    if (width < 16) {
841        xorpixel = (xorpixel << 24) | (xorpixel >> 8);
842        alphamask = (alphamask << 24) | (alphamask >> 8);
843
844        for (j = 0; j < height; j++) {
845            mlib_s32 *src = srcBase;
846            mlib_u8  *dst = dstBase;
847
848            for (i = 0; i < width; i++) {
849                x = src[i];
850                neg_mask = x >> 31;
851                x = (x ^ xorpixel) & (neg_mask &~ alphamask);
852                dst[0] ^= x >> 24;
853                dst[1] ^= x;
854                dst[2] ^= x >> 8;
855                dst[3] ^= x >> 16;
856                dst += 4;
857            }
858
859            PTR_ADD(dstBase, dstScan);
860            PTR_ADD(srcBase, srcScan);
861        }
862        return;
863    }
864
865    if (srcScan == 4*width && dstScan == 4*width) {
866        width *= height;
867        height = 1;
868    }
869
870    d_zero = vis_fzero();
871    d_xorpixel = vis_freg_pair(vis_ldfa_ASI_PL(&xorpixel),
872                               vis_ldfa_ASI_PL(&xorpixel));
873    d_alphamask = vis_freg_pair(vis_ldfa_ASI_PL(&alphamask),
874                                vis_ldfa_ASI_PL(&alphamask));
875
876    dd = vis_freg_pair(vis_read_hi(d_xorpixel), vis_read_hi(d_alphamask));
877    ARGB2ABGR_DB(dd)
878    xorpixel = ((mlib_s32*)&dd)[0];
879    alphamask = ((mlib_s32*)&dd)[1];
880
881    for (j = 0; j < height; j++) {
882        mlib_s32 *src = srcBase;
883        mlib_u8  *dst = dstBase;
884        mlib_u8  *dst_end;
885
886        dst_end = dst + 4*width;
887
888        if (!((mlib_s32)dst & 7)) {
889#pragma pipeloop(0)
890            for (; dst <= (dst_end - 8); dst += 8) {
891                dd = vis_freg_pair(((mlib_f32*)src)[0], ((mlib_f32*)src)[1]);
892                src += 2;
893                neg_mask = vis_fcmplt32(dd, d_zero);
894                ARGB2ABGR_DB(dd)
895                dd = vis_fxor(dd, d_xorpixel);
896                dd = vis_fandnot(d_alphamask, dd);
897                dd = vis_fxor(dd, *(mlib_d64*)dst);
898                vis_pst_32(dd, dst, neg_mask);
899            }
900        }
901
902        while (dst < dst_end) {
903            x = *src++;
904            neg_mask = x >> 31;
905            x = (x ^ xorpixel) & (neg_mask &~ alphamask);
906            dst[0] ^= x >> 24;
907            dst[1] ^= x;
908            dst[2] ^= x >> 8;
909            dst[3] ^= x >> 16;
910            dst += 4;
911        }
912
913        PTR_ADD(dstBase, dstScan);
914        PTR_ADD(srcBase, srcScan);
915    }
916}
917
918/***************************************************************/
919
920void ADD_SUFF(ByteGrayToFourByteAbgrScaleConvert)(SCALE_PARAMS)
921{
922    mlib_s32 dstScan = pDstInfo->scanStride;
923    mlib_s32 srcScan = pSrcInfo->scanStride;
924    mlib_d64 d0, d1, d2, d3, dd;
925    mlib_f32 ff, aa;
926    mlib_s32 i, j, x;
927
928/*  if (!(((mlib_s32)dstBase | dstScan) & 3)) {
929    ADD_SUFF(ByteGrayToIntArgbScaleConvert)(SCALE_CALL_PARAMS);
930    return;
931    }*/
932
933    if (width < 16) {
934        for (j = 0; j < height; j++) {
935            mlib_u8 *src = srcBase;
936            mlib_u8 *dst = dstBase;
937            mlib_s32 tmpsxloc = sxloc;
938
939            PTR_ADD(src, (syloc >> shift) * srcScan);
940
941            for (i = 0; i < width; i++) {
942                x = src[tmpsxloc >> shift];
943                tmpsxloc += sxinc;
944                dst[4*i    ] = 0xff;
945                dst[4*i + 1] = x;
946                dst[4*i + 2] = x;
947                dst[4*i + 3] = x;
948            }
949
950            PTR_ADD(dstBase, dstScan);
951            syloc += syinc;
952        }
953        return;
954    }
955
956    aa = vis_fones();
957
958    for (j = 0; j < height; j++) {
959        mlib_u8 *src = srcBase;
960        mlib_u8 *dst = dstBase;
961        mlib_u8 *dst_end;
962        mlib_s32 tmpsxloc = sxloc;
963
964        PTR_ADD(src, (syloc >> shift) * srcScan);
965
966        dst_end = dst + 4*width;
967
968        if (!((mlib_s32)dst & 3)) {
969            vis_alignaddr(NULL, 7);
970#pragma pipeloop(0)
971            for (; dst <= (dst_end - 4*4); dst += 4*4) {
972                LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));
973                LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));
974                LOAD_NEXT_U8(dd, src + ((tmpsxloc +   sxinc) >> shift));
975                LOAD_NEXT_U8(dd, src + ((tmpsxloc          ) >> shift));
976                tmpsxloc += 4*sxinc;
977                ff = vis_read_hi(dd);
978                d0 = vis_fpmerge(aa, ff);
979                d1 = vis_fpmerge(ff, ff);
980                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
981                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
982                ((mlib_f32*)dst)[0] = vis_read_hi(d2);
983                ((mlib_f32*)dst)[1] = vis_read_lo(d2);
984                ((mlib_f32*)dst)[2] = vis_read_hi(d3);
985                ((mlib_f32*)dst)[3] = vis_read_lo(d3);
986            }
987        } else {
988            mlib_d64 *dp;
989
990            dp = vis_alignaddr(dst, 0);
991            d3 = vis_faligndata(dp[0], dp[0]);
992            vis_alignaddrl(dst, 0);
993
994#pragma pipeloop(0)
995            for (; dst <= (dst_end - 4*4); dst += 4*4) {
996                mlib_d64 s0, s1, s2, s3;
997                s0 = vis_ld_u8(src + ((tmpsxloc          ) >> shift));
998                s1 = vis_ld_u8(src + ((tmpsxloc +   sxinc) >> shift));
999                s2 = vis_ld_u8(src + ((tmpsxloc + 2*sxinc) >> shift));
1000                s3 = vis_ld_u8(src + ((tmpsxloc + 3*sxinc) >> shift));
1001                tmpsxloc += 4*sxinc;
1002                s0 = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s2));
1003                s1 = vis_fpmerge(vis_read_lo(s1), vis_read_lo(s3));
1004                dd = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s1));
1005                ff = vis_read_lo(dd);
1006                d0 = vis_fpmerge(aa, ff);
1007                d1 = vis_fpmerge(ff, ff);
1008                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
1009                *dp++ = vis_faligndata(d3, d2);
1010                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
1011                *dp++ = vis_faligndata(d2, d3);
1012            }
1013
1014            vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
1015        }
1016
1017        while (dst < dst_end) {
1018            x = src[tmpsxloc >> shift];
1019            tmpsxloc += sxinc;
1020            dst[0] = 0xff;
1021            dst[1] = x;
1022            dst[2] = x;
1023            dst[3] = x;
1024            dst += 4;
1025        }
1026
1027        PTR_ADD(dstBase, dstScan);
1028        syloc += syinc;
1029    }
1030}
1031
1032/***************************************************************/
1033
1034void ADD_SUFF(ByteIndexedToFourByteAbgrConvert)(BLIT_PARAMS)
1035{
1036    jint *pixLut = pSrcInfo->lutBase;
1037    mlib_s32 dstScan = pDstInfo->scanStride;
1038    mlib_s32 srcScan = pSrcInfo->scanStride;
1039    mlib_d64 dd, d_old;
1040    mlib_s32 i, j, x;
1041
1042/*  if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1043    ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
1044    return;
1045    }*/
1046
1047    if (width < 8) {
1048        for (j = 0; j < height; j++) {
1049            mlib_u8 *src = srcBase;
1050            mlib_u8 *dst = dstBase;
1051
1052            for (i = 0; i < width; i++) {
1053                x = pixLut[src[i]];
1054                dst[4*i    ] = x >> 24;
1055                dst[4*i + 1] = x;
1056                dst[4*i + 2] = x >> 8;
1057                dst[4*i + 3] = x >> 16;
1058            }
1059
1060            PTR_ADD(dstBase, dstScan);
1061            PTR_ADD(srcBase, srcScan);
1062        }
1063        return;
1064    }
1065
1066    if (srcScan == width && dstScan == 4*width) {
1067        width *= height;
1068        height = 1;
1069    }
1070
1071    BMASK_FOR_ARGB
1072
1073    for (j = 0; j < height; j++) {
1074        mlib_u8 *src = srcBase;
1075        mlib_u8 *dst = dstBase;
1076        mlib_u8 *dst_end;
1077
1078        dst_end = dst + 4*width;
1079
1080        if (!((mlib_s32)dst & 7)) {
1081#pragma pipeloop(0)
1082            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1083                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1084                                   ((mlib_f32*)pixLut)[src[1]]);
1085                ARGB2ABGR_DB(dd)
1086                *(mlib_d64*)dst = dd;
1087                src += 2;
1088            }
1089        } else {
1090            mlib_d64 *dp;
1091
1092            dp = vis_alignaddr(dst, 0);
1093            dd = vis_faligndata(dp[0], dp[0]);
1094            vis_alignaddrl(dst, 0);
1095
1096#pragma pipeloop(0)
1097            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1098                d_old = dd;
1099                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1100                                   ((mlib_f32*)pixLut)[src[1]]);
1101                ARGB2ABGR_DB(dd)
1102                *dp++ = vis_faligndata(d_old, dd);
1103                src += 2;
1104            }
1105
1106            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1107        }
1108
1109        while (dst < dst_end) {
1110            x = pixLut[*src++];
1111            dst[0] = x >> 24;
1112            dst[1] = x;
1113            dst[2] = x >> 8;
1114            dst[3] = x >> 16;
1115            dst += 4;
1116        }
1117
1118        PTR_ADD(dstBase, dstScan);
1119        PTR_ADD(srcBase, srcScan);
1120    }
1121}
1122
1123/***************************************************************/
1124
1125void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparOver)(BLIT_PARAMS)
1126{
1127    jint *pixLut = pSrcInfo->lutBase;
1128    mlib_s32 dstScan = pDstInfo->scanStride;
1129    mlib_s32 srcScan = pSrcInfo->scanStride;
1130    mlib_d64 dd, dzero;
1131    mlib_s32 i, j, x, mask;
1132
1133/*  if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1134    ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
1135    return;
1136    }*/
1137
1138    if (width < 8) {
1139        for (j = 0; j < height; j++) {
1140            mlib_u8 *src = srcBase;
1141            mlib_u8 *dst = dstBase;
1142
1143            for (i = 0; i < width; i++) {
1144                x = pixLut[src[i]];
1145                if (x < 0) {
1146                    dst[4*i    ] = x >> 24;
1147                    dst[4*i + 1] = x;
1148                    dst[4*i + 2] = x >> 8;
1149                    dst[4*i + 3] = x >> 16;
1150                }
1151            }
1152
1153            PTR_ADD(dstBase, dstScan);
1154            PTR_ADD(srcBase, srcScan);
1155        }
1156        return;
1157    }
1158
1159    if (srcScan == width && dstScan == 4*width) {
1160        width *= height;
1161        height = 1;
1162    }
1163
1164    BMASK_FOR_ARGB
1165
1166    dzero = vis_fzero();
1167
1168    for (j = 0; j < height; j++) {
1169        mlib_u8 *src = srcBase;
1170        mlib_u8 *dst = dstBase;
1171        mlib_u8 *dst_end;
1172
1173        dst_end = dst + 4*width;
1174
1175        if (!((mlib_s32)dst & 7)) {
1176#pragma pipeloop(0)
1177            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1178                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1179                                   ((mlib_f32*)pixLut)[src[1]]);
1180                mask = vis_fcmplt32(dd, dzero);
1181                ARGB2ABGR_DB(dd)
1182                vis_pst_32(dd, dst, mask);
1183                src += 2;
1184            }
1185        }
1186
1187        while (dst < dst_end) {
1188            x = pixLut[*src++];
1189            if (x < 0) {
1190                dst[0] = x >> 24;
1191                dst[1] = x;
1192                dst[2] = x >> 8;
1193                dst[3] = x >> 16;
1194            }
1195            dst += 4;
1196        }
1197
1198        PTR_ADD(dstBase, dstScan);
1199        PTR_ADD(srcBase, srcScan);
1200    }
1201}
1202
1203/***************************************************************/
1204
1205void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparBgCopy)(BCOPY_PARAMS)
1206{
1207    jint *pixLut = pSrcInfo->lutBase;
1208    mlib_s32 dstScan = pDstInfo->scanStride;
1209    mlib_s32 srcScan = pSrcInfo->scanStride;
1210    mlib_d64 dd, dzero, d_bgpixel;
1211    mlib_s32 i, j, x, mask;
1212    mlib_s32 bgpix0 = bgpixel;
1213    mlib_s32 bgpix1 = bgpixel >> 8;
1214    mlib_s32 bgpix2 = bgpixel >> 16;
1215    mlib_s32 bgpix3 = bgpixel >> 24;
1216
1217    if (width < 8) {
1218        for (j = 0; j < height; j++) {
1219            mlib_u8 *src = srcBase;
1220            mlib_u8 *dst = dstBase;
1221
1222            for (i = 0; i < width; i++) {
1223                x = pixLut[src[i]];
1224                if (x < 0) {
1225                    dst[4*i    ] = x >> 24;
1226                    dst[4*i + 1] = x;
1227                    dst[4*i + 2] = x >> 8;
1228                    dst[4*i + 3] = x >> 16;
1229                } else {
1230                    dst[4*i    ] = bgpix0;
1231                    dst[4*i + 1] = bgpix1;
1232                    dst[4*i + 2] = bgpix2;
1233                    dst[4*i + 3] = bgpix3;
1234                }
1235            }
1236
1237            PTR_ADD(dstBase, dstScan);
1238            PTR_ADD(srcBase, srcScan);
1239        }
1240        return;
1241    }
1242
1243    if (srcScan == width && dstScan == 4*width) {
1244        width *= height;
1245        height = 1;
1246    }
1247
1248    BMASK_FOR_ARGB
1249
1250    dzero = vis_fzero();
1251    d_bgpixel = vis_freg_pair(vis_ldfa_ASI_PL(&bgpixel),
1252                              vis_ldfa_ASI_PL(&bgpixel));
1253
1254    for (j = 0; j < height; j++) {
1255        mlib_u8 *src = srcBase;
1256        mlib_u8 *dst = dstBase;
1257        mlib_u8 *dst_end;
1258
1259        dst_end = dst + 4*width;
1260
1261        if (!((mlib_s32)dst & 7)) {
1262#pragma pipeloop(0)
1263            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1264                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1265                                   ((mlib_f32*)pixLut)[src[1]]);
1266                mask = vis_fcmplt32(dd, dzero);
1267                ARGB2ABGR_DB(dd)
1268                *(mlib_d64*)dst = d_bgpixel;
1269                vis_pst_32(dd, dst, mask);
1270                src += 2;
1271            }
1272        }
1273
1274        while (dst < dst_end) {
1275            x = pixLut[*src++];
1276            if (x < 0) {
1277                dst[0] = x >> 24;
1278                dst[1] = x;
1279                dst[2] = x >> 8;
1280                dst[3] = x >> 16;
1281            } else {
1282                dst[0] = bgpix0;
1283                dst[1] = bgpix1;
1284                dst[2] = bgpix2;
1285                dst[3] = bgpix3;
1286            }
1287            dst += 4;
1288        }
1289
1290        PTR_ADD(dstBase, dstScan);
1291        PTR_ADD(srcBase, srcScan);
1292    }
1293}
1294
1295/***************************************************************/
1296
1297void ADD_SUFF(ByteIndexedToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1298{
1299    jint *pixLut = pSrcInfo->lutBase;
1300    mlib_s32 dstScan = pDstInfo->scanStride;
1301    mlib_s32 srcScan = pSrcInfo->scanStride;
1302    mlib_d64 dd, d_old;
1303    mlib_s32 i, j, x;
1304
1305/*
1306    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1307        ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
1308        return;
1309    }
1310*/
1311
1312    if (width < 8) {
1313        for (j = 0; j < height; j++) {
1314            mlib_u8 *src = srcBase;
1315            mlib_u8 *dst = dstBase;
1316            mlib_s32 tmpsxloc = sxloc;
1317
1318            PTR_ADD(src, (syloc >> shift) * srcScan);
1319
1320            for (i = 0; i < width; i++) {
1321                x = pixLut[src[tmpsxloc >> shift]];
1322                tmpsxloc += sxinc;
1323                dst[4*i    ] = x >> 24;
1324                dst[4*i + 1] = x;
1325                dst[4*i + 2] = x >> 8;
1326                dst[4*i + 3] = x >> 16;
1327            }
1328
1329            PTR_ADD(dstBase, dstScan);
1330            syloc += syinc;
1331        }
1332        return;
1333    }
1334
1335    BMASK_FOR_ARGB
1336
1337    for (j = 0; j < height; j++) {
1338        mlib_u8 *src = srcBase;
1339        mlib_u8 *dst = dstBase;
1340        mlib_u8 *dst_end;
1341        mlib_s32 tmpsxloc = sxloc;
1342
1343        PTR_ADD(src, (syloc >> shift) * srcScan);
1344
1345        dst_end = dst + 4*width;
1346
1347        if (!((mlib_s32)dst & 7)) {
1348#pragma pipeloop(0)
1349            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1350                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1351                                       src[(tmpsxloc + sxinc) >> shift]);
1352                tmpsxloc += 2*sxinc;
1353                ARGB2ABGR_DB(dd)
1354                *(mlib_d64*)dst = dd;
1355            }
1356        } else {
1357            mlib_d64 *dp;
1358
1359            dp = vis_alignaddr(dst, 0);
1360            dd = vis_faligndata(dp[0], dp[0]);
1361            vis_alignaddrl(dst, 0);
1362
1363#pragma pipeloop(0)
1364            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1365                d_old = dd;
1366                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1367                                       src[(tmpsxloc + sxinc) >> shift]);
1368                tmpsxloc += 2*sxinc;
1369                ARGB2ABGR_DB(dd)
1370                *dp++ = vis_faligndata(d_old, dd);
1371            }
1372
1373            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1374        }
1375
1376        while (dst < dst_end) {
1377            x = pixLut[src[tmpsxloc >> shift]];
1378            tmpsxloc += sxinc;
1379            dst[0] = x >> 24;
1380            dst[1] = x;
1381            dst[2] = x >> 8;
1382            dst[3] = x >> 16;
1383            dst += 4;
1384        }
1385
1386        PTR_ADD(dstBase, dstScan);
1387        syloc += syinc;
1388    }
1389}
1390
1391/***************************************************************/
1392
1393void ADD_SUFF(ByteIndexedBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
1394{
1395    jint *pixLut = pSrcInfo->lutBase;
1396    mlib_s32 dstScan = pDstInfo->scanStride;
1397    mlib_s32 srcScan = pSrcInfo->scanStride;
1398    mlib_d64 dd, dzero;
1399    mlib_s32 i, j, x, mask;
1400
1401/*
1402    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1403        ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
1404        return;
1405    }
1406*/
1407
1408    if (width < 8) {
1409        for (j = 0; j < height; j++) {
1410            mlib_u8 *src = srcBase;
1411            mlib_u8 *dst = dstBase;
1412            mlib_s32 tmpsxloc = sxloc;
1413
1414            PTR_ADD(src, (syloc >> shift) * srcScan);
1415
1416            for (i = 0; i < width; i++) {
1417                x = pixLut[src[tmpsxloc >> shift]];
1418                tmpsxloc += sxinc;
1419                if (x < 0) {
1420                    dst[4*i    ] = x >> 24;
1421                    dst[4*i + 1] = x;
1422                    dst[4*i + 2] = x >> 8;
1423                    dst[4*i + 3] = x >> 16;
1424                }
1425            }
1426
1427            PTR_ADD(dstBase, dstScan);
1428            syloc += syinc;
1429        }
1430        return;
1431    }
1432
1433    BMASK_FOR_ARGB
1434
1435    dzero = vis_fzero();
1436
1437    for (j = 0; j < height; j++) {
1438        mlib_u8 *src = srcBase;
1439        mlib_u8 *dst = dstBase;
1440        mlib_u8 *dst_end;
1441        mlib_s32 tmpsxloc = sxloc;
1442
1443        PTR_ADD(src, (syloc >> shift) * srcScan);
1444
1445        dst_end = dst + 4*width;
1446
1447        if (!((mlib_s32)dst & 7)) {
1448#pragma pipeloop(0)
1449            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1450                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1451                                       src[(tmpsxloc + sxinc) >> shift]);
1452                tmpsxloc += 2*sxinc;
1453                mask = vis_fcmplt32(dd, dzero);
1454                ARGB2ABGR_DB(dd)
1455                vis_pst_32(dd, dst, mask);
1456            }
1457        }
1458
1459        while (dst < dst_end) {
1460            x = pixLut[src[tmpsxloc >> shift]];
1461            tmpsxloc += sxinc;
1462            if (x < 0) {
1463                dst[0] = x >> 24;
1464                dst[1] = x;
1465                dst[2] = x >> 8;
1466                dst[3] = x >> 16;
1467            }
1468            dst += 4;
1469        }
1470
1471        PTR_ADD(dstBase, dstScan);
1472        syloc += syinc;
1473    }
1474}
1475
1476/***************************************************************/
1477
1478void ADD_SUFF(IntArgbBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
1479{
1480    mlib_s32 dstScan = pDstInfo->scanStride;
1481    mlib_s32 srcScan = pSrcInfo->scanStride;
1482    mlib_d64 dd, amask;
1483    mlib_s32 i, j, x, mask;
1484
1485    if (width < 16) {
1486        for (j = 0; j < height; j++) {
1487            mlib_s32 *src = srcBase;
1488            mlib_u8  *dst = dstBase;
1489            mlib_s32 tmpsxloc = sxloc;
1490
1491            PTR_ADD(src, (syloc >> shift) * srcScan);
1492
1493            for (i = 0; i < width; i++) {
1494                x = src[tmpsxloc >> shift];
1495                tmpsxloc += sxinc;
1496                if (x >> 24) {
1497                    dst[4*i    ] = 0xFF;
1498                    dst[4*i + 1] = x;
1499                    dst[4*i + 2] = x >> 8;
1500                    dst[4*i + 3] = x >> 16;
1501                }
1502            }
1503
1504            PTR_ADD(dstBase, dstScan);
1505            syloc += syinc;
1506        }
1507        return;
1508    }
1509
1510    BMASK_FOR_ARGB
1511
1512    amask = vis_to_double_dup(0xFF000000);
1513
1514    for (j = 0; j < height; j++) {
1515        mlib_s32 *src = srcBase;
1516        mlib_u8  *dst = dstBase;
1517        mlib_u8  *dst_end;
1518        mlib_s32 tmpsxloc = sxloc;
1519
1520        PTR_ADD(src, (syloc >> shift) * srcScan);
1521
1522        dst_end = dst + 4*width;
1523
1524        if (!((mlib_s32)dst & 7)) {
1525#pragma pipeloop(0)
1526            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1527                mlib_s32 *pp0 = src + (tmpsxloc >> shift);
1528                mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
1529                dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);
1530                tmpsxloc += 2*sxinc;
1531                ARGB2ABGR_DB(dd)
1532                dd = vis_for(dd, amask);
1533                mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
1534                       (((-*(mlib_u8*)pp1) >> 31) & 1);
1535                vis_pst_32(dd, dst, mask);
1536            }
1537        }
1538
1539        while (dst < dst_end) {
1540            x = src[tmpsxloc >> shift];
1541            tmpsxloc += sxinc;
1542            if (x >> 24) {
1543                dst[0] = 0xFF;
1544                dst[1] = x;
1545                dst[2] = x >> 8;
1546                dst[3] = x >> 16;
1547            }
1548            dst += 4;
1549        }
1550
1551        PTR_ADD(dstBase, dstScan);
1552        syloc += syinc;
1553    }
1554}
1555
1556/***************************************************************/
1557
1558#ifdef MLIB_ADD_SUFF
1559#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver_F =       \
1560             IntArgbBmToFourByteAbgrScaleXparOver_F
1561#else
1562#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver =         \
1563             IntArgbBmToFourByteAbgrScaleXparOver
1564#endif
1565
1566/***************************************************************/
1567
1568void ADD_SUFF(FourByteAbgrToIntArgbScaleConvert)(SCALE_PARAMS)
1569{
1570    mlib_s32 dstScan = pDstInfo->scanStride;
1571    mlib_s32 srcScan = pSrcInfo->scanStride;
1572    mlib_s32 i, j;
1573
1574    if (width < 16) {
1575        for (j = 0; j < height; j++) {
1576            mlib_u8  *src = srcBase;
1577            mlib_s32 *dst = dstBase;
1578            mlib_s32 tmpsxloc = sxloc;
1579
1580            PTR_ADD(src, (syloc >> shift) * srcScan);
1581
1582            for (i = 0; i < width; i++) {
1583                mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1584                *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1585                tmpsxloc += sxinc;
1586            }
1587
1588            PTR_ADD(dstBase, dstScan);
1589            syloc += syinc;
1590        }
1591        return;
1592    }
1593
1594    BMASK_FOR_ARGB
1595
1596    for (j = 0; j < height; j++) {
1597        mlib_u8  *src = srcBase;
1598        mlib_s32 *dst = dstBase;
1599        mlib_s32 *dst_end = dst + width;
1600        mlib_s32 tmpsxloc = sxloc;
1601        mlib_s32 off;
1602        mlib_d64 dd, dd0, dd1;
1603        mlib_f32 *pp0, *pp1;
1604
1605        PTR_ADD(src, (syloc >> shift) * srcScan);
1606
1607        if ((mlib_s32)dst & 7) {
1608            mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1609            *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1610            tmpsxloc += sxinc;
1611        }
1612
1613        off = (mlib_s32)src & 3;
1614        if (!off) {
1615#pragma pipeloop(0)
1616            for (; dst <= (dst_end - 2); dst += 2) {
1617                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1618                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1619                tmpsxloc += 2*sxinc;
1620                dd = vis_freg_pair(pp0[0], pp1[0]);
1621                ARGB2ABGR_DB(dd)
1622                *(mlib_d64*)dst = dd;
1623            }
1624        } else {
1625            vis_alignaddr(NULL, off);
1626#pragma pipeloop(0)
1627            for (; dst <= (dst_end - 2); dst += 2) {
1628                pp0 = (mlib_f32*)(src - off) + (tmpsxloc >> shift);
1629                pp1 = (mlib_f32*)(src - off) + ((tmpsxloc + sxinc) >> shift);
1630                tmpsxloc += 2*sxinc;
1631                dd0 = vis_freg_pair(pp0[0], pp0[1]);
1632                dd1 = vis_freg_pair(pp1[0], pp1[1]);
1633                dd0 = vis_faligndata(dd0, dd0);
1634                dd1 = vis_faligndata(dd1, dd1);
1635                ARGB2ABGR_FL2(dd, vis_read_hi(dd0), vis_read_hi(dd1))
1636                *(mlib_d64*)dst = dd;
1637            }
1638        }
1639
1640        if (dst < dst_end) {
1641            mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1642            *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1643            tmpsxloc += sxinc;
1644        }
1645
1646        PTR_ADD(dstBase, dstScan);
1647        syloc += syinc;
1648    }
1649}
1650
1651/***************************************************************/
1652
1653void ADD_SUFF(IntArgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1654{
1655    mlib_s32 dstScan = pDstInfo->scanStride;
1656    mlib_s32 srcScan = pSrcInfo->scanStride;
1657    mlib_s32 i, j;
1658    mlib_s32 x;
1659
1660    if (width < 16) {
1661        for (j = 0; j < height; j++) {
1662            mlib_s32 *src = srcBase;
1663            mlib_u8  *dst = dstBase;
1664            mlib_s32 tmpsxloc = sxloc;
1665
1666            PTR_ADD(src, (syloc >> shift) * srcScan);
1667
1668            for (i = 0; i < width; i++) {
1669                x = src[tmpsxloc >> shift];
1670                tmpsxloc += sxinc;
1671                dst[4*i    ] = x >> 24;
1672                dst[4*i + 1] = x;
1673                dst[4*i + 2] = x >> 8;
1674                dst[4*i + 3] = x >> 16;
1675            }
1676
1677            PTR_ADD(dstBase, dstScan);
1678            syloc += syinc;
1679        }
1680        return;
1681    }
1682
1683    BMASK_FOR_ARGB
1684
1685    for (j = 0; j < height; j++) {
1686        mlib_s32 *src = srcBase;
1687        mlib_u8  *dst = dstBase;
1688        mlib_u8  *dst_end = dst + 4*width;
1689        mlib_s32 tmpsxloc = sxloc;
1690        mlib_d64 dd, d_old;
1691        mlib_f32 *pp0, *pp1;
1692
1693        PTR_ADD(src, (syloc >> shift) * srcScan);
1694
1695        if (!((mlib_s32)dst & 3)) {
1696            if ((mlib_s32)dst & 7) {
1697                x = src[tmpsxloc >> shift];
1698                tmpsxloc += sxinc;
1699                dst[0] = x >> 24;
1700                dst[1] = x;
1701                dst[2] = x >> 8;
1702                dst[3] = x >> 16;
1703                dst += 4;
1704            }
1705#pragma pipeloop(0)
1706            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1707                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1708                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1709                tmpsxloc += 2*sxinc;
1710                dd = vis_freg_pair(pp0[0], pp1[0]);
1711                ARGB2ABGR_DB(dd)
1712                *(mlib_d64*)dst = dd;
1713            }
1714        } else {
1715            mlib_d64 *dp;
1716
1717            dp = vis_alignaddr(dst, 0);
1718            dd = vis_faligndata(dp[0], dp[0]);
1719            vis_alignaddrl(dst, 0);
1720
1721#pragma pipeloop(0)
1722            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1723                d_old = dd;
1724                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1725                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1726                tmpsxloc += 2*sxinc;
1727                dd = vis_freg_pair(pp0[0], pp1[0]);
1728                ARGB2ABGR_DB(dd)
1729                *dp++ = vis_faligndata(d_old, dd);
1730            }
1731
1732            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1733        }
1734
1735        if (dst < dst_end) {
1736            x = src[tmpsxloc >> shift];
1737            tmpsxloc += sxinc;
1738            dst[0] = x >> 24;
1739            dst[1] = x;
1740            dst[2] = x >> 8;
1741            dst[3] = x >> 16;
1742            dst += 4;
1743        }
1744
1745        PTR_ADD(dstBase, dstScan);
1746        syloc += syinc;
1747    }
1748}
1749
1750/***************************************************************/
1751
1752void ADD_SUFF(IntRgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1753{
1754    mlib_s32 dstScan = pDstInfo->scanStride;
1755    mlib_s32 srcScan = pSrcInfo->scanStride;
1756    mlib_s32 i, j;
1757    mlib_s32 x;
1758    mlib_d64 amask = vis_to_double_dup(0xFF000000);
1759
1760    if (width < 16) {
1761        for (j = 0; j < height; j++) {
1762            mlib_s32 *src = srcBase;
1763            mlib_u8  *dst = dstBase;
1764            mlib_s32 tmpsxloc = sxloc;
1765
1766            PTR_ADD(src, (syloc >> shift) * srcScan);
1767
1768            for (i = 0; i < width; i++) {
1769                x = src[tmpsxloc >> shift];
1770                tmpsxloc += sxinc;
1771                dst[4*i    ] = 0xFF;
1772                dst[4*i + 1] = x;
1773                dst[4*i + 2] = x >> 8;
1774                dst[4*i + 3] = x >> 16;
1775            }
1776
1777            PTR_ADD(dstBase, dstScan);
1778            syloc += syinc;
1779        }
1780        return;
1781    }
1782
1783    BMASK_FOR_ARGB
1784
1785    for (j = 0; j < height; j++) {
1786        mlib_s32 *src = srcBase;
1787        mlib_u8  *dst = dstBase;
1788        mlib_u8  *dst_end = dst + 4*width;
1789        mlib_s32 tmpsxloc = sxloc;
1790        mlib_d64 dd, d_old;
1791        mlib_f32 *pp0, *pp1;
1792
1793        PTR_ADD(src, (syloc >> shift) * srcScan);
1794
1795        if (!((mlib_s32)dst & 3)) {
1796            if ((mlib_s32)dst & 7) {
1797                x = src[tmpsxloc >> shift];
1798                tmpsxloc += sxinc;
1799                dst[0] = 0xFF;
1800                dst[1] = x;
1801                dst[2] = x >> 8;
1802                dst[3] = x >> 16;
1803                dst += 4;
1804            }
1805#pragma pipeloop(0)
1806            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1807                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1808                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1809                tmpsxloc += 2*sxinc;
1810                dd = vis_freg_pair(pp0[0], pp1[0]);
1811                RGB2ABGR_DB(dd)
1812                *(mlib_d64*)dst = dd;
1813            }
1814        } else {
1815            mlib_d64 *dp;
1816
1817            dp = vis_alignaddr(dst, 0);
1818            dd = vis_faligndata(dp[0], dp[0]);
1819            vis_alignaddrl(dst, 0);
1820
1821#pragma pipeloop(0)
1822            for (; dst <= (dst_end - 2*4); dst += 2*4) {
1823                d_old = dd;
1824                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1825                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1826                tmpsxloc += 2*sxinc;
1827                dd = vis_freg_pair(pp0[0], pp1[0]);
1828                RGB2ABGR_DB(dd)
1829                *dp++ = vis_faligndata(d_old, dd);
1830            }
1831
1832            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1833        }
1834
1835        if (dst < dst_end) {
1836            x = src[tmpsxloc >> shift];
1837            tmpsxloc += sxinc;
1838            dst[0] = 0xFF;
1839            dst[1] = x;
1840            dst[2] = x >> 8;
1841            dst[3] = x >> 16;
1842            dst += 4;
1843        }
1844
1845        PTR_ADD(dstBase, dstScan);
1846        syloc += syinc;
1847    }
1848}
1849
1850/***************************************************************/
1851
1852void ADD_SUFF(FourByteAbgrDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,
1853                                           ImageRef *glyphs,
1854                                           jint totalGlyphs,
1855                                           jint fgpixel, jint argbcolor,
1856                                           jint clipLeft, jint clipTop,
1857                                           jint clipRight, jint clipBottom,
1858                                           NativePrimitive * pPrim,
1859                                           CompositeInfo * pCompInfo)
1860{
1861    mlib_d64 buff[BUFF_SIZE/2];
1862    void     *pbuff = buff;
1863    mlib_s32 glyphCounter;
1864    mlib_s32 scan = pRasInfo->scanStride;
1865    mlib_u8  *dstBase;
1866    mlib_s32 i, j;
1867    mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
1868    mlib_d64 done, done16, d_half;
1869    mlib_s32 pix, mask;
1870    mlib_f32 fgpixel_f, srcG_f;
1871    mlib_s32 max_width = BUFF_SIZE;
1872
1873    done = vis_to_double_dup(0x7fff7fff);
1874    done16 = vis_to_double_dup(0x7fff);
1875    d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
1876
1877    fgpixel_f = vis_ldfa_ASI_PL(&fgpixel);
1878    fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f);
1879    srcG_f = vis_to_float(argbcolor);
1880    ARGB2ABGR_FL(srcG_f)
1881
1882    vis_write_gsr(0 << 3);
1883
1884    for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
1885        const jubyte *pixels;
1886        unsigned int rowBytes;
1887        int left, top;
1888        int width, height;
1889        int right, bottom;
1890
1891        pixels = (const jubyte *) glyphs[glyphCounter].pixels;
1892
1893        if (!pixels) continue;
1894
1895        left = glyphs[glyphCounter].x;
1896        top = glyphs[glyphCounter].y;
1897        width = glyphs[glyphCounter].width;
1898        height = glyphs[glyphCounter].height;
1899        rowBytes = width;
1900        right = left + width;
1901        bottom = top + height;
1902        if (left < clipLeft) {
1903            pixels += clipLeft - left;
1904            left = clipLeft;
1905        }
1906        if (top < clipTop) {
1907            pixels += (clipTop - top) * rowBytes;
1908            top = clipTop;
1909        }
1910        if (right > clipRight) {
1911            right = clipRight;
1912        }
1913        if (bottom > clipBottom) {
1914            bottom = clipBottom;
1915        }
1916        if (right <= left || bottom <= top) {
1917            continue;
1918        }
1919        width = right - left;
1920        height = bottom - top;
1921
1922        dstBase = pRasInfo->rasBase;
1923        PTR_ADD(dstBase, top*scan + 4*left);
1924
1925        if (((mlib_s32)dstBase | scan) & 3) {
1926            if (width > max_width) {
1927                if (pbuff != buff) {
1928                    mlib_free(pbuff);
1929                }
1930                pbuff = mlib_malloc(width*sizeof(mlib_s32));
1931                if (pbuff == NULL) return;
1932                max_width = width;
1933            }
1934        }
1935
1936        for (j = 0; j < height; j++) {
1937            mlib_u8  *src = (void*)pixels;
1938            mlib_s32 *dst, *dst_end;
1939            mlib_u8 *dst_start;
1940
1941            if ((mlib_s32)dstBase & 3) {
1942                COPY_NA(dstBase, pbuff, width*sizeof(mlib_s32));
1943                dst = pbuff;
1944            } else {
1945                dst = (void*)dstBase;
1946            }
1947            dst_start = (void*)dst;
1948            dst_end = dst + width;
1949
1950            /* Need to reset the GSR from the values set by the
1951             * convert call near the end of this loop.
1952             */
1953            vis_write_gsr(7 << 0);
1954
1955            if ((mlib_s32)dst & 7) {
1956                pix = *src++;
1957                dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1958                dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1959                *(mlib_f32*)dst = vis_fpack16(dd);
1960                if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1961                dst++;
1962            }
1963
1964#pragma pipeloop(0)
1965            for (; dst <= (dst_end - 2); dst += 2) {
1966                dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
1967                                      ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
1968                mask = vis_fcmplt32(dmix0, done16);
1969                dmix1 = vis_fpsub16(done, dmix0);
1970                src += 2;
1971
1972                dd = *(mlib_d64*)dst;
1973                d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
1974                d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
1975                e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
1976                e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
1977                d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
1978                d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
1979                dd = vis_fpack16_pair(d0, d1);
1980
1981                *(mlib_d64*)dst = fgpixel_d;
1982                vis_pst_32(dd, dst, mask);
1983            }
1984
1985            while (dst < dst_end) {
1986                pix = *src++;
1987                dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1988                dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1989                *(mlib_f32*)dst = vis_fpack16(dd);
1990                if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1991                dst++;
1992            }
1993
1994            ADD_SUFF(IntArgbPreToIntArgbConvert)(dst_start, dst_start,
1995                                                 width, 1,
1996                                                 pRasInfo, pRasInfo,
1997                                                 pPrim, pCompInfo);
1998
1999            if ((mlib_s32)dstBase & 3) {
2000                COPY_NA(dst_start, dstBase, width*sizeof(mlib_s32));
2001            }
2002
2003            PTR_ADD(dstBase, scan);
2004            pixels += rowBytes;
2005        }
2006    }
2007
2008    if (pbuff != buff) {
2009        mlib_free(pbuff);
2010    }
2011}
2012
2013/***************************************************************/
2014
#endif /* !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF) */
2016