1/*
2 * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
32#define GET_ARGBPRE(i)         \
33    0xFF000000 | (src[3*i + 2] << 16) | (src[3*i + 1] << 8) | src[3*i]
34
35/***************************************************************/
36
37#define CONVERT_PRE(rr, dstA, dstARGB)         \
38    rr = vis_fmul8x16(dstARGB, ((mlib_d64*)vis_div8pre_tbl)[dstA])
39
40/***************************************************************/
41
42void ADD_SUFF(IntArgbPreToIntArgbConvert)(BLIT_PARAMS)
43{
44    mlib_s32 dstScan = pDstInfo->scanStride;
45    mlib_s32 srcScan = pSrcInfo->scanStride;
46    mlib_s32 dstA0, dstA1;
47    mlib_d64 res0, res1, dstARGB;
48    mlib_f32 dstARGB0;
49    mlib_s32 i, i0, j;
50
51    vis_write_gsr(7 << 3);
52
53    if (dstScan == 4*width && srcScan == 4*width) {
54        width *= height;
55        height = 1;
56    }
57
58    for (j = 0; j < height; j++) {
59        mlib_f32 *src = srcBase;
60        mlib_f32 *dst = dstBase;
61
62        i = i0 = 0;
63
64        if ((mlib_s32)dst & 7) {
65            dstA0 = *(mlib_u8*)(src + i);
66            dstARGB0 = src[i];
67            CONVERT_PRE(res0, dstA0, dstARGB0);
68            dst[i] = vis_fpack16(res0);
69
70            i0 = 1;
71        }
72
73#pragma pipeloop(0)
74        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
75            dstA0 = *(mlib_u8*)(src + i);
76            dstA1 = *(mlib_u8*)(src + i + 1);
77            dstARGB = vis_freg_pair(src[i], src[i + 1]);
78
79            CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
80            CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
81
82            res0 = vis_fpack16_pair(res0, res1);
83
84            *(mlib_d64*)(dst + i) = res0;
85        }
86
87        if (i < width) {
88            dstA0 = *(mlib_u8*)(src + i);
89            dstARGB0 = src[i];
90            CONVERT_PRE(res0, dstA0, dstARGB0);
91            dst[i] = vis_fpack16(res0);
92        }
93
94        PTR_ADD(dstBase, dstScan);
95        PTR_ADD(srcBase, srcScan);
96    }
97}
98
99/***************************************************************/
100
101void ADD_SUFF(IntArgbPreToIntArgbScaleConvert)(SCALE_PARAMS)
102{
103    mlib_s32 dstScan = pDstInfo->scanStride;
104    mlib_s32 srcScan = pSrcInfo->scanStride;
105    mlib_s32 dstA0, dstA1;
106    mlib_d64 res0, res1, dstARGB;
107    mlib_f32 dstARGB0;
108    mlib_s32 i, i0, j, ind0, ind1;
109
110    if (width < 16) {
111        for (j = 0; j < height; j++) {
112            mlib_s32 *src = srcBase;
113            mlib_u8  *dst = dstBase;
114            mlib_s32 tmpsxloc = sxloc;
115
116            PTR_ADD(src, (syloc >> shift) * srcScan);
117
118            for (i = 0; i < width; i++) {
119                mlib_u32 argb = src[tmpsxloc >> shift];
120                mlib_u32 a, r, g, b;
121                b = argb & 0xff;
122                g = (argb >> 8) & 0xff;
123                r = (argb >> 16) & 0xff;
124                a = argb >> 24;
125                dst[4*i] = a;
126                if (a == 0) a = 255; /* a |= (a - 1) >> 24; */
127                dst[4*i + 1] = div8table[a][r];
128                dst[4*i + 2] = div8table[a][g];
129                dst[4*i + 3] = div8table[a][b];
130                tmpsxloc += sxinc;
131            }
132
133            PTR_ADD(dstBase, dstScan);
134            syloc += syinc;
135        }
136        return;
137    }
138
139    vis_write_gsr(7 << 3);
140
141    for (j = 0; j < height; j++) {
142        mlib_f32 *src = srcBase;
143        mlib_f32 *dst = dstBase;
144        mlib_s32 tmpsxloc = sxloc;
145
146        PTR_ADD(src, (syloc >> shift) * srcScan);
147
148        i = i0 = 0;
149
150        if ((mlib_s32)dst & 7) {
151            ind0 = tmpsxloc >> shift;
152            tmpsxloc += sxinc;
153            dstA0 = *(mlib_u8*)(src + ind0);
154            dstARGB0 = src[ind0];
155            CONVERT_PRE(res0, dstA0, dstARGB0);
156            dst[i] = vis_fpack16(res0);
157
158            i0 = 1;
159        }
160
161#pragma pipeloop(0)
162        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
163            ind0 = tmpsxloc >> shift;
164            tmpsxloc += sxinc;
165            ind1 = tmpsxloc >> shift;
166            tmpsxloc += sxinc;
167            dstA0 = *(mlib_u8*)(src + ind0);
168            dstA1 = *(mlib_u8*)(src + ind1);
169
170            dstARGB = vis_freg_pair(src[ind0], src[ind1]);
171
172            CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
173            CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
174
175            res0 = vis_fpack16_pair(res0, res1);
176
177            *(mlib_d64*)(dst + i) = res0;
178        }
179
180        if (i < width) {
181            ind0 = tmpsxloc >> shift;
182            tmpsxloc += sxinc;
183            dstA0 = *(mlib_u8*)(src + ind0);
184            dstARGB0 = src[ind0];
185            CONVERT_PRE(res0, dstA0, dstARGB0);
186            dst[i] = vis_fpack16(res0);
187        }
188
189        PTR_ADD(dstBase, dstScan);
190        syloc += syinc;
191    }
192}
193
194/***************************************************************/
195
196#undef  CONVERT_PRE
197#define CONVERT_PRE(rr, dstA, dstARGB)         \
198    rr = MUL8_VIS(dstARGB, dstA)
199
200void ADD_SUFF(IntArgbToIntArgbPreConvert)(BLIT_PARAMS)
201{
202    mlib_s32 dstScan = pDstInfo->scanStride;
203    mlib_s32 srcScan = pSrcInfo->scanStride;
204    mlib_s32 dstA0, dstA1;
205    mlib_d64 res0, res1, dstARGB;
206    mlib_f32 dstARGB0;
207    mlib_s32 i, i0, j;
208
209    vis_write_gsr(0 << 3);
210
211    if (dstScan == 4*width && srcScan == 4*width) {
212        width *= height;
213        height = 1;
214    }
215
216    for (j = 0; j < height; j++) {
217        mlib_f32 *src = srcBase;
218        mlib_f32 *dst = dstBase;
219
220        i = i0 = 0;
221
222        if ((mlib_s32)dst & 7) {
223            dstA0 = *(mlib_u8*)(src + i);
224            dstARGB0 = src[i];
225            CONVERT_PRE(res0, dstA0, dstARGB0);
226            dst[i] = vis_fpack16(res0);
227            *(mlib_u8*)(dst + i) = dstA0;
228
229            i0 = 1;
230        }
231
232#pragma pipeloop(0)
233        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
234            dstA0 = *(mlib_u8*)(src + i);
235            dstA1 = *(mlib_u8*)(src + i + 1);
236            dstARGB = vis_freg_pair(src[i], src[i + 1]);
237
238            CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
239            CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
240
241            res0 = vis_fpack16_pair(res0, res1);
242
243            *(mlib_d64*)(dst + i) = res0;
244            vis_pst_8(dstARGB, dst + i, 0x88);
245        }
246
247        if (i < width) {
248            dstA0 = *(mlib_u8*)(src + i);
249            dstARGB0 = src[i];
250            CONVERT_PRE(res0, dstA0, dstARGB0);
251            dst[i] = vis_fpack16(res0);
252            *(mlib_u8*)(dst + i) = dstA0;
253        }
254
255        PTR_ADD(dstBase, dstScan);
256        PTR_ADD(srcBase, srcScan);
257    }
258}
259
260/***************************************************************/
261
262void ADD_SUFF(IntArgbToIntArgbPreScaleConvert)(SCALE_PARAMS)
263{
264    mlib_s32 dstScan = pDstInfo->scanStride;
265    mlib_s32 srcScan = pSrcInfo->scanStride;
266    mlib_s32 dstA0, dstA1;
267    mlib_d64 res0, res1, dstARGB;
268    mlib_f32 dstARGB0;
269    mlib_s32 i, i0, j, ind0, ind1;
270
271    if (width < 16) {
272        for (j = 0; j < height; j++) {
273            mlib_s32 *src = srcBase;
274            mlib_u8  *dst = dstBase;
275            mlib_s32 tmpsxloc = sxloc;
276
277            PTR_ADD(src, (syloc >> shift) * srcScan);
278
279            for (i = 0; i < width; i++) {
280                mlib_u32 argb = src[tmpsxloc >> shift];
281                mlib_u32 a, r, g, b;
282                b = argb & 0xff;
283                g = (argb >> 8) & 0xff;
284                r = (argb >> 16) & 0xff;
285                a = argb >> 24;
286                dst[4*i] = a;
287                dst[4*i + 1] = mul8table[a][r];
288                dst[4*i + 2] = mul8table[a][g];
289                dst[4*i + 3] = mul8table[a][b];
290                tmpsxloc += sxinc;
291            }
292
293            PTR_ADD(dstBase, dstScan);
294            syloc += syinc;
295        }
296        return;
297    }
298
299    vis_write_gsr(0 << 3);
300
301    for (j = 0; j < height; j++) {
302        mlib_f32 *src = srcBase;
303        mlib_f32 *dst = dstBase;
304        mlib_s32 tmpsxloc = sxloc;
305
306        PTR_ADD(src, (syloc >> shift) * srcScan);
307
308        i = i0 = 0;
309
310        if ((mlib_s32)dst & 7) {
311            ind0 = tmpsxloc >> shift;
312            tmpsxloc += sxinc;
313            dstA0 = *(mlib_u8*)(src + ind0);
314            dstARGB0 = src[ind0];
315            CONVERT_PRE(res0, dstA0, dstARGB0);
316            dst[i] = vis_fpack16(res0);
317            *(mlib_u8*)(dst + i) = dstA0;
318
319            i0 = 1;
320        }
321
322#pragma pipeloop(0)
323        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
324            ind0 = tmpsxloc >> shift;
325            tmpsxloc += sxinc;
326            ind1 = tmpsxloc >> shift;
327            tmpsxloc += sxinc;
328            dstA0 = *(mlib_u8*)(src + ind0);
329            dstA1 = *(mlib_u8*)(src + ind1);
330
331            dstARGB = vis_freg_pair(src[ind0], src[ind1]);
332
333            CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
334            CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
335
336            res0 = vis_fpack16_pair(res0, res1);
337
338            *(mlib_d64*)(dst + i) = res0;
339            vis_pst_8(dstARGB, dst + i, 0x88);
340        }
341
342        if (i < width) {
343            ind0 = tmpsxloc >> shift;
344            tmpsxloc += sxinc;
345            dstA0 = *(mlib_u8*)(src + ind0);
346            dstARGB0 = src[ind0];
347            CONVERT_PRE(res0, dstA0, dstARGB0);
348            dst[i] = vis_fpack16(res0);
349            *(mlib_u8*)(dst + i) = dstA0;
350        }
351
352        PTR_ADD(dstBase, dstScan);
353        syloc += syinc;
354    }
355}
356
357/***************************************************************/
358
359void ADD_SUFF(IntArgbToIntArgbPreXorBlit)(BLIT_PARAMS)
360{
361    mlib_s32 dstScan = pDstInfo->scanStride;
362    mlib_s32 srcScan = pSrcInfo->scanStride;
363    mlib_s32 xorpixel = pCompInfo->details.xorPixel;
364    mlib_s32 alphamask = pCompInfo->alphaMask;
365    mlib_s32 dstA0, dstA1;
366    mlib_d64 res0, res1, dstARGB, dd, d_xorpixel, d_alphamask, maskRGB;
367    mlib_d64 d_round;
368    mlib_f32 dstARGB0, ff;
369    mlib_s32 i, i0, j;
370
371    vis_write_gsr(0 << 3);
372
373    if (dstScan == 4*width && srcScan == 4*width) {
374        width *= height;
375        height = 1;
376    }
377
378    d_xorpixel = vis_to_double_dup(xorpixel);
379    d_alphamask = vis_to_double_dup(alphamask);
380    maskRGB = vis_to_double_dup(0xFFFFFF);
381    d_round = vis_to_double_dup(((1 << 16) | 1) << 6);
382
383    xorpixel >>= 24;
384    alphamask >>= 24;
385
386    for (j = 0; j < height; j++) {
387        mlib_f32 *src = srcBase;
388        mlib_f32 *dst = dstBase;
389
390        i = i0 = 0;
391
392        if ((mlib_s32)dst & 7) {
393            dstA0 = *(mlib_u8*)(src + i);
394            dstARGB0 = src[i];
395            if (dstA0 & 0x80) {
396                CONVERT_PRE(res0, dstA0, dstARGB0);
397                res0 = vis_fpadd16(res0, d_round);
398                ff = vis_fpack16(res0);
399                ff = vis_fxors(ff, vis_read_hi(d_xorpixel));
400                ff = vis_fandnots(vis_read_hi(d_alphamask), ff);
401                ff = vis_fxors(ff, dst[i]);
402                dstA0 = *(mlib_u8*)(dst + i) ^
403                        ((dstA0 ^ xorpixel) &~ alphamask);
404                dst[i] = ff;
405                *(mlib_u8*)(dst + i) = dstA0;
406            }
407
408            i0 = 1;
409        }
410
411#pragma pipeloop(0)
412        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
413            dstA0 = *(mlib_u8*)(src + i);
414            dstA1 = *(mlib_u8*)(src + i + 1);
415            dstARGB = vis_freg_pair(src[i], src[i + 1]);
416
417            CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
418            CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
419            res0 = vis_fpadd16(res0, d_round);
420            res1 = vis_fpadd16(res1, d_round);
421            dd = vis_fpack16_pair(res0, res1);
422
423            dd = vis_for(vis_fand(maskRGB, dd), vis_fandnot(maskRGB, dstARGB));
424
425            dd = vis_fxor(dd, d_xorpixel);
426            dd = vis_fandnot(d_alphamask, dd);
427            dd = vis_fxor(dd, *(mlib_d64*)(dst + i));
428
429            vis_pst_32(dd, dst + i, ((dstA0 >> 6) & 2) | (dstA1 >> 7));
430        }
431
432        if (i < width) {
433            dstA0 = *(mlib_u8*)(src + i);
434            dstARGB0 = src[i];
435            if (dstA0 & 0x80) {
436                CONVERT_PRE(res0, dstA0, dstARGB0);
437                res0 = vis_fpadd16(res0, d_round);
438                ff = vis_fpack16(res0);
439                ff = vis_fxors(ff, vis_read_hi(d_xorpixel));
440                ff = vis_fandnots(vis_read_hi(d_alphamask), ff);
441                ff = vis_fxors(ff, dst[i]);
442                dstA0 = *(mlib_u8*)(dst + i) ^
443                        ((dstA0 ^ xorpixel) &~ alphamask);
444                dst[i] = ff;
445                *(mlib_u8*)(dst + i) = dstA0;
446            }
447        }
448
449        PTR_ADD(dstBase, dstScan);
450        PTR_ADD(srcBase, srcScan);
451    }
452}
453
454/***************************************************************/
455
456void ADD_SUFF(IntRgbToIntArgbPreConvert)(BLIT_PARAMS)
457{
458    mlib_s32 dstScan = pDstInfo->scanStride;
459    mlib_s32 srcScan = pSrcInfo->scanStride;
460    mlib_d64 dd, mask;
461    mlib_s32 i, i0, j;
462
463    if (dstScan == 4*width && srcScan == 4*width) {
464        width *= height;
465        height = 1;
466    }
467
468    mask = vis_to_double_dup(0xFF000000);
469
470    for (j = 0; j < height; j++) {
471        mlib_f32 *src = srcBase;
472        mlib_f32 *dst = dstBase;
473
474        i = i0 = 0;
475
476        if ((mlib_s32)dst & 7) {
477            dst[i] = vis_fors(src[i], vis_read_hi(mask));
478            i0 = 1;
479        }
480
481#pragma pipeloop(0)
482        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
483            dd = vis_freg_pair(src[i], src[i + 1]);
484
485            *(mlib_d64*)(dst + i) = vis_for(dd, mask);
486        }
487
488        if (i < width) {
489            dst[i] = vis_fors(src[i], vis_read_hi(mask));
490        }
491
492        PTR_ADD(dstBase, dstScan);
493        PTR_ADD(srcBase, srcScan);
494    }
495}
496
497/***************************************************************/
498
499void ADD_SUFF(IntRgbToIntArgbPreScaleConvert)(SCALE_PARAMS)
500{
501    mlib_s32 dstScan = pDstInfo->scanStride;
502    mlib_s32 srcScan = pSrcInfo->scanStride;
503    mlib_d64 dd, mask;
504    mlib_s32 j;
505
506    mask = vis_to_double_dup(0xFF000000);
507
508    for (j = 0; j < height; j++) {
509        mlib_f32 *src = srcBase;
510        mlib_f32 *dst = dstBase;
511        mlib_f32 *dst_end = dst + width;
512        mlib_s32 tmpsxloc = sxloc;
513
514        PTR_ADD(src, (syloc >> shift) * srcScan);
515
516        if ((mlib_s32)dst & 7) {
517            *dst++ = vis_fors(src[tmpsxloc >> shift], vis_read_hi(mask));
518            tmpsxloc += sxinc;
519        }
520
521#pragma pipeloop(0)
522        for (; dst <= dst_end - 2; dst += 2) {
523            dd = vis_freg_pair(src[tmpsxloc >> shift],
524                               src[(tmpsxloc + sxinc) >> shift]);
525            *(mlib_d64*)dst = vis_for(dd, mask);
526            tmpsxloc += 2*sxinc;
527        }
528
529        if (dst < dst_end) {
530            *dst = vis_fors(src[tmpsxloc >> shift], vis_read_hi(mask));
531        }
532
533        PTR_ADD(dstBase, dstScan);
534        syloc += syinc;
535    }
536}
537
538/***************************************************************/
539
540#define BGR_TO_ARGB {                                          \
541    mlib_d64 sda, sdb, sdc, sdd, sde, sdf;                     \
542    mlib_d64 s_1, s_2, s_3, a13, b13, a02, b02;                \
543                                                               \
544    sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));     \
545    sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));     \
546    sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));     \
547                                                               \
548    sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));     \
549    sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));     \
550    sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));     \
551                                                               \
552    s_3 = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));     \
553    s_2 = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));     \
554    s_1 = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));     \
555                                                               \
556    a13 = vis_fpmerge(vis_read_hi(s_1), vis_read_hi(s_3));     \
557    b13 = vis_fpmerge(vis_read_lo(s_1), vis_read_lo(s_3));     \
558    a02 = vis_fpmerge(vis_read_hi(s_0), vis_read_hi(s_2));     \
559    b02 = vis_fpmerge(vis_read_lo(s_0), vis_read_lo(s_2));     \
560                                                               \
561    dd0 = vis_fpmerge(vis_read_hi(a02), vis_read_hi(a13));     \
562    dd1 = vis_fpmerge(vis_read_lo(a02), vis_read_lo(a13));     \
563    dd2 = vis_fpmerge(vis_read_hi(b02), vis_read_hi(b13));     \
564    dd3 = vis_fpmerge(vis_read_lo(b02), vis_read_lo(b13));     \
565}
566
567/***************************************************************/
568
569void ADD_SUFF(ThreeByteBgrToIntArgbPreConvert)(BLIT_PARAMS)
570{
571    mlib_s32 dstScan = pDstInfo->scanStride;
572    mlib_s32 srcScan = pSrcInfo->scanStride;
573    mlib_d64 *sp;
574    mlib_d64 s_0;
575    mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
576    mlib_s32 i, i0, j;
577
578    if (srcScan == 3*width && dstScan == 4*width) {
579        width *= height;
580        height = 1;
581    }
582
583    s_0 = vis_fone();
584
585    for (j = 0; j < height; j++) {
586        mlib_u8  *src = srcBase;
587        mlib_f32 *dst = dstBase;
588
589        i = i0 = 0;
590
591        if ((mlib_s32)dst & 7) {
592            ((mlib_s32*)dst)[i] = GET_ARGBPRE(i);
593            i0 = 1;
594        }
595
596        sp = vis_alignaddr(src, 3*i0);
597        s3 = *sp++;
598
599#pragma pipeloop(0)
600        for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
601            s0 = s3;
602            s1 = *sp++;
603            s2 = *sp++;
604            s3 = *sp++;
605            sd0 = vis_faligndata(s0, s1);
606            sd1 = vis_faligndata(s1, s2);
607            sd2 = vis_faligndata(s2, s3);
608
609            BGR_TO_ARGB
610
611            *(mlib_d64*)(dst + i    ) = dd0;
612            *(mlib_d64*)(dst + i + 2) = dd1;
613            *(mlib_d64*)(dst + i + 4) = dd2;
614            *(mlib_d64*)(dst + i + 6) = dd3;
615        }
616
617        for (; i < width; i++) {
618            ((mlib_s32*)dst)[i] = GET_ARGBPRE(i);
619        }
620
621        PTR_ADD(dstBase, dstScan);
622        PTR_ADD(srcBase, srcScan);
623    }
624}
625
626/***************************************************************/
627
628void ADD_SUFF(ThreeByteBgrToIntArgbPreScaleConvert)(SCALE_PARAMS)
629{
630    mlib_s32 dstScan = pDstInfo->scanStride;
631    mlib_s32 srcScan = pSrcInfo->scanStride;
632    mlib_d64 dd, maskFF;
633    mlib_s32 i, i0, i1, j;
634
635    maskFF = vis_fone();
636
637    vis_alignaddr(NULL, 7);
638
639    for (j = 0; j < height; j++) {
640        mlib_u8  *src = srcBase;
641        mlib_f32 *dst = dstBase;
642        mlib_f32 *dst_end = dst + width;
643        mlib_s32 tmpsxloc = sxloc;
644
645        PTR_ADD(src, (syloc >> shift) * srcScan);
646
647        if ((mlib_s32)dst & 7) {
648            i = tmpsxloc >> shift;
649            tmpsxloc += sxinc;
650            *(mlib_s32*)dst = GET_ARGBPRE(i);
651            dst++;
652        }
653
654#pragma pipeloop(0)
655        for (; dst <= dst_end - 2; dst += 2) {
656            i0 = tmpsxloc >> shift;
657            i1 = (tmpsxloc + sxinc) >> shift;
658            tmpsxloc += 2*sxinc;
659
660            dd = vis_faligndata(vis_ld_u8(src + 3*i1    ), dd);
661            dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
662            dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
663            dd = vis_faligndata(maskFF, dd);
664            dd = vis_faligndata(vis_ld_u8(src + 3*i0    ), dd);
665            dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
666            dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
667            dd = vis_faligndata(maskFF, dd);
668
669            *(mlib_d64*)dst = dd;
670        }
671
672        for (; dst < dst_end; dst++) {
673            i = tmpsxloc >> shift;
674            tmpsxloc += sxinc;
675            *(mlib_s32*)dst = GET_ARGBPRE(i);
676        }
677
678        PTR_ADD(dstBase, dstScan);
679        syloc += syinc;
680    }
681}
682
683/***************************************************************/
684
685void ADD_SUFF(ByteIndexedToIntArgbPreConvert)(BLIT_PARAMS)
686{
687    jint *pixLut = pSrcInfo->lutBase;
688    mlib_s32 buff[256];
689    mlib_s32 dstScan = pDstInfo->scanStride;
690    mlib_s32 srcScan = pSrcInfo->scanStride;
691    mlib_s32 i, i0, j;
692
693    if (width < 16) {
694        for (j = 0; j < height; j++) {
695            mlib_u8  *src = srcBase;
696            mlib_s32 *dst = dstBase;
697
698            for (i = 0; i < width; i++) {
699                mlib_s32 a, r, g, b;
700                mlib_u32 x = pixLut[src[i]];
701                b = x & 0xff;
702                g = (x >> 8) & 0xff;
703                r = (x >> 16) & 0xff;
704                a = x >> 24;
705                r = mul8table[a][r];
706                g = mul8table[a][g];
707                b = mul8table[a][b];
708                dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
709            }
710
711            PTR_ADD(dstBase, dstScan);
712            PTR_ADD(srcBase, srcScan);
713        }
714        return;
715    }
716
717    if (srcScan == width && dstScan == 4*width) {
718        width *= height;
719        height = 1;
720    }
721
722    ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
723                                         pSrcInfo, pDstInfo, pPrim, pCompInfo);
724
725    for (j = 0; j < height; j++) {
726        mlib_u8  *src = srcBase;
727        mlib_s32 *dst = dstBase;
728
729        i = i0 = 0;
730
731        if ((mlib_s32)dst & 7) {
732            dst[i] = buff[src[i]];
733            i0 = 1;
734        }
735
736#pragma pipeloop(0)
737        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
738            *(mlib_d64*)(dst + i) = LOAD_2F32(buff, src[i], src[i + 1]);
739        }
740
741        for (; i < width; i++) {
742            dst[i] = buff[src[i]];
743        }
744
745        PTR_ADD(dstBase, dstScan);
746        PTR_ADD(srcBase, srcScan);
747    }
748}
749
750/***************************************************************/
751
752void ADD_SUFF(ByteIndexedToIntArgbPreScaleConvert)(SCALE_PARAMS)
753{
754    jint *pixLut = pSrcInfo->lutBase;
755    mlib_s32 buff[256];
756    mlib_s32 dstScan = pDstInfo->scanStride;
757    mlib_s32 srcScan = pSrcInfo->scanStride;
758    mlib_s32 i, j;
759
760    if (width < 16) {
761        for (j = 0; j < height; j++) {
762            mlib_u8  *src = srcBase;
763            mlib_s32 *dst = dstBase;
764            mlib_s32 tmpsxloc = sxloc;
765
766            PTR_ADD(src, (syloc >> shift) * srcScan);
767
768            for (i = 0; i < width; i++) {
769                mlib_s32 a, r, g, b;
770                mlib_u32 x = pixLut[src[tmpsxloc >> shift]];
771                tmpsxloc += sxinc;
772                b = x & 0xff;
773                g = (x >> 8) & 0xff;
774                r = (x >> 16) & 0xff;
775                a = x >> 24;
776                r = mul8table[a][r];
777                g = mul8table[a][g];
778                b = mul8table[a][b];
779                dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
780            }
781
782            PTR_ADD(dstBase, dstScan);
783            syloc += syinc;
784        }
785        return;
786    }
787
788    ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
789                                         pSrcInfo, pDstInfo, pPrim, pCompInfo);
790
791    for (j = 0; j < height; j++) {
792        mlib_u8  *src = srcBase;
793        mlib_s32 *dst = dstBase;
794        mlib_s32 *dst_end = dst + width;
795        mlib_s32 tmpsxloc = sxloc;
796
797        PTR_ADD(src, (syloc >> shift) * srcScan);
798
799        if ((mlib_s32)dst & 7) {
800            *dst++ = buff[src[tmpsxloc >> shift]];
801            tmpsxloc += sxinc;
802        }
803
804#pragma pipeloop(0)
805        for (; dst <= dst_end - 2; dst += 2) {
806            *(mlib_d64*)dst = LOAD_2F32(buff, src[tmpsxloc >> shift],
807                                              src[(tmpsxloc + sxinc) >> shift]);
808            tmpsxloc += 2*sxinc;
809        }
810
811        for (; dst < dst_end; dst++) {
812            *dst = buff[src[tmpsxloc >> shift]];
813            tmpsxloc += sxinc;
814        }
815
816        PTR_ADD(dstBase, dstScan);
817        syloc += syinc;
818    }
819}
820
821/***************************************************************/
822
823void ADD_SUFF(ByteIndexedBmToIntArgbPreXparOver)(BLIT_PARAMS)
824{
825    jint *pixLut = pSrcInfo->lutBase;
826    mlib_s32 buff[256];
827    mlib_s32 dstScan = pDstInfo->scanStride;
828    mlib_s32 srcScan = pSrcInfo->scanStride;
829    mlib_d64 dd, dzero;
830    mlib_s32 i, i0, j, x, mask;
831
832    if (width < 16) {
833        for (j = 0; j < height; j++) {
834            mlib_u8  *src = srcBase;
835            mlib_s32 *dst = dstBase;
836
837            for (i = 0; i < width; i++) {
838                mlib_s32 a, r, g, b;
839                mlib_s32 x = pixLut[src[i]];
840                if (x < 0) {
841                    b = x & 0xff;
842                    g = (x >> 8) & 0xff;
843                    r = (x >> 16) & 0xff;
844                    a = (mlib_u32)x >> 24;
845                    r = mul8table[a][r];
846                    g = mul8table[a][g];
847                    b = mul8table[a][b];
848                    dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
849                }
850            }
851
852            PTR_ADD(dstBase, dstScan);
853            PTR_ADD(srcBase, srcScan);
854        }
855        return;
856    }
857
858    if (srcScan == width && dstScan == 4*width) {
859        width *= height;
860        height = 1;
861    }
862
863    ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
864                                         pSrcInfo, pDstInfo, pPrim, pCompInfo);
865
866    dzero = vis_fzero();
867
868    for (j = 0; j < height; j++) {
869        mlib_u8  *src = srcBase;
870        mlib_s32 *dst = dstBase;
871
872        i = i0 = 0;
873
874        if ((mlib_s32)dst & 7) {
875            x = buff[src[i]];
876            if (x < 0) {
877                dst[i] = x;
878            }
879            i0 = 1;
880        }
881
882#pragma pipeloop(0)
883        for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
884            dd = vis_freg_pair(((mlib_f32*)buff)[src[i]],
885                               ((mlib_f32*)buff)[src[i + 1]]);
886            mask = vis_fcmplt32(dd, dzero);
887            vis_pst_32(dd, dst + i, mask);
888        }
889
890        for (; i < width; i++) {
891            x = buff[src[i]];
892            if (x < 0) {
893                dst[i] = x;
894            }
895        }
896
897        PTR_ADD(dstBase, dstScan);
898        PTR_ADD(srcBase, srcScan);
899    }
900}
901
902/***************************************************************/
903
904void ADD_SUFF(ByteIndexedBmToIntArgbPreScaleXparOver)(SCALE_PARAMS)
905{
906    jint *pixLut = pSrcInfo->lutBase;
907    mlib_s32 buff[256];
908    mlib_s32 dstScan = pDstInfo->scanStride;
909    mlib_s32 srcScan = pSrcInfo->scanStride;
910    mlib_d64 dd, dzero;
911    mlib_s32 i, j, x, mask;
912
913    if (width < 16) {
914        for (j = 0; j < height; j++) {
915            mlib_u8  *src = srcBase;
916            mlib_s32 *dst = dstBase;
917            mlib_s32 tmpsxloc = sxloc;
918
919            PTR_ADD(src, (syloc >> shift) * srcScan);
920
921            for (i = 0; i < width; i++) {
922                mlib_s32 a, r, g, b;
923                mlib_s32 x = pixLut[src[tmpsxloc >> shift]];
924                tmpsxloc += sxinc;
925                if (x < 0) {
926                    b = x & 0xff;
927                    g = (x >> 8) & 0xff;
928                    r = (x >> 16) & 0xff;
929                    a = (mlib_u32)x >> 24;
930                    r = mul8table[a][r];
931                    g = mul8table[a][g];
932                    b = mul8table[a][b];
933                    dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
934                }
935            }
936
937            PTR_ADD(dstBase, dstScan);
938            syloc += syinc;
939        }
940        return;
941    }
942
943    ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
944                                         pSrcInfo, pDstInfo, pPrim, pCompInfo);
945
946    dzero = vis_fzero();
947
948    for (j = 0; j < height; j++) {
949        mlib_u8  *src = srcBase;
950        mlib_s32 *dst = dstBase;
951        mlib_s32 *dst_end = dst + width;
952        mlib_s32 tmpsxloc = sxloc;
953
954        PTR_ADD(src, (syloc >> shift) * srcScan);
955
956        if ((mlib_s32)dst & 7) {
957            x = buff[src[tmpsxloc >> shift]];
958            tmpsxloc += sxinc;
959            if (x < 0) {
960                *dst = x;
961            }
962            dst++;
963        }
964
965#pragma pipeloop(0)
966        for (; dst <= dst_end - 2; dst += 2) {
967            dd = LOAD_2F32(buff, src[tmpsxloc >> shift],
968                                 src[(tmpsxloc + sxinc) >> shift]);
969            tmpsxloc += 2*sxinc;
970            mask = vis_fcmplt32(dd, dzero);
971            vis_pst_32(dd, dst, mask);
972        }
973
974        for (; dst < dst_end; dst++) {
975            x = buff[src[tmpsxloc >> shift]];
976            tmpsxloc += sxinc;
977            if (x < 0) {
978                *dst = x;
979            }
980        }
981
982        PTR_ADD(dstBase, dstScan);
983        syloc += syinc;
984    }
985}
986
987/***************************************************************/
988
989void ADD_SUFF(ByteIndexedBmToIntArgbPreXparBgCopy)(BCOPY_PARAMS)
990{
991    jint *pixLut = pSrcInfo->lutBase;
992    mlib_s32 buff[256];
993    mlib_s32 dstScan = pDstInfo->scanStride;
994    mlib_s32 srcScan = pSrcInfo->scanStride;
995    mlib_d64 dd, dzero, d_bgpixel;
996    mlib_s32 i, j, x, mask;
997
998    if (width < 16) {
999        for (j = 0; j < height; j++) {
1000            mlib_u8  *src = srcBase;
1001            mlib_s32 *dst = dstBase;
1002
1003            for (i = 0; i < width; i++) {
1004                x = pixLut[src[i]];
1005                if (x < 0) {
1006                    mlib_s32 a, r, g, b;
1007                    b = x & 0xff;
1008                    g = (x >> 8) & 0xff;
1009                    r = (x >> 16) & 0xff;
1010                    a = (mlib_u32)x >> 24;
1011                    r = mul8table[a][r];
1012                    g = mul8table[a][g];
1013                    b = mul8table[a][b];
1014                    dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
1015                } else {
1016                    dst[i] = bgpixel;
1017                }
1018            }
1019
1020            PTR_ADD(dstBase, dstScan);
1021            PTR_ADD(srcBase, srcScan);
1022        }
1023        return;
1024    }
1025
1026    ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
1027                                         pSrcInfo, pDstInfo, pPrim, pCompInfo);
1028
1029    if (srcScan == width && dstScan == 4*width) {
1030        width *= height;
1031        height = 1;
1032    }
1033
1034    dzero = vis_fzero();
1035    d_bgpixel = vis_to_double_dup(bgpixel);
1036
1037    for (j = 0; j < height; j++) {
1038        mlib_u8  *src = srcBase;
1039        mlib_s32 *dst = dstBase;
1040        mlib_s32 *dst_end;
1041
1042        dst_end = dst + width;
1043
1044        if ((mlib_s32)dst & 7) {
1045            x = buff[*src++];
1046            if (x < 0) {
1047                *dst = x;
1048            } else {
1049                *dst = bgpixel;
1050            }
1051            dst++;
1052        }
1053
1054#pragma pipeloop(0)
1055        for (; dst <= (dst_end - 2); dst += 2) {
1056            dd = vis_freg_pair(((mlib_f32*)buff)[src[0]],
1057                               ((mlib_f32*)buff)[src[1]]);
1058            mask = vis_fcmplt32(dd, dzero);
1059            *(mlib_d64*)dst = d_bgpixel;
1060            vis_pst_32(dd, dst, mask);
1061            src += 2;
1062        }
1063
1064        while (dst < dst_end) {
1065            x = buff[*src++];
1066            if (x < 0) {
1067                *dst = x;
1068            } else {
1069                *dst = bgpixel;
1070            }
1071            dst++;
1072        }
1073
1074        PTR_ADD(dstBase, dstScan);
1075        PTR_ADD(srcBase, srcScan);
1076    }
1077}
1078
1079/***************************************************************/
1080
1081void ADD_SUFF(IntArgbPreDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,
1082                                         ImageRef *glyphs,
1083                                         jint totalGlyphs,
1084                                         jint fgpixel, jint argbcolor,
1085                                         jint clipLeft, jint clipTop,
1086                                         jint clipRight, jint clipBottom,
1087                                         NativePrimitive * pPrim,
1088                                         CompositeInfo * pCompInfo)
1089{
1090    mlib_s32 glyphCounter;
1091    mlib_s32 scan = pRasInfo->scanStride;
1092    mlib_u8  *dstBase, *dstBase0;
1093    mlib_s32 i, j;
1094    mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1;
1095    mlib_d64 done, d_half;
1096    mlib_s32 pix;
1097    mlib_f32 srcG_f;
1098
1099    done = vis_to_double_dup(0x7fff7fff);
1100    d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
1101
1102    srcG_f = vis_to_float(argbcolor);
1103
1104    for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
1105        const jubyte *pixels, *pixels0;
1106        unsigned int rowBytes;
1107        int left, top;
1108        int width, height;
1109        int right, bottom;
1110
1111        pixels = (const jubyte *) glyphs[glyphCounter].pixels;
1112
1113        if (!pixels) continue;
1114
1115        left = glyphs[glyphCounter].x;
1116        top = glyphs[glyphCounter].y;
1117        width = glyphs[glyphCounter].width;
1118        height = glyphs[glyphCounter].height;
1119        rowBytes = width;
1120        right = left + width;
1121        bottom = top + height;
1122        if (left < clipLeft) {
1123            pixels += clipLeft - left;
1124            left = clipLeft;
1125        }
1126        if (top < clipTop) {
1127            pixels += (clipTop - top) * rowBytes;
1128            top = clipTop;
1129        }
1130        if (right > clipRight) {
1131            right = clipRight;
1132        }
1133        if (bottom > clipBottom) {
1134            bottom = clipBottom;
1135        }
1136        if (right <= left || bottom <= top) {
1137            continue;
1138        }
1139        width = right - left;
1140        height = bottom - top;
1141
1142        dstBase = pRasInfo->rasBase;
1143        PTR_ADD(dstBase, top*scan + 4*left);
1144
1145        pixels0 = pixels;
1146        dstBase0 = dstBase;
1147
1148        for (j = 0; j < height; j++) {
1149            mlib_u8  *src = (void*)pixels;
1150            mlib_s32 *dst, *dst_end;
1151
1152            dst = (void*)dstBase;
1153            dst_end = dst + width;
1154
1155            ADD_SUFF(IntArgbPreToIntArgbConvert)(dstBase, dstBase, width, 1,
1156                                                 pRasInfo, pRasInfo,
1157                                                 pPrim, pCompInfo);
1158
1159            vis_write_gsr(0 << 3);
1160
1161            if ((mlib_s32)dst & 7) {
1162                pix = *src++;
1163                dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1164                dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1165                *(mlib_f32*)dst = vis_fpack16(dd);
1166                dst++;
1167            }
1168
1169#pragma pipeloop(0)
1170            for (; dst <= (dst_end - 2); dst += 2) {
1171                dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
1172                                      ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
1173                dmix1 = vis_fpsub16(done, dmix0);
1174                src += 2;
1175
1176                dd = *(mlib_d64*)dst;
1177                d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
1178                d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
1179                e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
1180                e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
1181                d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
1182                d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
1183                dd = vis_fpack16_pair(d0, d1);
1184
1185                *(mlib_d64*)dst = dd;
1186            }
1187
1188            while (dst < dst_end) {
1189                pix = *src++;
1190                dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1191                dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1192                *(mlib_f32*)dst = vis_fpack16(dd);
1193                dst++;
1194            }
1195
1196            PTR_ADD(dstBase, scan);
1197            pixels += rowBytes;
1198        }
1199
1200        pixels = pixels0;
1201        dstBase = dstBase0;
1202
1203        for (j = 0; j < height; j++) {
1204            mlib_u8  *src = (void*)pixels;
1205            mlib_s32 *dst = (void*)dstBase;
1206
1207            for (i = 0; i < width; i++) {
1208                if (src[i] == 255) dst[i] = fgpixel;
1209            }
1210            PTR_ADD(dstBase, scan);
1211            pixels += rowBytes;
1212        }
1213    }
1214}
1215
1216/***************************************************************/
1217
1218#endif /* JAVA2D_NO_MLIB */
1219