1/*
2 * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <vis_proto.h>
27#include "java2d_Mlib.h"
28
29/*#define USE_TWO_BC_TABLES*/ /* a little more precise, but slow on Ultra-III */
30
31/***************************************************************/
32
/*
 * MUL_16x16(lhs, rhs)
 *
 * Adds, with vis_fpadd16(), the two partial products that
 * vis_fmul8sux16() and vis_fmul8ulx16() produce for the same operand
 * pair.  This file uses the sum as its partitioned 16x16 multiply.
 *
 * Both arguments are expanded twice, so they must not have side effects.
 */
#define MUL_16x16(lhs, rhs)                     \
  vis_fpadd16(vis_fmul8sux16((lhs), (rhs)),     \
              vis_fmul8ulx16((lhs), (rhs)))
36
/*
 * BILINEAR_LANE - blend one sample quad (private helper of BILINEAR).
 *
 *   t0..t3    scratch variables; the blended result is left in t0
 *   p01, p23  mlib_d64 words holding the first and the second sample pair
 *   wl, wr    weights applied to the hi and the lo sample of each pair
 *   wy        weight applied to the difference between the two pairs
 *
 * Each pair is first mixed horizontally (hi * wl + lo * wr).  The
 * difference between the two mixed pairs is multiplied by wy and added
 * to the first mixed pair after that has been passed through
 * vis_fmul8x16(mask40, ...).  d_rnd is added last as the rounding term.
 *
 * Uses mask40 and d_rnd from the expanding scope.
 */
#define BILINEAR_LANE(t0, t1, t2, t3, p01, p23, wl, wr, wy)     \
  t0 = vis_fmul8x16au(vis_read_hi(p01), wl);                    \
  t1 = vis_fmul8x16au(vis_read_lo(p01), wr);                    \
  t2 = vis_fmul8x16au(vis_read_hi(p23), wl);                    \
  t3 = vis_fmul8x16au(vis_read_lo(p23), wr);                    \
  t0 = vis_fpadd16(t0, t1);                                     \
  t2 = vis_fpadd16(t2, t3);                                     \
  t2 = vis_fpsub16(t2, t0);                                     \
  t2 = MUL_16x16(t2, wy);                                       \
  t0 = vis_fmul8x16(mask40, t0);                                \
  t0 = vis_fpadd16(t0, t2);                                     \
  t0 = vis_fpadd16(t0, d_rnd)

/*
 * BILINEAR - blend two sample quads at once.
 *
 * Expects these variables in the expanding scope:
 *   inputs    a01, a23 (first quad), b01, b23 (second quad)
 *   state     xf, yf   (fractions, one 32-bit half per quad), dx, dy
 *   constants mask7fff, mask80, mask40, d_rnd
 *   scratch   xr, yf0, yf1, a1..a3, b1..b3
 *   outputs   a0 (first quad), b0 (second quad)
 *
 * The fractions are first masked with mask7fff; xr is the complement
 * (mask7fff - xf).  The hi halves of xf/xr/yf drive the "a" quad and
 * the lo halves drive the "b" quad.  On exit xf and yf have been
 * advanced by dx and dy for the next expansion.
 */
#define BILINEAR                                                \
  xf = vis_fand(xf, mask7fff);                                  \
  yf = vis_fand(yf, mask7fff);                                  \
  xr = vis_fpsub32(mask7fff, xf);                               \
  yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf));                \
  yf1 = vis_fmul8x16au(mask80, vis_read_lo(yf));                \
                                                                \
  BILINEAR_LANE(a0, a1, a2, a3, a01, a23,                       \
                vis_read_hi(xr), vis_read_hi(xf), yf0);         \
                                                                \
  BILINEAR_LANE(b0, b1, b2, b3, b01, b23,                       \
                vis_read_lo(xr), vis_read_lo(xf), yf1);         \
                                                                \
  xf = vis_fpadd32(xf, dx);                                     \
  yf = vis_fpadd32(yf, dy)
70
71void
72vis_BilinearBlend(jint *pRGB, jint numpix,
73                  jint xfract, jint dxfract,
74                  jint yfract, jint dyfract)
75{
76  mlib_d64 *p_src = (void*)pRGB;
77  mlib_f32 *p_dst = (void*)pRGB;
78  mlib_d64 a01, a23, a0, a1, a2, a3;
79  mlib_d64 b01, b23, b0, b1, b2, b3;
80  mlib_d64 xf, xr, dx, yf, yf0, yf1, dy;
81  mlib_d64 mask7fff, d_rnd;
82  mlib_f32 mask80, mask40;
83  mlib_s32 i;
84
85  vis_write_gsr(2 << 3);
86
87  xf = vis_to_double(xfract >> 1, (xfract + dxfract) >> 1);
88  yf = vis_to_double(yfract >> 1, (yfract + dyfract) >> 1);
89  dx = vis_to_double_dup(dxfract);
90  dy = vis_to_double_dup(dyfract);
91
92  mask7fff = vis_to_double_dup(0x7fffffff);
93  d_rnd = vis_to_double_dup(0x00100010);
94  mask80 = vis_to_float(0x80808080);
95  mask40 = vis_to_float(0x40404040);
96
97#pragma pipeloop(0)
98  for (i = 0; i < numpix/2; i++) {
99    a01 = p_src[0];
100    a23 = p_src[1];
101    b01 = p_src[2];
102    b23 = p_src[3];
103    p_src += 4;
104
105    BILINEAR;
106
107    ((mlib_d64*)p_dst)[0] = vis_fpack16_pair(a0, b0);
108    p_dst += 2;
109  }
110
111  if (numpix & 1) {
112    a01 = p_src[0];
113    a23 = p_src[1];
114
115    BILINEAR;
116
117    p_dst[0] = vis_fpack16(a0);
118  }
119}
120
121/***************************************************************/
122
123static jboolean vis_bicubic_table_inited = 0;
124static mlib_d64 vis_bicubic_coeff[256 + 1];
125#ifdef USE_TWO_BC_TABLES
126static mlib_d64 vis_bicubic_coeff2[512 + 1];
127#endif
128
129/*
130 * REMIND: The following formulas are designed to give smooth
131 * results when 'A' is -0.5 or -1.0.
132 */
133
134static void
135init_vis_bicubic_table(jdouble A)
136{
137  mlib_s16 *p_tbl = (void*)vis_bicubic_coeff;
138#ifdef USE_TWO_BC_TABLES
139  mlib_s16 *p_tbl2 = (void*)vis_bicubic_coeff2;
140#endif
141  mlib_d64 x, y;
142  int i;
143
144  for (i = 0; i <= 256; i++) {
145    x = i*(1.0/256.0);
146
147    /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
148    y = ((A+2)*x - (A+3))*x*x + 1;
149    y *= 16384;
150    p_tbl[4*i + 1] = p_tbl[4*(256 - i) + 2] = (mlib_s16)y;
151#ifdef USE_TWO_BC_TABLES
152    y *= 2;
153    if (y >= 32767) y = 32767;
154    p_tbl2[4*i] = p_tbl2[4*i + 1] =
155    p_tbl2[4*i + 2] = p_tbl2[4*i + 3] = (mlib_s16)y;
156#endif
157
158    /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
159    x += 1.0;
160    y = ((A*x - 5*A)*x + 8*A)*x - 4*A;
161    y *= 16384;
162    p_tbl[4*i] = p_tbl[4*(256 - i) + 3] = (mlib_s16)y;
163#ifdef USE_TWO_BC_TABLES
164    y *= 2;
165    if (y >= 32767) y = 32767;
166    p_tbl2[4*i + 1024] = p_tbl2[4*i + 1025] =
167    p_tbl2[4*i + 1026] = p_tbl2[4*i + 1027] = (mlib_s16)y;
168#endif
169  }
170  vis_bicubic_table_inited = 1;
171}
172
173/***************************************************************/
174
/*
 * MUL_BC_COEFF(pix_hi, pix_lo, weights)
 *
 * Weighted sum of two operands sharing one weight word:
 * vis_fmul8x16au(pix_hi, weights) + vis_fmul8x16al(pix_lo, weights),
 * added with vis_fpadd16().  'weights' is expanded twice.
 */
#define MUL_BC_COEFF(pix_hi, pix_lo, weights)                           \
  vis_fpadd16(vis_fmul8x16au((pix_hi), (weights)),                      \
              vis_fmul8x16al((pix_lo), (weights)))
177
/*
 * SAT(val, max): branch-free upper clamp of the integer lvalue 'val'.
 * Values greater than 'max' become 'max'; all other values are left
 * unchanged.  Relies on '>> 31' of a negative value yielding all one
 * bits (arithmetic shift).  Both arguments are expanded more than once.
 */
#define SAT(val, max) \
    do { \
        /* bias by max: only values above max stay positive */ \
        (val) -= (max); \
        /* zero the positives, keep the rest, then remove the bias */ \
        (val) = ((val) & ((val) >> 31)) + (max); \
    } while (0)
184
185void
186vis_BicubicBlend(jint *pRGB, jint numpix,
187                 jint xfract, jint dxfract,
188                 jint yfract, jint dyfract)
189{
190  mlib_d64 *p_src = (void*)pRGB;
191  union {
192      jint     theInt;
193      mlib_f32 theF32;
194  } p_dst;
195  mlib_d64 a0, a1, a2, a3, a4, a5, a6, a7;
196  mlib_d64 xf, yf, yf0, yf1, yf2, yf3;
197  mlib_d64 d_rnd;
198  mlib_f32 mask80;
199  mlib_s32 i;
200
201  if (!vis_bicubic_table_inited) {
202    init_vis_bicubic_table(-0.5);
203  }
204
205#ifdef USE_TWO_BC_TABLES
206  vis_write_gsr(2 << 3);
207  d_rnd = vis_to_double_dup(0x000f000f);
208#else
209  vis_write_gsr(4 << 3);
210  d_rnd = vis_to_double_dup(0x00030003);
211#endif
212
213  mask80 = vis_to_float(0x80808080);
214
215#pragma pipeloop(0)
216  for (i = 0; i < numpix; i++) {
217    jint xfactor, yfactor;
218
219    xfactor = URShift(xfract, 32-8);
220    xfract += dxfract;
221    xf = vis_bicubic_coeff[xfactor];
222
223    a0 = p_src[0];
224    a1 = p_src[1];
225    a2 = p_src[2];
226    a3 = p_src[3];
227    a4 = p_src[4];
228    a5 = p_src[5];
229    a6 = p_src[6];
230    a7 = p_src[7];
231    p_src += 8;
232
233    a0 = MUL_BC_COEFF(vis_read_hi(a0), vis_read_lo(a0), vis_read_hi(xf));
234    a1 = MUL_BC_COEFF(vis_read_hi(a1), vis_read_lo(a1), vis_read_lo(xf));
235    a2 = MUL_BC_COEFF(vis_read_hi(a2), vis_read_lo(a2), vis_read_hi(xf));
236    a3 = MUL_BC_COEFF(vis_read_hi(a3), vis_read_lo(a3), vis_read_lo(xf));
237    a4 = MUL_BC_COEFF(vis_read_hi(a4), vis_read_lo(a4), vis_read_hi(xf));
238    a5 = MUL_BC_COEFF(vis_read_hi(a5), vis_read_lo(a5), vis_read_lo(xf));
239    a6 = MUL_BC_COEFF(vis_read_hi(a6), vis_read_lo(a6), vis_read_hi(xf));
240    a7 = MUL_BC_COEFF(vis_read_hi(a7), vis_read_lo(a7), vis_read_lo(xf));
241
242    a0 = vis_fpadd16(a0, a1);
243    a1 = vis_fpadd16(a2, a3);
244    a2 = vis_fpadd16(a4, a5);
245    a3 = vis_fpadd16(a6, a7);
246
247    yfactor = URShift(yfract, 32-8);
248    yfract += dyfract;
249#ifdef USE_TWO_BC_TABLES
250    yf0 = vis_bicubic_coeff2[256 + yfactor];
251    yf1 = vis_bicubic_coeff2[yfactor];
252    yf2 = vis_bicubic_coeff2[256 - yfactor];
253    yf3 = vis_bicubic_coeff2[512 - yfactor];
254#else
255    yf = vis_bicubic_coeff[yfactor];
256    yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf));
257    yf1 = vis_fmul8x16al(mask80, vis_read_hi(yf));
258    yf2 = vis_fmul8x16au(mask80, vis_read_lo(yf));
259    yf3 = vis_fmul8x16al(mask80, vis_read_lo(yf));
260#endif
261
262    a0 = MUL_16x16(a0, yf0);
263    a1 = MUL_16x16(a1, yf1);
264    a2 = MUL_16x16(a2, yf2);
265    a3 = MUL_16x16(a3, yf3);
266    a0 = vis_fpadd16(a0, d_rnd);
267
268    a0 = vis_fpadd16(vis_fpadd16(a0, a1), vis_fpadd16(a2, a3));
269
270    p_dst.theF32 = vis_fpack16(a0);
271    {
272        int a, r, g, b;
273        b = p_dst.theInt;
274        a = (b >> 24) & 0xff;
275        r = (b >> 16) & 0xff;
276        g = (b >>  8) & 0xff;
277        b = (b      ) & 0xff;
278        SAT(r, a);
279        SAT(g, a);
280        SAT(b, a);
281        *pRGB++ = ((a << 24) | (r << 16) | (g << 8) | (b));
282    }
283  }
284}
285
286/***************************************************************/
287