1/* 2 * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26#include <vis_proto.h> 27#include "java2d_Mlib.h" 28 29/*#define USE_TWO_BC_TABLES*/ /* a little more precise, but slow on Ultra-III */ 30 31/***************************************************************/ 32 33#define MUL_16x16(src1, src2) \ 34 vis_fpadd16(vis_fmul8sux16((src1), (src2)), \ 35 vis_fmul8ulx16((src1), (src2))) 36 37#define BILINEAR \ 38 xf = vis_fand(xf, mask7fff); \ 39 yf = vis_fand(yf, mask7fff); \ 40 xr = vis_fpsub32(mask7fff, xf); \ 41 yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf)); \ 42 yf1 = vis_fmul8x16au(mask80, vis_read_lo(yf)); \ 43 \ 44 a0 = vis_fmul8x16au(vis_read_hi(a01), vis_read_hi(xr)); \ 45 a1 = vis_fmul8x16au(vis_read_lo(a01), vis_read_hi(xf)); \ 46 a2 = vis_fmul8x16au(vis_read_hi(a23), vis_read_hi(xr)); \ 47 a3 = vis_fmul8x16au(vis_read_lo(a23), vis_read_hi(xf)); \ 48 a0 = vis_fpadd16(a0, a1); \ 49 a2 = vis_fpadd16(a2, a3); \ 50 a2 = vis_fpsub16(a2, a0); \ 51 a2 = MUL_16x16(a2, yf0); \ 52 a0 = vis_fmul8x16(mask40, a0); \ 53 a0 = vis_fpadd16(a0, a2); \ 54 a0 = vis_fpadd16(a0, d_rnd); \ 55 \ 56 b0 = vis_fmul8x16au(vis_read_hi(b01), vis_read_lo(xr)); \ 57 b1 = vis_fmul8x16au(vis_read_lo(b01), vis_read_lo(xf)); \ 58 b2 = vis_fmul8x16au(vis_read_hi(b23), vis_read_lo(xr)); \ 59 b3 = vis_fmul8x16au(vis_read_lo(b23), vis_read_lo(xf)); \ 60 b0 = vis_fpadd16(b0, b1); \ 61 b2 = vis_fpadd16(b2, b3); \ 62 b2 = vis_fpsub16(b2, b0); \ 63 b2 = MUL_16x16(b2, yf1); \ 64 b0 = vis_fmul8x16(mask40, b0); \ 65 b0 = vis_fpadd16(b0, b2); \ 66 b0 = vis_fpadd16(b0, d_rnd); \ 67 \ 68 xf = vis_fpadd32(xf, dx); \ 69 yf = vis_fpadd32(yf, dy) 70 71void 72vis_BilinearBlend(jint *pRGB, jint numpix, 73 jint xfract, jint dxfract, 74 jint yfract, jint dyfract) 75{ 76 mlib_d64 *p_src = (void*)pRGB; 77 mlib_f32 *p_dst = (void*)pRGB; 78 mlib_d64 a01, a23, a0, a1, a2, a3; 79 mlib_d64 b01, b23, b0, b1, b2, b3; 80 mlib_d64 xf, xr, dx, yf, yf0, yf1, dy; 81 mlib_d64 mask7fff, d_rnd; 82 mlib_f32 mask80, mask40; 83 mlib_s32 i; 84 85 vis_write_gsr(2 << 3); 86 87 xf = vis_to_double(xfract >> 1, (xfract + dxfract) >> 1); 88 yf = vis_to_double(yfract >> 1, (yfract + dyfract) >> 1); 89 dx = vis_to_double_dup(dxfract); 90 dy = vis_to_double_dup(dyfract); 91 92 mask7fff = vis_to_double_dup(0x7fffffff); 93 d_rnd = vis_to_double_dup(0x00100010); 94 mask80 = vis_to_float(0x80808080); 95 mask40 = vis_to_float(0x40404040); 96 97#pragma pipeloop(0) 98 for (i = 0; i < numpix/2; i++) { 99 a01 = p_src[0]; 100 a23 = p_src[1]; 101 b01 = p_src[2]; 102 b23 = p_src[3]; 103 p_src += 4; 104 105 BILINEAR; 106 107 ((mlib_d64*)p_dst)[0] = vis_fpack16_pair(a0, b0); 108 p_dst += 2; 109 } 110 111 if (numpix & 1) { 112 a01 = p_src[0]; 113 a23 = p_src[1]; 114 115 BILINEAR; 116 117 p_dst[0] = vis_fpack16(a0); 118 } 119} 120 121/***************************************************************/ 122 123static jboolean vis_bicubic_table_inited = 0; 124static mlib_d64 vis_bicubic_coeff[256 + 1]; 125#ifdef USE_TWO_BC_TABLES 126static mlib_d64 vis_bicubic_coeff2[512 + 1]; 127#endif 128 129/* 130 * REMIND: The following formulas are designed to give smooth 131 * results when 'A' is -0.5 or -1.0. 132 */ 133 134static void 135init_vis_bicubic_table(jdouble A) 136{ 137 mlib_s16 *p_tbl = (void*)vis_bicubic_coeff; 138#ifdef USE_TWO_BC_TABLES 139 mlib_s16 *p_tbl2 = (void*)vis_bicubic_coeff2; 140#endif 141 mlib_d64 x, y; 142 int i; 143 144 for (i = 0; i <= 256; i++) { 145 x = i*(1.0/256.0); 146 147 /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */ 148 y = ((A+2)*x - (A+3))*x*x + 1; 149 y *= 16384; 150 p_tbl[4*i + 1] = p_tbl[4*(256 - i) + 2] = (mlib_s16)y; 151#ifdef USE_TWO_BC_TABLES 152 y *= 2; 153 if (y >= 32767) y = 32767; 154 p_tbl2[4*i] = p_tbl2[4*i + 1] = 155 p_tbl2[4*i + 2] = p_tbl2[4*i + 3] = (mlib_s16)y; 156#endif 157 158 /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */ 159 x += 1.0; 160 y = ((A*x - 5*A)*x + 8*A)*x - 4*A; 161 y *= 16384; 162 p_tbl[4*i] = p_tbl[4*(256 - i) + 3] = (mlib_s16)y; 163#ifdef USE_TWO_BC_TABLES 164 y *= 2; 165 if (y >= 32767) y = 32767; 166 p_tbl2[4*i + 1024] = p_tbl2[4*i + 1025] = 167 p_tbl2[4*i + 1026] = p_tbl2[4*i + 1027] = (mlib_s16)y; 168#endif 169 } 170 vis_bicubic_table_inited = 1; 171} 172 173/***************************************************************/ 174 175#define MUL_BC_COEFF(x0, x1, coeff) \ 176 vis_fpadd16(vis_fmul8x16au(x0, coeff), vis_fmul8x16al(x1, coeff)) 177 178#define SAT(val, max) \ 179 do { \ 180 val -= max; /* only overflows are now positive */ \ 181 val &= (val >> 31); /* positives become 0 */ \ 182 val += max; /* range is now [0 -> max] */ \ 183 } while (0) 184 185void 186vis_BicubicBlend(jint *pRGB, jint numpix, 187 jint xfract, jint dxfract, 188 jint yfract, jint dyfract) 189{ 190 mlib_d64 *p_src = (void*)pRGB; 191 union { 192 jint theInt; 193 mlib_f32 theF32; 194 } p_dst; 195 mlib_d64 a0, a1, a2, a3, a4, a5, a6, a7; 196 mlib_d64 xf, yf, yf0, yf1, yf2, yf3; 197 mlib_d64 d_rnd; 198 mlib_f32 mask80; 199 mlib_s32 i; 200 201 if (!vis_bicubic_table_inited) { 202 init_vis_bicubic_table(-0.5); 203 } 204 205#ifdef USE_TWO_BC_TABLES 206 vis_write_gsr(2 << 3); 207 d_rnd = vis_to_double_dup(0x000f000f); 208#else 209 vis_write_gsr(4 << 3); 210 d_rnd = vis_to_double_dup(0x00030003); 211#endif 212 213 mask80 = vis_to_float(0x80808080); 214 215#pragma pipeloop(0) 216 for (i = 0; i < numpix; i++) { 217 jint xfactor, yfactor; 218 219 xfactor = URShift(xfract, 32-8); 220 xfract += dxfract; 221 xf = vis_bicubic_coeff[xfactor]; 222 223 a0 = p_src[0]; 224 a1 = p_src[1]; 225 a2 = p_src[2]; 226 a3 = p_src[3]; 227 a4 = p_src[4]; 228 a5 = p_src[5]; 229 a6 = p_src[6]; 230 a7 = p_src[7]; 231 p_src += 8; 232 233 a0 = MUL_BC_COEFF(vis_read_hi(a0), vis_read_lo(a0), vis_read_hi(xf)); 234 a1 = MUL_BC_COEFF(vis_read_hi(a1), vis_read_lo(a1), vis_read_lo(xf)); 235 a2 = MUL_BC_COEFF(vis_read_hi(a2), vis_read_lo(a2), vis_read_hi(xf)); 236 a3 = MUL_BC_COEFF(vis_read_hi(a3), vis_read_lo(a3), vis_read_lo(xf)); 237 a4 = MUL_BC_COEFF(vis_read_hi(a4), vis_read_lo(a4), vis_read_hi(xf)); 238 a5 = MUL_BC_COEFF(vis_read_hi(a5), vis_read_lo(a5), vis_read_lo(xf)); 239 a6 = MUL_BC_COEFF(vis_read_hi(a6), vis_read_lo(a6), vis_read_hi(xf)); 240 a7 = MUL_BC_COEFF(vis_read_hi(a7), vis_read_lo(a7), vis_read_lo(xf)); 241 242 a0 = vis_fpadd16(a0, a1); 243 a1 = vis_fpadd16(a2, a3); 244 a2 = vis_fpadd16(a4, a5); 245 a3 = vis_fpadd16(a6, a7); 246 247 yfactor = URShift(yfract, 32-8); 248 yfract += dyfract; 249#ifdef USE_TWO_BC_TABLES 250 yf0 = vis_bicubic_coeff2[256 + yfactor]; 251 yf1 = vis_bicubic_coeff2[yfactor]; 252 yf2 = vis_bicubic_coeff2[256 - yfactor]; 253 yf3 = vis_bicubic_coeff2[512 - yfactor]; 254#else 255 yf = vis_bicubic_coeff[yfactor]; 256 yf0 = vis_fmul8x16au(mask80, vis_read_hi(yf)); 257 yf1 = vis_fmul8x16al(mask80, vis_read_hi(yf)); 258 yf2 = vis_fmul8x16au(mask80, vis_read_lo(yf)); 259 yf3 = vis_fmul8x16al(mask80, vis_read_lo(yf)); 260#endif 261 262 a0 = MUL_16x16(a0, yf0); 263 a1 = MUL_16x16(a1, yf1); 264 a2 = MUL_16x16(a2, yf2); 265 a3 = MUL_16x16(a3, yf3); 266 a0 = vis_fpadd16(a0, d_rnd); 267 268 a0 = vis_fpadd16(vis_fpadd16(a0, a1), vis_fpadd16(a2, a3)); 269 270 p_dst.theF32 = vis_fpack16(a0); 271 { 272 int a, r, g, b; 273 b = p_dst.theInt; 274 a = (b >> 24) & 0xff; 275 r = (b >> 16) & 0xff; 276 g = (b >> 8) & 0xff; 277 b = (b ) & 0xff; 278 SAT(r, a); 279 SAT(g, a); 280 SAT(b, a); 281 *pRGB++ = ((a << 24) | (r << 16) | (g << 8) | (b)); 282 } 283 } 284} 285 286/***************************************************************/ 287