1/* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28/* 29 * The functions step along the lines from xLeft to xRight and apply 30 * the bicubic filtering. 
31 * 32 */ 33 34#include "vis_proto.h" 35#include "mlib_ImageAffine.h" 36#include "mlib_v_ImageFilters.h" 37 38/***************************************************************/ 39#define DTYPE mlib_s16 40 41#define FILTER_BITS 9 42 43/***************************************************************/ 44#define sPtr srcPixelPtr 45 46/***************************************************************/ 47#define NEXT_PIXEL_1BC_S16() \ 48 xSrc = (X >> MLIB_SHIFT)-1; \ 49 ySrc = (Y >> MLIB_SHIFT)-1; \ 50 sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc 51 52/***************************************************************/ 53#define LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \ 54 dpSrc = vis_alignaddr(sPtr, 0); \ 55 data0 = dpSrc[0]; \ 56 data1 = dpSrc[1]; \ 57 row0 = vis_faligndata(data0, data1); \ 58 sPtr += srcYStride; \ 59 dpSrc = vis_alignaddr(sPtr, 0); \ 60 data0 = dpSrc[0]; \ 61 data1 = dpSrc[1]; \ 62 row1 = vis_faligndata(data0, data1); \ 63 sPtr += srcYStride; \ 64 dpSrc = vis_alignaddr(sPtr, 0); \ 65 data0 = dpSrc[0]; \ 66 data1 = dpSrc[1]; \ 67 row2 = vis_faligndata(data0, data1); \ 68 sPtr += srcYStride; \ 69 dpSrc = vis_alignaddr(sPtr, 0); \ 70 data0 = dpSrc[0]; \ 71 data1 = dpSrc[1]; \ 72 row3 = vis_faligndata(data0, data1); \ 73 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ 74 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 75 yFilter0 = yPtr[0]; \ 76 yFilter1 = yPtr[1]; \ 77 yFilter2 = yPtr[2]; \ 78 yFilter3 = yPtr[3]; \ 79 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ 80 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ 81 X += dX; \ 82 Y += dY 83 84/***************************************************************/ 85#define RESULT_1BC_S16_1PIXEL() \ 86 u0 = vis_fmul8sux16(vis_fxor(row0, mask8000), yFilter0); \ 87 u1 = vis_fmul8ulx16(vis_fxor(row0, mask8000), yFilter0); \ 88 u2 = vis_fmul8sux16(vis_fxor(row1, mask8000), yFilter1); \ 89 v0 = vis_fpadd16(u0, u1); \ 90 u3 = 
vis_fmul8ulx16(vis_fxor(row1, mask8000), yFilter1); \ 91 u0 = vis_fmul8sux16(vis_fxor(row2, mask8000), yFilter2); \ 92 v1 = vis_fpadd16(u2, u3); \ 93 u1 = vis_fmul8ulx16(vis_fxor(row2, mask8000), yFilter2); \ 94 sum = vis_fpadd16(v0, v1); \ 95 u2 = vis_fmul8sux16(vis_fxor(row3, mask8000), yFilter3); \ 96 v2 = vis_fpadd16(u0, u1); \ 97 u3 = vis_fmul8ulx16(vis_fxor(row3, mask8000), yFilter3); \ 98 sum = vis_fpadd16(sum, v2); \ 99 v3 = vis_fpadd16(u2, u3); \ 100 sum = vis_fpadd16(sum, v3); \ 101 d00 = vis_fmul8sux16(sum, xFilter); \ 102 d10 = vis_fmul8ulx16(sum, xFilter); \ 103 d0 = vis_fpadd16(d00, d10); \ 104 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ 105 d0 = vis_fmuld8sux16(f_x01000100, p0); \ 106 d1 = vis_write_lo(d1, vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0))); \ 107 res = vis_fxor(vis_fpackfix_pair(d1, d1), mask8000) 108 109/***************************************************************/ 110#define BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) \ 111 u0 = vis_fmul8sux16(vis_fxor(row0, mask8000), yFilter0); \ 112 u1 = vis_fmul8ulx16(vis_fxor(row0, mask8000), yFilter0); \ 113 dpSrc = vis_alignaddr(sPtr, 0); \ 114 u2 = vis_fmul8sux16(vis_fxor(row1, mask8000), yFilter1); \ 115 v0 = vis_fpadd16(u0, u1); \ 116 data0 = dpSrc[0]; \ 117 filterposy = (Y >> FILTER_SHIFT); \ 118 u3 = vis_fmul8ulx16(vis_fxor(row1, mask8000), yFilter1); \ 119 data1 = dpSrc[1]; \ 120 row0 = vis_faligndata(data0, data1); \ 121 filterposx = (X >> FILTER_SHIFT); \ 122 sPtr += srcYStride; \ 123 dpSrc = vis_alignaddr(sPtr, 0); \ 124 u0 = vis_fmul8sux16(vis_fxor(row2, mask8000), yFilter2); \ 125 v1 = vis_fpadd16(u2, u3); \ 126 data0 = dpSrc[0]; \ 127 u1 = vis_fmul8ulx16(vis_fxor(row2, mask8000), yFilter2); \ 128 sum = vis_fpadd16(v0, v1); \ 129 X += dX; \ 130 data1 = dpSrc[1]; \ 131 row1 = vis_faligndata(data0, data1); \ 132 sPtr += srcYStride; \ 133 dpSrc = vis_alignaddr(sPtr, 0); \ 134 u2 = vis_fmul8sux16(vis_fxor(row3, mask8000), yFilter3); \ 135 v2 = vis_fpadd16(u0, u1); 
\ 136 Y += dY; \ 137 xSrc = (X >> MLIB_SHIFT)-1; \ 138 data0 = dpSrc[0]; \ 139 u3 = vis_fmul8ulx16(vis_fxor(row3, mask8000), yFilter3); \ 140 sum = vis_fpadd16(sum, v2); \ 141 ySrc = (Y >> MLIB_SHIFT)-1; \ 142 data1 = dpSrc[1]; \ 143 filterposy &= FILTER_MASK; \ 144 row2 = vis_faligndata(data0, data1); \ 145 sPtr += srcYStride; \ 146 filterposx &= FILTER_MASK; \ 147 dpSrc = vis_alignaddr(sPtr, 0); \ 148 data0 = dpSrc[0]; \ 149 v3 = vis_fpadd16(u2, u3); \ 150 data1 = dpSrc[1]; \ 151 row3 = vis_faligndata(data0, data1); \ 152 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 153 yFilter0 = yPtr[0]; \ 154 sum = vis_fpadd16(sum, v3); \ 155 yFilter1 = yPtr[1]; \ 156 d0 = vis_fmul8sux16(sum, xFilter); \ 157 yFilter2 = yPtr[2]; \ 158 d1 = vis_fmul8ulx16(sum, xFilter); \ 159 yFilter3 = yPtr[3]; \ 160 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ 161 d0##ind = vis_fpadd16(d0, d1); \ 162 sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc 163 164/***************************************************************/ 165#define FADD_1BC_S16() \ 166 p0 = vis_fpadd16s(vis_read_hi(d00), vis_read_lo(d00)); \ 167 p1 = vis_fpadd16s(vis_read_hi(d01), vis_read_lo(d01)); \ 168 p2 = vis_fpadd16s(vis_read_hi(d02), vis_read_lo(d02)); \ 169 p3 = vis_fpadd16s(vis_read_hi(d03), vis_read_lo(d03)); \ 170 d0 = vis_fmuld8sux16(f_x01000100, p0); \ 171 d1 = vis_fmuld8sux16(f_x01000100, p1); \ 172 d2 = vis_fmuld8sux16(f_x01000100, p2); \ 173 d3 = vis_fmuld8sux16(f_x01000100, p3); \ 174 d0 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0)), \ 175 vis_fpadd32s(vis_read_hi(d1), vis_read_lo(d1))); \ 176 d1 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d2), vis_read_lo(d2)), \ 177 vis_fpadd32s(vis_read_hi(d3), vis_read_lo(d3))); \ 178 res = vis_fxor(vis_fpackfix_pair(d0, d1), mask8000) 179 180/***************************************************************/ 181mlib_status mlib_ImageAffine_u16_1ch_bc (mlib_affine_param *param) 182{ 183 DECLAREVAR_BC(); 184 mlib_s32 
filterposx, filterposy; 185 mlib_d64 data0, data1; 186 mlib_d64 sum; 187 mlib_d64 row0, row1, row2, row3; 188 mlib_f32 p0, p1, p2, p3; 189 mlib_d64 xFilter, yFilter0, yFilter1, yFilter2, yFilter3; 190 mlib_d64 v0, v1, v2, v3; 191 mlib_d64 u0, u1, u2, u3; 192 mlib_d64 d0, d1, d2, d3; 193 mlib_d64 d00, d10, d01, d02, d03; 194 mlib_d64 *yPtr; 195 mlib_d64 *dpSrc; 196 mlib_s32 align, cols, i; 197 mlib_d64 res; 198 mlib_f32 f_x01000100 = vis_to_float(0x01000100); 199 mlib_d64 mask8000 = vis_to_double_dup(0x80008000); 200 const mlib_s16 *mlib_filters_table ; 201 const mlib_s16 *mlib_filters_table_4; 202 203 if (filter == MLIB_BICUBIC) { 204 mlib_filters_table = mlib_filters_s16_bc; 205 mlib_filters_table_4 = mlib_filters_s16_bc_4; 206 } else { 207 mlib_filters_table = mlib_filters_s16_bc2; 208 mlib_filters_table_4 = mlib_filters_s16_bc2_4; 209 } 210 211 srcYStride >>= 1; 212 213 for (j = yStart; j <= yFinish; j++) { 214 215 vis_write_gsr(10 << 3); 216 217 CLIP(1); 218 219 cols = xRight - xLeft + 1; 220 align = (8 - ((mlib_addr)dstPixelPtr) & 7) & 7; 221 align >>= 1; 222 align = (cols < align)? 
cols : align; 223 224 for (i = 0; i < align; i++) { 225 NEXT_PIXEL_1BC_S16(); 226 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 227 RESULT_1BC_S16_1PIXEL(); 228 vis_st_u16(res, dstPixelPtr++); 229 } 230 231 if (i <= cols - 10) { 232 233 NEXT_PIXEL_1BC_S16(); 234 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 235 236 NEXT_PIXEL_1BC_S16(); 237 238 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); 239 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); 240 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); 241 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); 242 243 FADD_1BC_S16(); 244 245 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); 246 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); 247 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); 248 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); 249 250#pragma pipeloop(0) 251 for (; i <= cols - 14; i += 4) { 252 *(mlib_d64*)dstPixelPtr = res; 253 FADD_1BC_S16(); 254 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4); 255 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4); 256 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4); 257 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4); 258 dstPixelPtr += 4; 259 } 260 261 *(mlib_d64*)dstPixelPtr = res; 262 dstPixelPtr += 4; 263 FADD_1BC_S16(); 264 *(mlib_d64*)dstPixelPtr = res; 265 dstPixelPtr += 4; 266 267 RESULT_1BC_S16_1PIXEL(); 268 vis_st_u16(res, dstPixelPtr++); 269 270 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 271 RESULT_1BC_S16_1PIXEL(); 272 vis_st_u16(res, dstPixelPtr++); 273 i += 10; 274 } 275 276 for (; i < cols; i++) { 277 NEXT_PIXEL_1BC_S16(); 278 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 279 RESULT_1BC_S16_1PIXEL(); 280 vis_st_u16(res, dstPixelPtr++); 281 } 282 } 283 284 return MLIB_SUCCESS; 285} 286 287/***************************************************************/ 288#define NEXT_PIXEL_2BC_S16() 
\ 289 xSrc = (X >> MLIB_SHIFT)-1; \ 290 ySrc = (Y >> MLIB_SHIFT)-1; \ 291 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1) 292 293/***************************************************************/ 294#define LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \ 295 dpSrc = vis_alignaddr(sPtr, 0); \ 296 data0 = dpSrc[0]; \ 297 data1 = dpSrc[1]; \ 298 data2 = dpSrc[2]; \ 299 row00 = vis_faligndata(data0, data1); \ 300 row01 = vis_faligndata(data1, data2); \ 301 sPtr += srcYStride; \ 302 dpSrc = vis_alignaddr(sPtr, 0); \ 303 data0 = dpSrc[0]; \ 304 data1 = dpSrc[1]; \ 305 data2 = dpSrc[2]; \ 306 row10 = vis_faligndata(data0, data1); \ 307 row11 = vis_faligndata(data1, data2); \ 308 sPtr += srcYStride; \ 309 dpSrc = vis_alignaddr(sPtr, 0); \ 310 data0 = dpSrc[0]; \ 311 data1 = dpSrc[1]; \ 312 data2 = dpSrc[2]; \ 313 row20 = vis_faligndata(data0, data1); \ 314 row21 = vis_faligndata(data1, data2); \ 315 sPtr += srcYStride; \ 316 dpSrc = vis_alignaddr(sPtr, 0); \ 317 data0 = dpSrc[0]; \ 318 data1 = dpSrc[1]; \ 319 data2 = dpSrc[2]; \ 320 row30 = vis_faligndata(data0, data1); \ 321 row31 = vis_faligndata(data1, data2); \ 322 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ 323 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 324 yFilter0 = yPtr[0]; \ 325 yFilter1 = yPtr[1]; \ 326 yFilter2 = yPtr[2]; \ 327 yFilter3 = yPtr[3]; \ 328 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ 329 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ 330 X += dX; \ 331 Y += dY 332 333/***************************************************************/ 334#define RESULT_2BC_S16_1PIXEL() \ 335 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \ 336 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \ 337 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \ 338 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \ 339 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \ 340 dr1 = vis_fpmerge(vis_read_lo(dr), 
vis_read_lo(dr)); \ 341 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \ 342 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \ 343 u20 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \ 344 v00 = vis_fpadd16(u00, u01); \ 345 u21 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \ 346 v01 = vis_fpadd16(u10, u11); \ 347 u00 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \ 348 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \ 349 u01 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \ 350 u10 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \ 351 u11 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \ 352 v10 = vis_fpadd16(u20, u21); \ 353 sum0 = vis_fpadd16(v00, v10); \ 354 u20 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \ 355 v11 = vis_fpadd16(u00, u01); \ 356 u21 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \ 357 xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \ 358 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \ 359 v20 = vis_fpadd16(u10, u11); \ 360 sum1 = vis_fpadd16(v01, v11); \ 361 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \ 362 sum0 = vis_fpadd16(sum0, v20); \ 363 v21 = vis_fpadd16(u20, u21); \ 364 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \ 365 v30 = vis_fpadd16(u00, u01); \ 366 sum1 = vis_fpadd16(sum1, v21); \ 367 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \ 368 sum0 = vis_fpadd16(sum0, v30); \ 369 v31 = vis_fpadd16(u10, u11); \ 370 sum1 = vis_fpadd16(sum1, v31); \ 371 d00 = vis_fmul8sux16(sum0, xFilter0); \ 372 d10 = vis_fmul8ulx16(sum0, xFilter0); \ 373 d20 = vis_fmul8sux16(sum1, xFilter1); \ 374 d30 = vis_fmul8ulx16(sum1, xFilter1); \ 375 d0 = vis_fpadd16(d00, d10); \ 376 d1 = vis_fpadd16(d20, d30); \ 377 d0 = vis_fpadd16(d0, d1); \ 378 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ 379 d0 = vis_fmuld8sux16(f_x01000100, p0); \ 380 res = vis_fxor(vis_fpackfix_pair(d0, d0), mask8000) 381 
382/***************************************************************/ 383#define BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) \ 384 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \ 385 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \ 386 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \ 387 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \ 388 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \ 389 dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \ 390 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \ 391 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \ 392 dpSrc = vis_alignaddr(sPtr, 0); \ 393 u20 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \ 394 v00 = vis_fpadd16(u00, u01); \ 395 u21 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \ 396 data0 = dpSrc[0]; \ 397 filterposy = (Y >> FILTER_SHIFT); \ 398 v01 = vis_fpadd16(u10, u11); \ 399 data1 = dpSrc[1]; \ 400 u00 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \ 401 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \ 402 data2 = dpSrc[2]; \ 403 u01 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \ 404 row00 = vis_faligndata(data0, data1); \ 405 u10 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \ 406 row01 = vis_faligndata(data1, data2); \ 407 filterposx = (X >> FILTER_SHIFT); \ 408 sPtr += srcYStride; \ 409 dpSrc = vis_alignaddr(sPtr, 0); \ 410 u11 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \ 411 v10 = vis_fpadd16(u20, u21); \ 412 data0 = dpSrc[0]; \ 413 sum0 = vis_fpadd16(v00, v10); \ 414 X += dX; \ 415 data1 = dpSrc[1]; \ 416 u20 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \ 417 v11 = vis_fpadd16(u00, u01); \ 418 data2 = dpSrc[2]; \ 419 row10 = vis_faligndata(data0, data1); \ 420 u21 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \ 421 row11 = vis_faligndata(data1, data2); \ 422 sPtr += srcYStride; \ 423 xFilter1 = vis_fpmerge(vis_read_lo(dr), 
vis_read_lo(dr1)); \ 424 dpSrc = vis_alignaddr(sPtr, 0); \ 425 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \ 426 v20 = vis_fpadd16(u10, u11); \ 427 Y += dY; \ 428 xSrc = (X >> MLIB_SHIFT)-1; \ 429 sum1 = vis_fpadd16(v01, v11); \ 430 data0 = dpSrc[0]; \ 431 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \ 432 sum0 = vis_fpadd16(sum0, v20); \ 433 ySrc = (Y >> MLIB_SHIFT)-1; \ 434 data1 = dpSrc[1]; \ 435 v21 = vis_fpadd16(u20, u21); \ 436 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \ 437 data2 = dpSrc[2]; \ 438 v30 = vis_fpadd16(u00, u01); \ 439 filterposy &= FILTER_MASK; \ 440 row20 = vis_faligndata(data0, data1); \ 441 sum1 = vis_fpadd16(sum1, v21); \ 442 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \ 443 row21 = vis_faligndata(data1, data2); \ 444 sPtr += srcYStride; \ 445 filterposx &= FILTER_MASK; \ 446 v31 = vis_fpadd16(u10, u11); \ 447 dpSrc = vis_alignaddr(sPtr, 0); \ 448 data0 = dpSrc[0]; \ 449 sum0 = vis_fpadd16(sum0, v30); \ 450 data1 = dpSrc[1]; \ 451 sum1 = vis_fpadd16(sum1, v31); \ 452 data2 = dpSrc[2]; \ 453 row30 = vis_faligndata(data0, data1); \ 454 d0 = vis_fmul8sux16(sum0, xFilter0); \ 455 row31 = vis_faligndata(data1, data2); \ 456 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 457 d1 = vis_fmul8ulx16(sum0, xFilter0); \ 458 yFilter0 = yPtr[0]; \ 459 d2 = vis_fmul8sux16(sum1, xFilter1); \ 460 yFilter1 = yPtr[1]; \ 461 d3 = vis_fmul8ulx16(sum1, xFilter1); \ 462 d0##ind = vis_fpadd16(d0, d1); \ 463 yFilter2 = yPtr[2]; \ 464 yFilter3 = yPtr[3]; \ 465 d1##ind = vis_fpadd16(d2, d3); \ 466 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \ 467 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1) 468 469/***************************************************************/ 470#define FADD_2BC_S16() \ 471 d0 = vis_fpadd16(d00, d10); \ 472 d2 = vis_fpadd16(d01, d11); \ 473 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \ 474 p1 = vis_fpadd16s(vis_read_hi(d2), 
vis_read_lo(d2)); \ 475 d0 = vis_fmuld8sux16(f_x01000100, p0); \ 476 d1 = vis_fmuld8sux16(f_x01000100, p1); \ 477 res = vis_fxor(vis_fpackfix_pair(d0, d1), mask8000) 478 479/***************************************************************/ 480mlib_status mlib_ImageAffine_u16_2ch_bc (mlib_affine_param *param) 481{ 482 DECLAREVAR_BC(); 483 DTYPE *dstLineEnd; 484 mlib_s32 filterposx, filterposy; 485 mlib_d64 data0, data1, data2; 486 mlib_d64 sum0, sum1; 487 mlib_d64 row00, row10, row20, row30; 488 mlib_d64 row01, row11, row21, row31; 489 mlib_f32 p0, p1; 490 mlib_d64 xFilter, xFilter0, xFilter1; 491 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; 492 mlib_d64 v00, v01, v10, v11, v20, v21, v30, v31; 493 mlib_d64 u00, u01, u10, u11, u20, u21; 494 mlib_d64 d0, d1, d2, d3; 495 mlib_d64 d00, d10, d20, d30, d01, d11; 496 mlib_d64 *yPtr; 497 mlib_d64 *dp, *dpSrc; 498 mlib_s32 cols, i, mask, emask; 499 mlib_d64 res, res1; 500 mlib_d64 dr, dr1; 501 mlib_f32 f_x01000100 = vis_to_float(0x01000100); 502 mlib_d64 mask8000 = vis_to_double_dup(0x80008000); 503 const mlib_s16 *mlib_filters_table ; 504 const mlib_s16 *mlib_filters_table_4; 505 506 if (filter == MLIB_BICUBIC) { 507 mlib_filters_table = mlib_filters_s16_bc; 508 mlib_filters_table_4 = mlib_filters_s16_bc_4; 509 } else { 510 mlib_filters_table = mlib_filters_s16_bc2; 511 mlib_filters_table_4 = mlib_filters_s16_bc2_4; 512 } 513 514 srcYStride >>= 1; 515 516 for (j = yStart; j <= yFinish; j++) { 517 518 vis_write_gsr(10 << 3); 519 520 CLIP(2); 521 dstLineEnd = (DTYPE*)dstData + 2 * xRight; 522 523 cols = xRight - xLeft + 1; 524 dp = vis_alignaddr(dstPixelPtr, 0); 525 dstLineEnd += 1; 526 mask = vis_edge16(dstPixelPtr, dstLineEnd); 527 i = 0; 528 529 if (i <= cols - 6) { 530 531 NEXT_PIXEL_2BC_S16(); 532 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 533 534 NEXT_PIXEL_2BC_S16(); 535 536 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); 537 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); 
538 539 FADD_2BC_S16(); 540 541 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); 542 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); 543 544#pragma pipeloop(0) 545 for (; i <= cols-8; i += 2) { 546 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 547 res = vis_faligndata(res, res); 548 vis_pst_16(res, dp++, mask); 549 vis_pst_16(res, dp, ~mask); 550 FADD_2BC_S16(); 551 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); 552 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); 553 } 554 555 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 556 res = vis_faligndata(res, res); 557 vis_pst_16(res, dp++, mask); 558 vis_pst_16(res, dp, ~mask); 559 560 FADD_2BC_S16(); 561 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 562 res = vis_faligndata(res, res); 563 vis_pst_16(res, dp++, mask); 564 vis_pst_16(res, dp, ~mask); 565 566 RESULT_2BC_S16_1PIXEL(); 567 res1 = res; 568 569 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 570 RESULT_2BC_S16_1PIXEL(); 571 res = vis_write_hi(res, vis_read_hi(res1)); 572 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 573 res = vis_faligndata(res, res); 574 vis_pst_16(res, dp++, mask); 575 vis_pst_16(res, dp, ~mask); 576 577 i += 6; 578 } 579 580 if (i <= cols - 4) { 581 NEXT_PIXEL_2BC_S16(); 582 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 583 584 NEXT_PIXEL_2BC_S16(); 585 586 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4); 587 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4); 588 589 FADD_2BC_S16(); 590 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 591 res = vis_faligndata(res, res); 592 vis_pst_16(res, dp++, mask); 593 vis_pst_16(res, dp, ~mask); 594 595 RESULT_2BC_S16_1PIXEL(); 596 res1 = res; 597 598 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 599 RESULT_2BC_S16_1PIXEL(); 600 res = vis_write_hi(res, vis_read_hi(res1)); 601 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 602 res = 
vis_faligndata(res, res); 603 vis_pst_16(res, dp++, mask); 604 vis_pst_16(res, dp, ~mask); 605 606 i += 4; 607 } 608 609 if (i <= cols - 2) { 610 NEXT_PIXEL_2BC_S16(); 611 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 612 RESULT_2BC_S16_1PIXEL(); 613 res1 = res; 614 615 NEXT_PIXEL_2BC_S16(); 616 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 617 RESULT_2BC_S16_1PIXEL(); 618 res = vis_write_hi(res, vis_read_hi(res1)); 619 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 620 res = vis_faligndata(res, res); 621 vis_pst_16(res, dp++, mask); 622 vis_pst_16(res, dp, ~mask); 623 624 i += 2; 625 } 626 627 if (i < cols) { 628 NEXT_PIXEL_2BC_S16(); 629 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4); 630 RESULT_2BC_S16_1PIXEL(); 631 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0); 632 res = vis_faligndata(res, res); 633 emask = vis_edge16(dp, dstLineEnd); 634 vis_pst_16(res, dp++, mask & emask); 635 636 if ((mlib_s16*)dp <= dstLineEnd) { 637 mask = vis_edge16(dp, dstLineEnd); 638 vis_pst_16(res, dp, mask); 639 } 640 } 641 } 642 643 return MLIB_SUCCESS; 644} 645 646/***************************************************************/ 647#define NEXT_PIXEL_3BC_S16() \ 648 xSrc = (X >> MLIB_SHIFT)-1; \ 649 ySrc = (Y >> MLIB_SHIFT)-1; \ 650 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3) 651 652/***************************************************************/ 653#define LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4) \ 654 dpSrc = vis_alignaddr(sPtr, 0); \ 655 data0 = dpSrc[0]; \ 656 data1 = dpSrc[1]; \ 657 data2 = dpSrc[2]; \ 658 data3 = dpSrc[3]; \ 659 row00 = vis_faligndata(data0, data1); \ 660 row01 = vis_faligndata(data1, data2); \ 661 row02 = vis_faligndata(data2, data3); \ 662 sPtr += srcYStride; \ 663 dpSrc = vis_alignaddr(sPtr, 0); \ 664 data0 = dpSrc[0]; \ 665 data1 = dpSrc[1]; \ 666 data2 = dpSrc[2]; \ 667 data3 = dpSrc[3]; \ 668 row10 = vis_faligndata(data0, data1); \ 669 row11 = 
vis_faligndata(data1, data2); \ 670 row12 = vis_faligndata(data2, data3); \ 671 sPtr += srcYStride; \ 672 dpSrc = vis_alignaddr(sPtr, 0); \ 673 data0 = dpSrc[0]; \ 674 data1 = dpSrc[1]; \ 675 data2 = dpSrc[2]; \ 676 data3 = dpSrc[3]; \ 677 row20 = vis_faligndata(data0, data1); \ 678 row21 = vis_faligndata(data1, data2); \ 679 row22 = vis_faligndata(data2, data3); \ 680 sPtr += srcYStride; \ 681 dpSrc = vis_alignaddr(sPtr, 0); \ 682 data0 = dpSrc[0]; \ 683 data1 = dpSrc[1]; \ 684 data2 = dpSrc[2]; \ 685 data3 = dpSrc[3]; \ 686 row30 = vis_faligndata(data0, data1); \ 687 row31 = vis_faligndata(data1, data2); \ 688 row32 = vis_faligndata(data2, data3); \ 689 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ 690 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 691 yFilter0 = yPtr[0]; \ 692 yFilter1 = yPtr[1]; \ 693 yFilter2 = yPtr[2]; \ 694 yFilter3 = yPtr[3]; \ 695 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ 696 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \ 697 xFilter0 = xPtr[0]; \ 698 xFilter1 = xPtr[1]; \ 699 xFilter2 = xPtr[2]; \ 700 X += dX; \ 701 Y += dY 702 703/***************************************************************/ 704#define STORE_BC_S16_3CH_1PIXEL() \ 705 dstPixelPtr[0] = f0.t[0]; \ 706 dstPixelPtr[1] = f0.t[1]; \ 707 dstPixelPtr[2] = f0.t[2]; \ 708 dstPixelPtr += 3 709 710/***************************************************************/ 711#define RESULT_3BC_S16_1PIXEL() \ 712 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \ 713 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \ 714 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \ 715 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \ 716 v00 = vis_fpadd16(u00, u01); \ 717 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \ 718 v01 = vis_fpadd16(u10, u11); \ 719 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \ 720 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \ 721 u01 
= vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \ 722 v02 = vis_fpadd16(u20, u21); \ 723 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \ 724 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \ 725 v10 = vis_fpadd16(u00, u01); \ 726 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \ 727 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \ 728 u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \ 729 v11 = vis_fpadd16(u10, u11); \ 730 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \ 731 v12 = vis_fpadd16(u20, u21); \ 732 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \ 733 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \ 734 v20 = vis_fpadd16(u00, u01); \ 735 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \ 736 sum0 = vis_fpadd16(v00, v10); \ 737 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \ 738 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \ 739 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \ 740 v21 = vis_fpadd16(u10, u11); \ 741 sum1 = vis_fpadd16(v01, v11); \ 742 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \ 743 sum2 = vis_fpadd16(v02, v12); \ 744 v22 = vis_fpadd16(u20, u21); \ 745 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \ 746 sum0 = vis_fpadd16(sum0, v20); \ 747 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \ 748 v30 = vis_fpadd16(u00, u01); \ 749 sum1 = vis_fpadd16(sum1, v21); \ 750 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \ 751 v31 = vis_fpadd16(u10, u11); \ 752 sum2 = vis_fpadd16(sum2, v22); \ 753 v32 = vis_fpadd16(u20, u21); \ 754 sum0 = vis_fpadd16(sum0, v30); \ 755 row30 = vis_faligndata(data0, data1); \ 756 v00 = vis_fmul8sux16(sum0, xFilter0); \ 757 sum1 = vis_fpadd16(sum1, v31); \ 758 sum2 = vis_fpadd16(sum2, v32); \ 759 v01 = vis_fmul8ulx16(sum0, xFilter0); \ 760 v10 = vis_fmul8sux16(sum1, xFilter1); \ 761 v11 = vis_fmul8ulx16(sum1, xFilter1); \ 
762 d0 = vis_fpadd16(v00, v01); \ 763 v20 = vis_fmul8sux16(sum2, xFilter2); \ 764 v21 = vis_fmul8ulx16(sum2, xFilter2); \ 765 d1 = vis_fpadd16(v10, v11); \ 766 d2 = vis_fpadd16(v20, v21); \ 767 vis_alignaddr((void*)6, 0); \ 768 d3 = vis_faligndata(d0, d1); \ 769 vis_alignaddr((void*)2, 0); \ 770 d4 = vis_faligndata(d1, d2); \ 771 d0 = vis_fpadd16(d0, d3); \ 772 d2 = vis_fpadd16(d2, d4); \ 773 d1 = vis_faligndata(d2, d2); \ 774 d0 = vis_fpadd16(d0, d1); \ 775 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ 776 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ 777 f0.d = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000) 778 779/***************************************************************/ 780#define BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) \ 781 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \ 782 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \ 783 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \ 784 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \ 785 v00 = vis_fpadd16(u00, u01); \ 786 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \ 787 v01 = vis_fpadd16(u10, u11); \ 788 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \ 789 dpSrc = vis_alignaddr(sPtr, 0); \ 790 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \ 791 u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \ 792 data0 = dpSrc[0]; \ 793 filterposy = (Y >> FILTER_SHIFT); \ 794 v02 = vis_fpadd16(u20, u21); \ 795 data1 = dpSrc[1]; \ 796 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \ 797 data2 = dpSrc[2]; \ 798 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \ 799 v10 = vis_fpadd16(u00, u01); \ 800 data3 = dpSrc[3]; \ 801 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \ 802 row00 = vis_faligndata(data0, data1); \ 803 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \ 804 row01 = vis_faligndata(data1, data2); \ 805 u00 = vis_fmul8sux16(vis_fxor(row20, 
mask8000), yFilter2); \ 806 row02 = vis_faligndata(data2, data3); \ 807 filterposx = (X >> FILTER_SHIFT); \ 808 sPtr += srcYStride; \ 809 dpSrc = vis_alignaddr(sPtr, 0); \ 810 v11 = vis_fpadd16(u10, u11); \ 811 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \ 812 v12 = vis_fpadd16(u20, u21); \ 813 data0 = dpSrc[0]; \ 814 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \ 815 X += dX; \ 816 data1 = dpSrc[1]; \ 817 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \ 818 v20 = vis_fpadd16(u00, u01); \ 819 data2 = dpSrc[2]; \ 820 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \ 821 sum0 = vis_fpadd16(v00, v10); \ 822 data3 = dpSrc[3]; \ 823 row10 = vis_faligndata(data0, data1); \ 824 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \ 825 row11 = vis_faligndata(data1, data2); \ 826 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \ 827 row12 = vis_faligndata(data2, data3); \ 828 sPtr += srcYStride; \ 829 dpSrc = vis_alignaddr(sPtr, 0); \ 830 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \ 831 v21 = vis_fpadd16(u10, u11); \ 832 Y += dY; \ 833 xSrc = (X >> MLIB_SHIFT)-1; \ 834 sum1 = vis_fpadd16(v01, v11); \ 835 data0 = dpSrc[0]; \ 836 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \ 837 sum2 = vis_fpadd16(v02, v12); \ 838 ySrc = (Y >> MLIB_SHIFT)-1; \ 839 data1 = dpSrc[1]; \ 840 v22 = vis_fpadd16(u20, u21); \ 841 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \ 842 data2 = dpSrc[2]; \ 843 sum0 = vis_fpadd16(sum0, v20); \ 844 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \ 845 data3 = dpSrc[3]; \ 846 v30 = vis_fpadd16(u00, u01); \ 847 filterposy &= FILTER_MASK; \ 848 row20 = vis_faligndata(data0, data1); \ 849 sum1 = vis_fpadd16(sum1, v21); \ 850 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \ 851 row21 = vis_faligndata(data1, data2); \ 852 row22 = vis_faligndata(data2, data3); \ 853 sPtr += srcYStride; \ 854 filterposx &= FILTER_MASK; \ 855 v31 = 
vis_fpadd16(u10, u11); \ 856 dpSrc = vis_alignaddr(sPtr, 0); \ 857 data0 = dpSrc[0]; \ 858 sum2 = vis_fpadd16(sum2, v22); \ 859 data1 = dpSrc[1]; \ 860 v32 = vis_fpadd16(u20, u21); \ 861 data2 = dpSrc[2]; \ 862 sum0 = vis_fpadd16(sum0, v30); \ 863 data3 = dpSrc[3]; \ 864 row30 = vis_faligndata(data0, data1); \ 865 v00 = vis_fmul8sux16(sum0, xFilter0); \ 866 row31 = vis_faligndata(data1, data2); \ 867 row32 = vis_faligndata(data2, data3); \ 868 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 869 sum1 = vis_fpadd16(sum1, v31); \ 870 yFilter0 = yPtr[0]; \ 871 sum2 = vis_fpadd16(sum2, v32); \ 872 v01 = vis_fmul8ulx16(sum0, xFilter0); \ 873 yFilter1 = yPtr[1]; \ 874 v10 = vis_fmul8sux16(sum1, xFilter1); \ 875 yFilter2 = yPtr[2]; \ 876 v11 = vis_fmul8ulx16(sum1, xFilter1); \ 877 d0 = vis_fpadd16(v00, v01); \ 878 yFilter3 = yPtr[3]; \ 879 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \ 880 v20 = vis_fmul8sux16(sum2, xFilter2); \ 881 xFilter0 = xPtr[0]; \ 882 v21 = vis_fmul8ulx16(sum2, xFilter2); \ 883 d1 = vis_fpadd16(v10, v11); \ 884 xFilter1 = xPtr[1]; \ 885 d2 = vis_fpadd16(v20, v21); \ 886 xFilter2 = xPtr[2]; \ 887 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)
/*
 * NOTE(review): the line above is the tail of the BC_S16_3CH macro, kept
 * exactly as found because the beginning of that macro lies before this
 * span. Everything below has had the fused line-number residue removed
 * and its line structure restored; statement order is unchanged.
 */

/***************************************************************/
/*
 * FADD_3BC_S16 - final reduction for one 3-channel pixel.
 *
 * Reads the three partial-product vectors d0, d1, d2 and writes the
 * finished pixel into f0.d:
 *   - d3 = vis_faligndata(d0, d1) with the align offset set to 6,
 *     d4 = vis_faligndata(d1, d2) with the align offset set to 2;
 *     d0 + d3, d2 + d4 and a realigned copy of the latter are then
 *     added together (presumably this lines up the contributions of
 *     the horizontal taps for each channel - TODO confirm lane layout);
 *   - both halves of the sum are widened with
 *     vis_fmuld8sux16(f_x01000100, ...) and re-packed with
 *     vis_fpackfix_pair;
 *   - the packed lanes are XOR-ed with mask8000 (top bit of every
 *     16-bit lane flipped).
 *
 * Side effects: changes the GSR align offset (vis_alignaddr) and
 * clobbers d0..d4.
 */
#define FADD_3BC_S16() \
  vis_alignaddr((void*)6, 0); \
  d3 = vis_faligndata(d0, d1); \
  vis_alignaddr((void*)2, 0); \
  d4 = vis_faligndata(d1, d2); \
  d0 = vis_fpadd16(d0, d3); \
  d2 = vis_fpadd16(d2, d4); \
  d1 = vis_faligndata(d2, d2); \
  d0 = vis_fpadd16(d0, d1); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  f0.d = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000)

/***************************************************************/
/*
 * mlib_ImageAffine_u16_3ch_bc - bicubic affine kernel, 3 channels.
 *
 * param: affine parameter block; it is unpacked by DECLAREVAR_BC(),
 *        which is defined outside this span (it presumably provides
 *        filter, srcYStride, lineAddr, yStart/yFinish, X/Y/dX/dY,
 *        xLeft/xRight and the pixel pointers used below - confirm
 *        against its definition).
 *
 * Returns MLIB_SUCCESS unconditionally.
 *
 * For every destination row the pixels xLeft..xRight are produced.
 * Rows with at least four pixels go through a software-pipelined path
 * (prologue, steady-state loop, four-pixel epilogue); any remaining
 * pixels are handled one at a time.
 */
mlib_status mlib_ImageAffine_u16_3ch_bc (mlib_affine_param *param)
{
  DECLAREVAR_BC();
  mlib_s32 filterposx, filterposy;
  mlib_d64 data0, data1, data2, data3;
  mlib_d64 sum0, sum1, sum2;
  mlib_d64 row00, row10, row20, row30;
  mlib_d64 row01, row11, row21, row31;
  mlib_d64 row02, row12, row22, row32;
  mlib_d64 xFilter0, xFilter1, xFilter2;
  mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
  mlib_d64 v00, v01, v02, v10, v11, v12, v20, v21, v22, v30, v31, v32;
  mlib_d64 u00, u01, u10, u11, u20, u21;
  mlib_d64 d0, d1, d2, d3, d4;
  mlib_d64 *yPtr, *xPtr;
  mlib_d64 *dpSrc;
  mlib_s32 cols, i;
  mlib_f32 f_x01000100 = vis_to_float(0x01000100);
  mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
  /* FADD_3BC_S16 / RESULT_3BC_S16_1PIXEL leave the finished pixel here. */
  union {
    mlib_s16 t[4];
    mlib_d64 d;
  } f0;
  const mlib_s16 *mlib_filters_table_3;
  const mlib_s16 *mlib_filters_table_4;

  /* Pick the coefficient tables: "bc" for MLIB_BICUBIC, "bc2" otherwise. */
  if (filter == MLIB_BICUBIC) {
    mlib_filters_table_3 = mlib_filters_s16_bc_3;
    mlib_filters_table_4 = mlib_filters_s16_bc_4;
  } else {
    mlib_filters_table_3 = mlib_filters_s16_bc2_3;
    mlib_filters_table_4 = mlib_filters_s16_bc2_4;
  }

  /*
   * sPtr is an mlib_s16 pointer and is advanced by srcYStride, so the
   * stride is halved here (presumably bytes -> 16-bit elements).
   */
  srcYStride >>= 1;

  for (j = yStart; j <= yFinish; j++) {

    /*
     * NOTE(review): presumably sets the GSR scale field consumed by
     * vis_fpackfix_pair to 10 and clears the align offset - confirm
     * against the VIS manual.
     */
    vis_write_gsr(10 << 3);

    CLIP(3);

    cols = xRight - xLeft + 1;

    i = 0;

    if (i <= cols - 4) {

      /* Prologue: fill the pipeline before the first store. */
      NEXT_PIXEL_3BC_S16();
      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);

      NEXT_PIXEL_3BC_S16();

      BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
      FADD_3BC_S16();

      BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);

      /* Steady state: store one finished pixel per iteration. */
#pragma pipeloop(0)
      for (; i < cols-4; i++) {
        STORE_BC_S16_3CH_1PIXEL();

        FADD_3BC_S16();
        BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
      }

      /* Epilogue: drain the four pixels still in flight. */
      STORE_BC_S16_3CH_1PIXEL();

      FADD_3BC_S16();
      STORE_BC_S16_3CH_1PIXEL();

      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();

      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();
      i += 4;
    }

    /* Remaining pixels (or rows shorter than four): no pipelining. */
    for (; i < cols; i++) {
      NEXT_PIXEL_3BC_S16();
      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();
    }
  }

  return MLIB_SUCCESS;
}

/***************************************************************/
/*
 * NEXT_PIXEL_4BC_S16 - point sPtr at the source window of the current
 * (X, Y) position for 4-channel data.
 *
 * xSrc / ySrc are the integer parts of X / Y (shifted by MLIB_SHIFT)
 * minus one, so the window starts one sample to the left of and above
 * that position. sPtr is row lineAddr[ySrc] plus xSrc * 4 16-bit
 * elements.
 */
#define NEXT_PIXEL_4BC_S16() \
  xSrc = (X >> MLIB_SHIFT)-1; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)

/***************************************************************/
/*
 * LOAD_BC_S16_4CH_1PIXEL - load the source window and the filter
 * coefficients for one 4-channel pixel.
 *
 * mlib_filters_s16_4: coefficient table, indexed for both directions.
 *
 * For each of four consecutive source rows (sPtr is stepped by
 * srcYStride between rows and is left on the fourth row) five 8-byte
 * words are read from the aligned address returned by vis_alignaddr
 * and turned into four realigned vectors rowR0..rowR3. Each vector
 * covers four 16-bit elements, i.e. one pixel at the (xSrc << 2)
 * element stride used by NEXT_PIXEL_4BC_S16.
 *
 * yFilter0..3 are read at byte offset filterposy*4 and xFilter0..3 at
 * byte offset filterposx*4 of the table, where filterposx/filterposy
 * are (X or Y >> FILTER_SHIFT) & FILTER_MASK. X and Y are then
 * advanced by dX and dY.
 */
#define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row00 = vis_faligndata(data0, data1); \
  row01 = vis_faligndata(data1, data2); \
  row02 = vis_faligndata(data2, data3); \
  row03 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row10 = vis_faligndata(data0, data1); \
  row11 = vis_faligndata(data1, data2); \
  row12 = vis_faligndata(data2, data3); \
  row13 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row20 = vis_faligndata(data0, data1); \
  row21 = vis_faligndata(data1, data2); \
  row22 = vis_faligndata(data2, data3); \
  row23 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row30 = vis_faligndata(data0, data1); \
  row31 = vis_faligndata(data1, data2); \
  row32 = vis_faligndata(data2, data3); \
  row33 = vis_faligndata(data3, data4); \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  yFilter0 = yPtr[0]; \
  yFilter1 = yPtr[1]; \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
  xFilter0 = xPtr[0]; \
  xFilter1 = xPtr[1]; \
  xFilter2 = xPtr[2]; \
  xFilter3 = xPtr[3]; \
  X += dX; \
  Y += dY

/***************************************************************/
/*
 * RESULT_4BC_S16_1PIXEL - compute one finished 4-channel pixel from
 * the rows and coefficients currently held, without loading anything.
 *
 * Vertical pass: every rowRC is XOR-ed with mask8000 (top bit of each
 * 16-bit lane flipped) and multiplied by yFilterR using the
 * vis_fmul8sux16 / vis_fmul8ulx16 pair, whose two results are added;
 * the four rows of column C are accumulated into sumC.
 * Horizontal pass: sumC is multiplied by xFilterC the same way and the
 * four products d0..d3 are added together.
 * Output: both halves of the total are widened with
 * vis_fmuld8sux16(f_x01000100, ...), packed with vis_fpackfix_pair and
 * XOR-ed with mask8000 into res.
 *
 * The interleaving of the statements is deliberate hand scheduling;
 * keep the order as is.
 */
#define RESULT_4BC_S16_1PIXEL() \
  u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
  u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
  u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
  u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
  v00 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
  v01 = vis_fpadd16(u10, u11); \
  u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \
  u30 = vis_fmul8sux16(vis_fxor(row03, mask8000), yFilter0); \
  u31 = vis_fmul8ulx16(vis_fxor(row03, mask8000), yFilter0); \
  v02 = vis_fpadd16(u20, u21); \
  u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
  u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
  v03 = vis_fpadd16(u30, u31); \
  u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
  u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
  v10 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
  v11 = vis_fpadd16(u10, u11); \
  u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
  u30 = vis_fmul8sux16(vis_fxor(row13, mask8000), yFilter1); \
  u31 = vis_fmul8ulx16(vis_fxor(row13, mask8000), yFilter1); \
  u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
  v12 = vis_fpadd16(u20, u21); \
  u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
  v13 = vis_fpadd16(u30, u31); \
  u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
  u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
  v20 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
  sum0 = vis_fpadd16(v00, v10); \
  u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
  u30 = vis_fmul8sux16(vis_fxor(row23, mask8000), yFilter2); \
  u31 = vis_fmul8ulx16(vis_fxor(row23, mask8000), yFilter2); \
  u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
  u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
  v21 = vis_fpadd16(u10, u11); \
  sum1 = vis_fpadd16(v01, v11); \
  u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
  sum2 = vis_fpadd16(v02, v12); \
  sum3 = vis_fpadd16(v03, v13); \
  v22 = vis_fpadd16(u20, u21); \
  u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
  sum0 = vis_fpadd16(sum0, v20); \
  u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
  u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \
  v23 = vis_fpadd16(u30, u31); \
  v30 = vis_fpadd16(u00, u01); \
  sum1 = vis_fpadd16(sum1, v21); \
  u30 = vis_fmul8sux16(vis_fxor(row33, mask8000), yFilter3); \
  u31 = vis_fmul8ulx16(vis_fxor(row33, mask8000), yFilter3); \
  v31 = vis_fpadd16(u10, u11); \
  sum2 = vis_fpadd16(sum2, v22); \
  sum3 = vis_fpadd16(sum3, v23); \
  v32 = vis_fpadd16(u20, u21); \
  sum0 = vis_fpadd16(sum0, v30); \
  v33 = vis_fpadd16(u30, u31); \
  v00 = vis_fmul8sux16(sum0, xFilter0); \
  sum1 = vis_fpadd16(sum1, v31); \
  sum2 = vis_fpadd16(sum2, v32); \
  v01 = vis_fmul8ulx16(sum0, xFilter0); \
  v10 = vis_fmul8sux16(sum1, xFilter1); \
  sum3 = vis_fpadd16(sum3, v33); \
  v11 = vis_fmul8ulx16(sum1, xFilter1); \
  d0 = vis_fpadd16(v00, v01); \
  v20 = vis_fmul8sux16(sum2, xFilter2); \
  v21 = vis_fmul8ulx16(sum2, xFilter2); \
  d1 = vis_fpadd16(v10, v11); \
  v30 = vis_fmul8sux16(sum3, xFilter3); \
  v31 = vis_fmul8ulx16(sum3, xFilter3); \
  d2 = vis_fpadd16(v20, v21); \
  d3 = vis_fpadd16(v30, v31); \
  d0 = vis_fpadd16(d0, d1); \
  d2 = vis_fpadd16(d2, d3); \
  d0 = vis_fpadd16(d0, d2); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  res = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000)

/***************************************************************/
/*
 * BC_S16_4CH - software-pipelined step for 4-channel data.
 *
 * mlib_filters_s16_4: coefficient table, indexed for both directions.
 *
 * In one pass this macro
 *   - runs the same vertical and horizontal multiplies as
 *     RESULT_4BC_S16_1PIXEL on the rows and coefficients currently
 *     held, leaving the four horizontal products in d0..d3 (they are
 *     NOT summed or packed here; FADD_4BC_S16 does that);
 *   - meanwhile reloads row00..row33 from sPtr (four rows, stepping
 *     sPtr by srcYStride between them), each rowRx only after its old
 *     value has been consumed;
 *   - captures filterposx/filterposy from X and Y before advancing
 *     them by dX/dY, and reloads yFilter0..3 / xFilter0..3 from the
 *     table after the old coefficients have been used;
 *   - recomputes xSrc/ySrc from the advanced X/Y and repoints sPtr, so
 *     the next invocation loads the window of the following position.
 *
 * The statement order is the schedule; do not reorder.
 */
#define BC_S16_4CH(mlib_filters_s16_4) \
  u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
  u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
  u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
  u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
  v00 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
  v01 = vis_fpadd16(u10, u11); \
  u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \
  u30 = vis_fmul8sux16(vis_fxor(row03, mask8000), yFilter0); \
  u31 = vis_fmul8ulx16(vis_fxor(row03, mask8000), yFilter0); \
  v02 = vis_fpadd16(u20, u21); \
  dpSrc = vis_alignaddr(sPtr, 0); \
  u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
  u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
  data0 = dpSrc[0]; \
  filterposy = (Y >> FILTER_SHIFT); \
  v03 = vis_fpadd16(u30, u31); \
  data1 = dpSrc[1]; \
  u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
  data2 = dpSrc[2]; \
  u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
  v10 = vis_fpadd16(u00, u01); \
  data3 = dpSrc[3]; \
  u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
  v11 = vis_fpadd16(u10, u11); \
  data4 = dpSrc[4]; \
  u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
  row00 = vis_faligndata(data0, data1); \
  u30 = vis_fmul8sux16(vis_fxor(row13, mask8000), yFilter1); \
  row01 = vis_faligndata(data1, data2); \
  u31 = vis_fmul8ulx16(vis_fxor(row13, mask8000), yFilter1); \
  row02 = vis_faligndata(data2, data3); \
  u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
  row03 = vis_faligndata(data3, data4); \
  filterposx = (X >> FILTER_SHIFT); \
  sPtr += srcYStride; \
  v12 = vis_fpadd16(u20, u21); \
  dpSrc = vis_alignaddr(sPtr, 0); \
  u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
  v13 = vis_fpadd16(u30, u31); \
  data0 = dpSrc[0]; \
  u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
  X += dX; \
  data1 = dpSrc[1]; \
  u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
  v20 = vis_fpadd16(u00, u01); \
  data2 = dpSrc[2]; \
  u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
  sum0 = vis_fpadd16(v00, v10); \
  data3 = dpSrc[3]; \
  u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
  data4 = dpSrc[4]; \
  row10 = vis_faligndata(data0, data1); \
  u30 = vis_fmul8sux16(vis_fxor(row23, mask8000), yFilter2); \
  row11 = vis_faligndata(data1, data2); \
  u31 = vis_fmul8ulx16(vis_fxor(row23, mask8000), yFilter2); \
  row12 = vis_faligndata(data2, data3); \
  u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
  row13 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
  v21 = vis_fpadd16(u10, u11); \
  Y += dY; \
  xSrc = (X >> MLIB_SHIFT)-1; \
  sum1 = vis_fpadd16(v01, v11); \
  data0 = dpSrc[0]; \
  u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
  sum2 = vis_fpadd16(v02, v12); \
  sum3 = vis_fpadd16(v03, v13); \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  data1 = dpSrc[1]; \
  v22 = vis_fpadd16(u20, u21); \
  u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
  data2 = dpSrc[2]; \
  sum0 = vis_fpadd16(sum0, v20); \
  u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
  data3 = dpSrc[3]; \
  u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \
  v23 = vis_fpadd16(u30, u31); \
  data4 = dpSrc[4]; \
  v30 = vis_fpadd16(u00, u01); \
  filterposy &= FILTER_MASK; \
  row20 = vis_faligndata(data0, data1); \
  sum1 = vis_fpadd16(sum1, v21); \
  u30 = vis_fmul8sux16(vis_fxor(row33, mask8000), yFilter3); \
  row21 = vis_faligndata(data1, data2); \
  u31 = vis_fmul8ulx16(vis_fxor(row33, mask8000), yFilter3); \
  row22 = vis_faligndata(data2, data3); \
  row23 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  filterposx &= FILTER_MASK; \
  v31 = vis_fpadd16(u10, u11); \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  sum2 = vis_fpadd16(sum2, v22); \
  sum3 = vis_fpadd16(sum3, v23); \
  data1 = dpSrc[1]; \
  v32 = vis_fpadd16(u20, u21); \
  data2 = dpSrc[2]; \
  sum0 = vis_fpadd16(sum0, v30); \
  data3 = dpSrc[3]; \
  v33 = vis_fpadd16(u30, u31); \
  data4 = dpSrc[4]; \
  row30 = vis_faligndata(data0, data1); \
  v00 = vis_fmul8sux16(sum0, xFilter0); \
  row31 = vis_faligndata(data1, data2); \
  row32 = vis_faligndata(data2, data3); \
  row33 = vis_faligndata(data3, data4); \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  sum1 = vis_fpadd16(sum1, v31); \
  yFilter0 = yPtr[0]; \
  sum2 = vis_fpadd16(sum2, v32); \
  v01 = vis_fmul8ulx16(sum0, xFilter0); \
  yFilter1 = yPtr[1]; \
  v10 = vis_fmul8sux16(sum1, xFilter1); \
  sum3 = vis_fpadd16(sum3, v33); \
  yFilter2 = yPtr[2]; \
  v11 = vis_fmul8ulx16(sum1, xFilter1); \
  d0 = vis_fpadd16(v00, v01); \
  yFilter3 = yPtr[3]; \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
  v20 = vis_fmul8sux16(sum2, xFilter2); \
  xFilter0 = xPtr[0]; \
  v21 = vis_fmul8ulx16(sum2, xFilter2); \
  d1 = vis_fpadd16(v10, v11); \
  xFilter1 = xPtr[1]; \
  v30 = vis_fmul8sux16(sum3, xFilter3); \
  v31 = vis_fmul8ulx16(sum3, xFilter3); \
  d2 = vis_fpadd16(v20, v21); \
  xFilter2 = xPtr[2]; \
  d3 = vis_fpadd16(v30, v31); \
  xFilter3 = xPtr[3]; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)

/***************************************************************/
/*
 * FADD_4BC_S16 - final reduction for one 4-channel pixel.
 *
 * Adds the four horizontal products d0..d3 left by BC_S16_4CH, widens
 * both halves of the sum with vis_fmuld8sux16(f_x01000100, ...), packs
 * them with vis_fpackfix_pair and XORs the result with mask8000 into
 * res. Clobbers d0, d2 and d3. This is the same tail as
 * RESULT_4BC_S16_1PIXEL.
 */
#define FADD_4BC_S16() \
  d0 = vis_fpadd16(d0, d1); \
  d2 = vis_fpadd16(d2, d3); \
  d0 = vis_fpadd16(d0, d2); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  res = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000)

/***************************************************************/
/*
 * mlib_ImageAffine_u16_4ch_bc - bicubic affine kernel, 4 channels.
 *
 * param: affine parameter block; it is unpacked by DECLAREVAR_BC(),
 *        which is defined outside this span (it presumably provides
 *        filter, srcYStride, lineAddr, dstData, dstPixelPtr,
 *        yStart/yFinish, X/Y/dX/dY and xLeft/xRight - confirm against
 *        its definition).
 *
 * Returns MLIB_SUCCESS unconditionally.
 *
 * Same pipeline shape as the 3-channel kernel. Each finished pixel is
 * one 8-byte vector (res). Because the destination pixel need not be
 * 8-byte aligned, every store rotates res by gsrd bytes and writes it
 * with two partial stores: the lanes selected by mask go to *dp, the
 * complementary lanes to the following 8-byte word.
 */
mlib_status mlib_ImageAffine_u16_4ch_bc (mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;
  mlib_s32 filterposx, filterposy;
  mlib_d64 data0, data1, data2, data3, data4;
  mlib_d64 sum0, sum1, sum2, sum3;
  mlib_d64 row00, row10, row20, row30;
  mlib_d64 row01, row11, row21, row31;
  mlib_d64 row02, row12, row22, row32;
  mlib_d64 row03, row13, row23, row33;
  mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3;
  mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
  mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13;
  mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33;
  mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31;
  mlib_d64 d0, d1, d2, d3;
  mlib_d64 *yPtr, *xPtr;
  mlib_d64 *dp, *dpSrc;
  mlib_s32 cols, i, mask, gsrd;
  mlib_d64 res;
  mlib_f32 f_x01000100 = vis_to_float(0x01000100);
  mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
  const mlib_s16 *mlib_filters_table_4;

  /* Pick the coefficient table: "bc" for MLIB_BICUBIC, "bc2" otherwise. */
  if (filter == MLIB_BICUBIC) {
    mlib_filters_table_4 = mlib_filters_s16_bc_4;
  } else {
    mlib_filters_table_4 = mlib_filters_s16_bc2_4;
  }

  /*
   * sPtr is an mlib_s16 pointer and is advanced by srcYStride, so the
   * stride is halved here (presumably bytes -> 16-bit elements).
   */
  srcYStride >>= 1;

  for (j = yStart; j <= yFinish; j++) {

    /*
     * NOTE(review): presumably sets the GSR scale field consumed by
     * vis_fpackfix_pair to 10 and clears the align offset - confirm
     * against the VIS manual.
     */
    vis_write_gsr(10 << 3);

    CLIP(4);
    dstLineEnd = (DTYPE*)dstData + 4 * xRight;

    cols = xRight - xLeft + 1;
    dp = vis_alignaddr(dstPixelPtr, 0);
    /* Last 16-bit element of the row's last pixel (4 elements per pixel). */
    dstLineEnd += 3;
    mask = vis_edge16(dstPixelPtr, dstLineEnd);
    /* Byte rotation that moves res onto the destination's alignment. */
    gsrd = ((8 - (mlib_addr)dstPixelPtr) & 7);

    i = 0;

    if (i <= cols - 4) {

      /* Prologue: fill the pipeline before the first store. */
      NEXT_PIXEL_4BC_S16();
      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);

      NEXT_PIXEL_4BC_S16();

      BC_S16_4CH(mlib_filters_table_4);
      FADD_4BC_S16();

      BC_S16_4CH(mlib_filters_table_4);

      /* Steady state: store one finished pixel per iteration. */
#pragma pipeloop(0)
      for (; i < cols-4; i++) {
        vis_alignaddr((void *)gsrd, 0);
        res = vis_faligndata(res, res);

        vis_pst_16(res, dp++, mask);
        vis_pst_16(res, dp, ~mask);

        FADD_4BC_S16();
        BC_S16_4CH(mlib_filters_table_4);
      }

      /* Epilogue: drain the four pixels still in flight. */
      vis_alignaddr((void *)gsrd, 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      FADD_4BC_S16();
      vis_alignaddr((void *)gsrd, 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      RESULT_4BC_S16_1PIXEL();
      vis_alignaddr((void *)gsrd, 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
      RESULT_4BC_S16_1PIXEL();
      vis_alignaddr((void *)gsrd, 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);
      i += 4;
    }

    /* Remaining pixels (or rows shorter than four): no pipelining. */
#pragma pipeloop(0)
    for (; i < cols; i++) {
      NEXT_PIXEL_4BC_S16();
      LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
      RESULT_4BC_S16_1PIXEL();
      vis_alignaddr((void *)gsrd, 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);
    }
  }

  return MLIB_SUCCESS;
}

/***************************************************************/