/*
 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



/*
 * The functions step along the lines from xLeft to xRight and apply
 * the bicubic filtering.
 *
 */

#include "vis_proto.h"
#include "mlib_ImageAffine.h"
#include "mlib_v_ImageFilters.h"

/***************************************************************/
/* Sample type of the images handled by this file. */
#define DTYPE mlib_s16

/*
 * NOTE(review): FILTER_SHIFT and FILTER_MASK are used below but are not
 * defined in this file; presumably the included headers derive them from
 * FILTER_BITS - confirm.
 */
#define FILTER_BITS 9

/***************************************************************/
/* Short alias used by all macros below for the source pixel pointer. */
#define sPtr srcPixelPtr

/***************************************************************/
/*
 * 1 channel: take the integer parts of the fixed-point source coordinates
 * X and Y, step back by one in each direction, and point sPtr at that
 * sample of source row ySrc.  (The -1 presumably places sPtr at the
 * top-left corner of the 4x4 bicubic neighbourhood - confirm.)
 */
#define NEXT_PIXEL_1BC_S16() \
  xSrc = (X >> MLIB_SHIFT)-1; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc

/***************************************************************/
/*
 * 1 channel: load everything needed to filter the pixel sPtr points to.
 *
 * For four consecutive source rows (sPtr advanced by srcYStride between
 * them) the two aligned 8-byte words surrounding sPtr are read and
 * vis_faligndata extracts the 8 bytes (four mlib_s16) starting at sPtr
 * into row0..row3.  vis_alignaddr is called before every extraction
 * because it sets the byte offset that vis_faligndata uses.
 *
 * The four y-coefficient vectors come from mlib_filters_s16_4 at byte
 * offset filterposy*4 and the x-coefficient vector from mlib_filters_s16
 * at byte offset filterposx.  Finally X and Y are advanced by dX and dY.
 */
#define LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  row0 = vis_faligndata(data0, data1); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  row1 = vis_faligndata(data0, data1); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  row2 = vis_faligndata(data0, data1); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  row3 = vis_faligndata(data0, data1); \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  yFilter0 = yPtr[0]; \
  yFilter1 = yPtr[1]; \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
  X += dX; \
  Y += dY

/***************************************************************/
/*
 * 1 channel: filter the pixel whose rows and coefficients are already
 * loaded and leave the packed result in res.
 *
 * Each rowN is multiplied by yFilterN (the vis_fmul8sux16 and
 * vis_fmul8ulx16 partial products are added together) and the four
 * products are accumulated in sum.  sum is multiplied by xFilter the same
 * way, its four 16-bit lanes are added pairwise (vis_fpadd16s), widened
 * through vis_fmuld8sux16 with f_x01000100, added again (vis_fpadd32s)
 * and packed with vis_fpackfix_pair.
 *
 * Only the low half of d1 is written here; callers store a single 16-bit
 * value from res with vis_st_u16.
 */
#define RESULT_1BC_S16_1PIXEL() \
  u0 = vis_fmul8sux16(row0, yFilter0); \
  u1 = vis_fmul8ulx16(row0, yFilter0); \
  u2 = vis_fmul8sux16(row1, yFilter1); \
  v0 = vis_fpadd16(u0, u1); \
  u3 = vis_fmul8ulx16(row1, yFilter1); \
  u0 = vis_fmul8sux16(row2, yFilter2); \
  v1 = vis_fpadd16(u2, u3); \
  u1 = vis_fmul8ulx16(row2, yFilter2); \
  sum = vis_fpadd16(v0, v1); \
  u2 = vis_fmul8sux16(row3, yFilter3); \
  v2 = vis_fpadd16(u0, u1); \
  u3 = vis_fmul8ulx16(row3, yFilter3); \
  sum = vis_fpadd16(sum, v2); \
  v3 = vis_fpadd16(u2, u3); \
  sum = vis_fpadd16(sum, v3); \
  d00 = vis_fmul8sux16(sum, xFilter); \
  d10 = vis_fmul8ulx16(sum, xFilter); \
  d0 = vis_fpadd16(d00, d10); \
  p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
  d0 = vis_fmuld8sux16(f_x01000100, p0); \
  d1 = vis_write_lo(d1, vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0))); \
  res = vis_fpackfix_pair(d1, d1)

/***************************************************************/
/*
 * 1 channel: one software-pipelined step.
 *
 * The arithmetic for the pixel whose rows/coefficients are already loaded
 * is interleaved with the loads for the pixel sPtr currently points to:
 *   - the old row0..row3 / yFilter0..3 / xFilter are consumed and the
 *     x-filtered (not yet horizontally summed) result is left in d0<ind>;
 *   - row0..row3, yFilter0..3 and xFilter are reloaded for the next pixel;
 *   - X and Y are advanced and sPtr is set up for the pixel after that.
 *
 * Statement order matters: every rowN is read before it is overwritten,
 * and each vis_faligndata relies on the preceding vis_alignaddr.
 */
#define BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
  u0 = vis_fmul8sux16(row0, yFilter0); \
  u1 = vis_fmul8ulx16(row0, yFilter0); \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u2 = vis_fmul8sux16(row1, yFilter1); \
  v0 = vis_fpadd16(u0, u1); \
  data0 = dpSrc[0]; \
  filterposy = (Y >> FILTER_SHIFT); \
  u3 = vis_fmul8ulx16(row1, yFilter1); \
  data1 = dpSrc[1]; \
  row0 = vis_faligndata(data0, data1); \
  filterposx = (X >> FILTER_SHIFT); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u0 = vis_fmul8sux16(row2, yFilter2); \
  v1 = vis_fpadd16(u2, u3); \
  data0 = dpSrc[0]; \
  u1 = vis_fmul8ulx16(row2, yFilter2); \
  sum = vis_fpadd16(v0, v1); \
  X += dX; \
  data1 = dpSrc[1]; \
  row1 = vis_faligndata(data0, data1); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u2 = vis_fmul8sux16(row3, yFilter3); \
  v2 = vis_fpadd16(u0, u1); \
  Y += dY; \
  xSrc = (X >> MLIB_SHIFT)-1; \
  data0 = dpSrc[0]; \
  u3 = vis_fmul8ulx16(row3, yFilter3); \
  sum = vis_fpadd16(sum, v2); \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  data1 = dpSrc[1]; \
  filterposy &= FILTER_MASK; \
  row2 = vis_faligndata(data0, data1); \
  sPtr += srcYStride; \
  filterposx &= FILTER_MASK; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  v3 = vis_fpadd16(u2, u3); \
  data1 = dpSrc[1]; \
  row3 = vis_faligndata(data0, data1); \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  yFilter0 = yPtr[0]; \
  sum = vis_fpadd16(sum, v3); \
  yFilter1 = yPtr[1]; \
  d0 = vis_fmul8sux16(sum, xFilter); \
  yFilter2 = yPtr[2]; \
  d1 = vis_fmul8ulx16(sum, xFilter); \
  yFilter3 = yPtr[3]; \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
  d0##ind = vis_fpadd16(d0, d1); \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc

/***************************************************************/
/*
 * 1 channel: reduce the four per-pixel vectors d00..d03 produced by
 * BC_S16_1CH(0..3) to one value each and pack the four values into res
 * (one mlib_d64 holding four output samples).
 */
#define FADD_1BC_S16() \
  p0 = vis_fpadd16s(vis_read_hi(d00), vis_read_lo(d00)); \
  p1 = vis_fpadd16s(vis_read_hi(d01), vis_read_lo(d01)); \
  p2 = vis_fpadd16s(vis_read_hi(d02), vis_read_lo(d02)); \
  p3 = vis_fpadd16s(vis_read_hi(d03), vis_read_lo(d03)); \
  d0 = vis_fmuld8sux16(f_x01000100, p0); \
  d1 = vis_fmuld8sux16(f_x01000100, p1); \
  d2 = vis_fmuld8sux16(f_x01000100, p2); \
  d3 = vis_fmuld8sux16(f_x01000100, p3); \
  d0 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0)), \
                     vis_fpadd32s(vis_read_hi(d1), vis_read_lo(d1))); \
  d1 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d2), vis_read_lo(d2)), \
                     vis_fpadd32s(vis_read_hi(d3), vis_read_lo(d3))); \
  res = vis_fpackfix_pair(d0, d1)

/***************************************************************/
/*
 * Bicubic affine transform of a 1-channel mlib_s16 image.
 *
 * For every destination row j in [yStart, yFinish] the pixels xLeft..xRight
 * are produced in three phases:
 *   1. single pixels until dstPixelPtr is 8-byte aligned;
 *   2. a software-pipelined section that writes four pixels per 8-byte
 *      store (entered only when at least 10 pixels remain; it produces
 *      10 + 4*k pixels);
 *   3. single pixels for whatever is left.
 *
 * DECLAREVAR_BC() and CLIP() are defined outside this file; the names used
 * below that are not declared here (j, yStart, yFinish, xLeft, xRight, X, Y,
 * dX, dY, xSrc, ySrc, lineAddr, srcYStride, srcPixelPtr, dstPixelPtr,
 * filter, ...) presumably come from them - confirm against
 * mlib_ImageAffine.h.
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status mlib_ImageAffine_s16_1ch_bc (mlib_affine_param *param)
{
  DECLAREVAR_BC();
  mlib_s32 filterposx, filterposy;
  mlib_d64 data0, data1;
  mlib_d64 sum;
  mlib_d64 row0, row1, row2, row3;
  mlib_f32 p0, p1, p2, p3;
  mlib_d64 xFilter, yFilter0, yFilter1, yFilter2, yFilter3;
  mlib_d64 v0, v1, v2, v3;
  mlib_d64 u0, u1, u2, u3;
  mlib_d64 d0, d1, d2, d3;
  mlib_d64 d00, d10, d01, d02, d03;
  mlib_d64 *yPtr;
  mlib_d64 *dpSrc;
  mlib_s32 align, cols, i;
  mlib_d64 res;
  mlib_f32 f_x01000100 = vis_to_float(0x01000100);
  const mlib_s16 *mlib_filters_table ;
  const mlib_s16 *mlib_filters_table_4;

  /* Select the coefficient tables for the requested bicubic variant. */
  if (filter == MLIB_BICUBIC) {
    mlib_filters_table = mlib_filters_s16_bc;
    mlib_filters_table_4 = mlib_filters_s16_bc_4;
  } else {
    mlib_filters_table = mlib_filters_s16_bc2;
    mlib_filters_table_4 = mlib_filters_s16_bc2_4;
  }

  /* srcYStride is added to an mlib_s16 pointer below, so halve it
   * (presumably it arrives as a byte count - confirm). */
  srcYStride >>= 1;

  for (j = yStart; j <= yFinish; j++) {

    /* NOTE(review): per the VIS GSR layout this should select scale
     * factor 10 for vis_fpackfix_pair and clear the align field - confirm. */
    vis_write_gsr(10 << 3);

    CLIP(1);

    cols = xRight - xLeft + 1;
    /* '-' binds tighter than '&': ((8 - addr) & 7) is the byte distance to
     * the next 8-byte boundary; >> 1 converts it to 16-bit pixels. */
    align = (8 - ((mlib_addr)dstPixelPtr) & 7) & 7;
    align >>= 1;
    align = (cols < align)? cols : align;

    /* Phase 1: leading pixels, one at a time. */
    for (i = 0; i < align; i++) {
      NEXT_PIXEL_1BC_S16();
      LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_1BC_S16_1PIXEL();
      vis_st_u16(res, dstPixelPtr++);
    }

    /* Phase 2: pipelined section. */
    if (i <= cols - 10) {

      /* Prologue: load pixel 0, then run two groups of four steps so that
       * res holds pixels 0..3, d00..d03 hold pixels 4..7 and the data for
       * pixel 8 is loaded. */
      NEXT_PIXEL_1BC_S16();
      LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);

      NEXT_PIXEL_1BC_S16();

      BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);

      FADD_1BC_S16();

      BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
      BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);

      /* Steady state: store four finished pixels, reduce the next four,
       * and start four more. */
#pragma pipeloop(0)
      for (; i <= cols - 14; i += 4) {
        *(mlib_d64*)dstPixelPtr = res;
        FADD_1BC_S16();
        BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
        BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
        BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
        BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
        dstPixelPtr += 4;
      }

      /* Epilogue: drain the pipeline - 4 + 4 pixels already in flight,
       * one pixel whose data is loaded, and one whose sPtr is set up. */
      *(mlib_d64*)dstPixelPtr = res;
      dstPixelPtr += 4;
      FADD_1BC_S16();
      *(mlib_d64*)dstPixelPtr = res;
      dstPixelPtr += 4;

      RESULT_1BC_S16_1PIXEL();
      vis_st_u16(res, dstPixelPtr++);

      LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_1BC_S16_1PIXEL();
      vis_st_u16(res, dstPixelPtr++);
      i += 10;
    }

    /* Phase 3: trailing pixels, one at a time. */
    for (; i < cols; i++) {
      NEXT_PIXEL_1BC_S16();
      LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_1BC_S16_1PIXEL();
      vis_st_u16(res, dstPixelPtr++);
    }
  }

  return MLIB_SUCCESS;
}

/***************************************************************/
/*
 * 2 channels: same as NEXT_PIXEL_1BC_S16 but the column index is scaled
 * by two samples per pixel.
 */
#define NEXT_PIXEL_2BC_S16() \
  xSrc = (X >> MLIB_SHIFT)-1; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1)

/***************************************************************/
/*
 * 2 channels: load four source rows for the pixel at sPtr.  Each row needs
 * 16 bytes (eight mlib_s16), so three aligned words are read and two
 * vis_faligndata extractions produce rowN0 and rowN1.  Coefficient loading
 * and the X/Y advance are the same as in the 1-channel macro.
 */
#define LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  row00 = vis_faligndata(data0, data1); \
  row01 = vis_faligndata(data1, data2); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  row10 = vis_faligndata(data0, data1); \
  row11 = vis_faligndata(data1, data2); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  row20 = vis_faligndata(data0, data1); \
  row21 = vis_faligndata(data1, data2); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  row30 = vis_faligndata(data0, data1); \
  row31 = vis_faligndata(data1, data2); \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  yFilter0 = yPtr[0]; \
  yFilter1 = yPtr[1]; \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
  X += dX; \
  Y += dY

/***************************************************************/
/*
 * 2 channels: filter the already-loaded pixel and leave it in res.
 *
 * The vis_fpmerge sequence re-arranges the bytes of xFilter into xFilter0
 * and xFilter1 (presumably each x coefficient duplicated for the two
 * channels - confirm).  The y filtering accumulates into sum0/sum1, the
 * x products of both halves are added, and the result is reduced and
 * packed.  vis_fpackfix_pair(d0, d0) puts the same pixel in both halves
 * of res.
 */
#define RESULT_2BC_S16_1PIXEL() \
  u00 = vis_fmul8sux16(row00, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \
  u01 = vis_fmul8ulx16(row00, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \
  u10 = vis_fmul8sux16(row01, yFilter0); \
  dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \
  u11 = vis_fmul8ulx16(row01, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \
  u20 = vis_fmul8sux16(row10, yFilter1); \
  v00 = vis_fpadd16(u00, u01); \
  u21 = vis_fmul8ulx16(row10, yFilter1); \
  v01 = vis_fpadd16(u10, u11); \
  u00 = vis_fmul8sux16(row11, yFilter1); \
  xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \
  u01 = vis_fmul8ulx16(row11, yFilter1); \
  u10 = vis_fmul8sux16(row20, yFilter2); \
  u11 = vis_fmul8ulx16(row20, yFilter2); \
  v10 = vis_fpadd16(u20, u21); \
  sum0 = vis_fpadd16(v00, v10); \
  u20 = vis_fmul8sux16(row21, yFilter2); \
  v11 = vis_fpadd16(u00, u01); \
  u21 = vis_fmul8ulx16(row21, yFilter2); \
  xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \
  u00 = vis_fmul8sux16(row30, yFilter3); \
  v20 = vis_fpadd16(u10, u11); \
  sum1 = vis_fpadd16(v01, v11); \
  u01 = vis_fmul8ulx16(row30, yFilter3); \
  sum0 = vis_fpadd16(sum0, v20); \
  v21 = vis_fpadd16(u20, u21); \
  u10 = vis_fmul8sux16(row31, yFilter3); \
  v30 = vis_fpadd16(u00, u01); \
  sum1 = vis_fpadd16(sum1, v21); \
  u11 = vis_fmul8ulx16(row31, yFilter3); \
  sum0 = vis_fpadd16(sum0, v30); \
  v31 = vis_fpadd16(u10, u11); \
  sum1 = vis_fpadd16(sum1, v31); \
  d00 = vis_fmul8sux16(sum0, xFilter0); \
  d10 = vis_fmul8ulx16(sum0, xFilter0); \
  d20 = vis_fmul8sux16(sum1, xFilter1); \
  d30 = vis_fmul8ulx16(sum1, xFilter1); \
  d0 = vis_fpadd16(d00, d10); \
  d1 = vis_fpadd16(d20, d30); \
  d0 = vis_fpadd16(d0, d1); \
  p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
  d0 = vis_fmuld8sux16(f_x01000100, p0); \
  res = vis_fpackfix_pair(d0, d0)

/***************************************************************/
/*
 * 2 channels: one software-pipelined step (see BC_S16_1CH).  The finished
 * x products for the current pixel are left in d0<ind> and d1<ind>, the
 * rows and coefficients for the next pixel are loaded, X/Y are advanced
 * and sPtr is set up for the pixel after that.  Statement order matters.
 */
#define BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
  u00 = vis_fmul8sux16(row00, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \
  u01 = vis_fmul8ulx16(row00, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \
  u10 = vis_fmul8sux16(row01, yFilter0); \
  dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \
  u11 = vis_fmul8ulx16(row01, yFilter0); \
  dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u20 = vis_fmul8sux16(row10, yFilter1); \
  v00 = vis_fpadd16(u00, u01); \
  u21 = vis_fmul8ulx16(row10, yFilter1); \
  data0 = dpSrc[0]; \
  filterposy = (Y >> FILTER_SHIFT); \
  v01 = vis_fpadd16(u10, u11); \
  data1 = dpSrc[1]; \
  u00 = vis_fmul8sux16(row11, yFilter1); \
  xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \
  data2 = dpSrc[2]; \
  u01 = vis_fmul8ulx16(row11, yFilter1); \
  row00 = vis_faligndata(data0, data1); \
  u10 = vis_fmul8sux16(row20, yFilter2); \
  row01 = vis_faligndata(data1, data2); \
  filterposx = (X >> FILTER_SHIFT); \
  sPtr += srcYStride; \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u11 = vis_fmul8ulx16(row20, yFilter2); \
  v10 = vis_fpadd16(u20, u21); \
  data0 = dpSrc[0]; \
  sum0 = vis_fpadd16(v00, v10); \
  X += dX; \
  data1 = dpSrc[1]; \
  u20 = vis_fmul8sux16(row21, yFilter2); \
  v11 = vis_fpadd16(u00, u01); \
  data2 = dpSrc[2]; \
  row10 = vis_faligndata(data0, data1); \
  u21 = vis_fmul8ulx16(row21, yFilter2); \
  row11 = vis_faligndata(data1, data2); \
  sPtr += srcYStride; \
  xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  u00 = vis_fmul8sux16(row30, yFilter3); \
  v20 = vis_fpadd16(u10, u11); \
  Y += dY; \
  xSrc = (X >> MLIB_SHIFT)-1; \
  sum1 = vis_fpadd16(v01, v11); \
  data0 = dpSrc[0]; \
  u01 = vis_fmul8ulx16(row30, yFilter3); \
  sum0 = vis_fpadd16(sum0, v20); \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  data1 = dpSrc[1]; \
  v21 = vis_fpadd16(u20, u21); \
  u10 = vis_fmul8sux16(row31, yFilter3); \
  data2 = dpSrc[2]; \
  v30 = vis_fpadd16(u00, u01); \
  filterposy &= FILTER_MASK; \
  row20 = vis_faligndata(data0, data1); \
  sum1 = vis_fpadd16(sum1, v21); \
  u11 = vis_fmul8ulx16(row31, yFilter3); \
  row21 = vis_faligndata(data1, data2); \
  sPtr += srcYStride; \
  filterposx &= FILTER_MASK; \
  v31 = vis_fpadd16(u10, u11); \
  vis_alignaddr(sPtr, 0); \
  dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
  data0 = dpSrc[0]; \
  sum0 = vis_fpadd16(sum0, v30); \
  data1 = dpSrc[1]; \
  sum1 = vis_fpadd16(sum1, v31); \
  data2 = dpSrc[2]; \
  row30 = vis_faligndata(data0, data1); \
  d0 = vis_fmul8sux16(sum0, xFilter0); \
  row31 = vis_faligndata(data1, data2); \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  d1 = vis_fmul8ulx16(sum0, xFilter0); \
  yFilter0 = yPtr[0]; \
  d2 = vis_fmul8sux16(sum1, xFilter1); \
  yFilter1 = yPtr[1]; \
  d3 = vis_fmul8ulx16(sum1, xFilter1); \
  d0##ind = vis_fpadd16(d0, d1); \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  d1##ind = vis_fpadd16(d2, d3); \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1)

/***************************************************************/
/*
 * 2 channels: reduce the vectors left by BC_S16_2CH(0) (d00, d10) and
 * BC_S16_2CH(1) (d01, d11) and pack the two resulting pixels into res.
 */
#define FADD_2BC_S16() \
  d0 = vis_fpadd16(d00, d10); \
  d2 = vis_fpadd16(d01, d11); \
  p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
  p1 = vis_fpadd16s(vis_read_hi(d2), vis_read_lo(d2)); \
  d0 = vis_fmuld8sux16(f_x01000100, p0); \
  d1 = vis_fmuld8sux16(f_x01000100, p1); \
  res = vis_fpackfix_pair(d0, d1)

/***************************************************************/
/*
 * Bicubic affine transform of a 2-channel mlib_s16 image.
 *
 * The destination is not aligned up front.  Instead every 8-byte result
 * (two pixels) is rotated with vis_faligndata(res, res) and written with
 * two partial stores: lanes selected by mask go to the aligned word dp,
 * the remaining lanes (~mask) go to the following word.  dstPixelPtr is
 * not advanced inside the row, so the rotation amount is constant.
 *
 * Per row: a pipelined section (>= 6 pixels; produces 6 + 2*k), then
 * fixed-size tails of 4, 2 and 1 pixel.
 *
 * DECLAREVAR_BC() and CLIP() are defined outside this file; see the note
 * on mlib_ImageAffine_s16_1ch_bc.  Always returns MLIB_SUCCESS.
 */
mlib_status mlib_ImageAffine_s16_2ch_bc (mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;
  mlib_s32 filterposx, filterposy;
  mlib_d64 data0, data1, data2;
  mlib_d64 sum0, sum1;
  mlib_d64 row00, row10, row20, row30;
  mlib_d64 row01, row11, row21, row31;
  mlib_f32 p0, p1;
  mlib_d64 xFilter, xFilter0, xFilter1;
  mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
  mlib_d64 v00, v01, v10, v11, v20, v21, v30, v31;
  mlib_d64 u00, u01, u10, u11, u20, u21;
  mlib_d64 d0, d1, d2, d3;
  mlib_d64 d00, d10, d20, d30, d01, d11;
  mlib_d64 *yPtr;
  mlib_d64 *dp, *dpSrc;
  mlib_s32 cols, i, mask, emask;
  mlib_d64 res, res1;
  mlib_d64 dr, dr1;
  mlib_f32 f_x01000100 = vis_to_float(0x01000100);
  const mlib_s16 *mlib_filters_table ;
  const mlib_s16 *mlib_filters_table_4;

  /* Select the coefficient tables for the requested bicubic variant. */
  if (filter == MLIB_BICUBIC) {
    mlib_filters_table = mlib_filters_s16_bc;
    mlib_filters_table_4 = mlib_filters_s16_bc_4;
  } else {
    mlib_filters_table = mlib_filters_s16_bc2;
    mlib_filters_table_4 = mlib_filters_s16_bc2_4;
  }

  /* srcYStride is added to an mlib_s16 pointer below, so halve it
   * (presumably it arrives as a byte count - confirm). */
  srcYStride >>= 1;

  for (j = yStart; j <= yFinish; j++) {

    /* NOTE(review): per the VIS GSR layout this should select scale
     * factor 10 for vis_fpackfix_pair and clear the align field - confirm. */
    vis_write_gsr(10 << 3);

    CLIP(2);
    /* After the "+= 1" below dstLineEnd addresses the second (last) sample
     * of pixel xRight, assuming dstData is the start of the row. */
    dstLineEnd = (DTYPE*)dstData + 2 * xRight;

    cols = xRight - xLeft + 1;
    /* dp: dstPixelPtr rounded down to 8 bytes; mask: lanes of that first
     * word that belong to this row's output. */
    dp = vis_alignaddr(dstPixelPtr, 0);
    dstLineEnd += 1;
    mask = vis_edge16(dstPixelPtr, dstLineEnd);
    i = 0;

    /* Pipelined section. */
    if (i <= cols - 6) {

      /* Prologue: res = pixels 0,1; d0x/d1x = pixels 2,3; pixel 4 loaded. */
      NEXT_PIXEL_2BC_S16();
      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);

      NEXT_PIXEL_2BC_S16();

      BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
      BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);

      FADD_2BC_S16();

      BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
      BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);

      /* Steady state: store two pixels, reduce two, start two more.  The
       * vis_alignaddr here re-establishes the destination rotation, which
       * BC_S16_2CH overwrites with source alignments. */
#pragma pipeloop(0)
      for (; i <= cols-8; i += 2) {
        vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
        res = vis_faligndata(res, res);
        vis_pst_16(res, dp++, mask);
        vis_pst_16(res, dp, ~mask);
        FADD_2BC_S16();
        BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
        BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
      }

      /* Epilogue: drain 2 + 2 pixels in flight ... */
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      FADD_2BC_S16();
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      /* ... then the pixel already loaded (kept in res1) and the pixel
       * sPtr points to; they are combined as hi/lo halves of one store. */
      RESULT_2BC_S16_1PIXEL();
      res1 = res;

      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_2BC_S16_1PIXEL();
      res = vis_write_hi(res, vis_read_hi(res1));
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      i += 6;
    }

    /* Tail of four pixels: two via the pipelined step, two singly. */
    if (i <= cols - 4) {
      NEXT_PIXEL_2BC_S16();
      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);

      NEXT_PIXEL_2BC_S16();

      BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
      BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);

      FADD_2BC_S16();
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      RESULT_2BC_S16_1PIXEL();
      res1 = res;

      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_2BC_S16_1PIXEL();
      res = vis_write_hi(res, vis_read_hi(res1));
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      i += 4;
    }

    /* Tail of two pixels. */
    if (i <= cols - 2) {
      NEXT_PIXEL_2BC_S16();
      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_2BC_S16_1PIXEL();
      res1 = res;

      NEXT_PIXEL_2BC_S16();
      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_2BC_S16_1PIXEL();
      res = vis_write_hi(res, vis_read_hi(res1));
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      vis_pst_16(res, dp++, mask);
      vis_pst_16(res, dp, ~mask);

      i += 2;
    }

    /* Last single pixel: additionally clip the stores against dstLineEnd
     * so nothing past the end of the row is written. */
    if (i < cols) {
      NEXT_PIXEL_2BC_S16();
      LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
      RESULT_2BC_S16_1PIXEL();
      vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
      res = vis_faligndata(res, res);
      emask = vis_edge16(dp, dstLineEnd);
      vis_pst_16(res, dp++, mask & emask);

      if ((mlib_s16*)dp <= dstLineEnd) {
        mask = vis_edge16(dp, dstLineEnd);
        vis_pst_16(res, dp, mask);
      }
    }
  }

  return MLIB_SUCCESS;
}

/***************************************************************/
/*
 * 3 channels: same as NEXT_PIXEL_1BC_S16 but the column index is scaled
 * by three samples per pixel.
 */
#define NEXT_PIXEL_3BC_S16() \
  xSrc = (X >> MLIB_SHIFT)-1; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)

/***************************************************************/
/*
 * 3 channels: load four source rows for the pixel at sPtr.  Each row needs
 * 24 bytes (twelve mlib_s16), so four aligned words are read and three
 * extractions produce rowN0..rowN2.  Here the aligned address is taken
 * from the return value of vis_alignaddr.  The x coefficients are three
 * vectors read from mlib_filters_s16_3 at byte offset filterposx*3.
 */
#define LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4) \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  row00 = vis_faligndata(data0, data1); \
  row01 = vis_faligndata(data1, data2); \
  row02 = vis_faligndata(data2, data3); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  row10 = vis_faligndata(data0, data1); \
  row11 = vis_faligndata(data1, data2); \
  row12 = vis_faligndata(data2, data3); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  row20 = vis_faligndata(data0, data1); \
  row21 = vis_faligndata(data1, data2); \
  row22 = vis_faligndata(data2, data3); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  row30 = vis_faligndata(data0, data1); \
  row31 = vis_faligndata(data1, data2); \
  row32 = vis_faligndata(data2, data3); \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  yFilter0 = yPtr[0]; \
  yFilter1 = yPtr[1]; \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \
  xFilter0 = xPtr[0]; \
  xFilter1 = xPtr[1]; \
  xFilter2 = xPtr[2]; \
  X += dX; \
  Y += dY

/***************************************************************/
/*
 * 3 channels: write the first three 16-bit lanes of the packed result
 * (viewed through the f0 union) to the destination and advance by one
 * pixel.  The fourth lane is not stored.
 */
#define STORE_BC_S16_3CH_1PIXEL() \
  dstPixelPtr[0] = f0.t[0]; \
  dstPixelPtr[1] = f0.t[1]; \
  dstPixelPtr[2] = f0.t[2]; \
  dstPixelPtr += 3

/***************************************************************/
/*
 * 3 channels: filter the already-loaded pixel and leave it in f0.d.
 *
 * After the y and x filtering the twelve partial products sit in d0, d1
 * and d2.  vis_alignaddr((void*)6, 0) / ((void*)2, 0) are used only to set
 * the vis_faligndata byte shift (6 and 2) so the products can be shifted
 * against each other and added per channel.
 *
 * NOTE(review): the "row30 = vis_faligndata(data0, data1)" in the middle
 * comes after row30's last use in this macro; kept as in the original.
 */
#define RESULT_3BC_S16_1PIXEL() \
  u00 = vis_fmul8sux16(row00, yFilter0); \
  u01 = vis_fmul8ulx16(row00, yFilter0); \
  u10 = vis_fmul8sux16(row01, yFilter0); \
  u11 = vis_fmul8ulx16(row01, yFilter0); \
  v00 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(row02, yFilter0); \
  v01 = vis_fpadd16(u10, u11); \
  u21 = vis_fmul8ulx16(row02, yFilter0); \
  u00 = vis_fmul8sux16(row10, yFilter1); \
  u01 = vis_fmul8ulx16(row10, yFilter1); \
  v02 = vis_fpadd16(u20, u21); \
  u10 = vis_fmul8sux16(row11, yFilter1); \
  u11 = vis_fmul8ulx16(row11, yFilter1); \
  v10 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(row12, yFilter1); \
  u21 = vis_fmul8ulx16(row12, yFilter1); \
  u00 = vis_fmul8sux16(row20, yFilter2); \
  v11 = vis_fpadd16(u10, u11); \
  u01 = vis_fmul8ulx16(row20, yFilter2); \
  v12 = vis_fpadd16(u20, u21); \
  u10 = vis_fmul8sux16(row21, yFilter2); \
  u11 = vis_fmul8ulx16(row21, yFilter2); \
  v20 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(row22, yFilter2); \
  sum0 = vis_fpadd16(v00, v10); \
  u21 = vis_fmul8ulx16(row22, yFilter2); \
  u00 = vis_fmul8sux16(row30, yFilter3); \
  u01 = vis_fmul8ulx16(row30, yFilter3); \
  v21 = vis_fpadd16(u10, u11); \
  sum1 = vis_fpadd16(v01, v11); \
  u10 = vis_fmul8sux16(row31, yFilter3); \
  sum2 = vis_fpadd16(v02, v12); \
  v22 = vis_fpadd16(u20, u21); \
  u11 = vis_fmul8ulx16(row31, yFilter3); \
  sum0 = vis_fpadd16(sum0, v20); \
  u20 = vis_fmul8sux16(row32, yFilter3); \
  v30 = vis_fpadd16(u00, u01); \
  sum1 = vis_fpadd16(sum1, v21); \
  u21 = vis_fmul8ulx16(row32, yFilter3); \
  v31 = vis_fpadd16(u10, u11); \
  sum2 = vis_fpadd16(sum2, v22); \
  v32 = vis_fpadd16(u20, u21); \
  sum0 = vis_fpadd16(sum0, v30); \
  row30 = vis_faligndata(data0, data1); \
  v00 = vis_fmul8sux16(sum0, xFilter0); \
  sum1 = vis_fpadd16(sum1, v31); \
  sum2 = vis_fpadd16(sum2, v32); \
  v01 = vis_fmul8ulx16(sum0, xFilter0); \
  v10 = vis_fmul8sux16(sum1, xFilter1); \
  v11 = vis_fmul8ulx16(sum1, xFilter1); \
  d0 = vis_fpadd16(v00, v01); \
  v20 = vis_fmul8sux16(sum2, xFilter2); \
  v21 = vis_fmul8ulx16(sum2, xFilter2); \
  d1 = vis_fpadd16(v10, v11); \
  d2 = vis_fpadd16(v20, v21); \
  vis_alignaddr((void*)6, 0); \
  d3 = vis_faligndata(d0, d1); \
  vis_alignaddr((void*)2, 0); \
  d4 = vis_faligndata(d1, d2); \
  d0 = vis_fpadd16(d0, d3); \
  d2 = vis_fpadd16(d2, d4); \
  d1 = vis_faligndata(d2, d2); \
  d0 = vis_fpadd16(d0, d1); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  f0.d = vis_fpackfix_pair(d2, d3)

/***************************************************************/
/*
 * 3 channels: one software-pipelined step (see BC_S16_1CH).  The x
 * products for the current pixel are left in d0, d1, d2 (to be reduced by
 * FADD_3BC_S16), the rows and coefficients for the next pixel are loaded,
 * X/Y are advanced and sPtr is set up for the pixel after that.
 * Statement order matters.
 */
#define BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) \
  u00 = vis_fmul8sux16(row00, yFilter0); \
  u01 = vis_fmul8ulx16(row00, yFilter0); \
  u10 = vis_fmul8sux16(row01, yFilter0); \
  u11 = vis_fmul8ulx16(row01, yFilter0); \
  v00 = vis_fpadd16(u00, u01); \
  u20 = vis_fmul8sux16(row02, yFilter0); \
  v01 = vis_fpadd16(u10, u11); \
  u21 = vis_fmul8ulx16(row02, yFilter0); \
  dpSrc = vis_alignaddr(sPtr, 0); \
  u00 = vis_fmul8sux16(row10, yFilter1); \
  u01 = vis_fmul8ulx16(row10, yFilter1); \
  data0 = dpSrc[0]; \
  filterposy = (Y >> FILTER_SHIFT); \
  v02 = vis_fpadd16(u20, u21); \
  data1 = dpSrc[1]; \
  u10 = vis_fmul8sux16(row11, yFilter1); \
  data2 = dpSrc[2]; \
  u11 = vis_fmul8ulx16(row11, yFilter1); \
  v10 = vis_fpadd16(u00, u01); \
  data3 = dpSrc[3]; \
  u20 = vis_fmul8sux16(row12, yFilter1); \
  row00 = vis_faligndata(data0, data1); \
  u21 = vis_fmul8ulx16(row12, yFilter1); \
  row01 = vis_faligndata(data1, data2); \
  u00 = vis_fmul8sux16(row20, yFilter2); \
  row02 = vis_faligndata(data2, data3); \
  filterposx = (X >> FILTER_SHIFT); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  v11 = vis_fpadd16(u10, u11); \
  u01 = vis_fmul8ulx16(row20, yFilter2); \
  v12 = vis_fpadd16(u20, u21); \
  data0 = dpSrc[0]; \
  u10 = vis_fmul8sux16(row21, yFilter2); \
  X += dX; \
  data1 = dpSrc[1]; \
  u11 = vis_fmul8ulx16(row21, yFilter2); \
  v20 = vis_fpadd16(u00, u01); \
  data2 = dpSrc[2]; \
  u20 = vis_fmul8sux16(row22, yFilter2); \
  sum0 = vis_fpadd16(v00, v10); \
  data3 = dpSrc[3]; \
  row10 = vis_faligndata(data0, data1); \
  u21 = vis_fmul8ulx16(row22, yFilter2); \
  row11 = vis_faligndata(data1, data2); \
  u00 = vis_fmul8sux16(row30, yFilter3); \
  row12 = vis_faligndata(data2, data3); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  u01 = vis_fmul8ulx16(row30, yFilter3); \
  v21 = vis_fpadd16(u10, u11); \
  Y += dY; \
  xSrc = (X >> MLIB_SHIFT)-1; \
  sum1 = vis_fpadd16(v01, v11); \
  data0 = dpSrc[0]; \
  u10 = vis_fmul8sux16(row31, yFilter3); \
  sum2 = vis_fpadd16(v02, v12); \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  data1 = dpSrc[1]; \
  v22 = vis_fpadd16(u20, u21); \
  u11 = vis_fmul8ulx16(row31, yFilter3); \
  data2 = dpSrc[2]; \
  sum0 = vis_fpadd16(sum0, v20); \
  u20 = vis_fmul8sux16(row32, yFilter3); \
  data3 = dpSrc[3]; \
  v30 = vis_fpadd16(u00, u01); \
  filterposy &= FILTER_MASK; \
  row20 = vis_faligndata(data0, data1); \
  sum1 = vis_fpadd16(sum1, v21); \
  u21 = vis_fmul8ulx16(row32, yFilter3); \
  row21 = vis_faligndata(data1, data2); \
  row22 = vis_faligndata(data2, data3); \
  sPtr += srcYStride; \
  filterposx &= FILTER_MASK; \
  v31 = vis_fpadd16(u10, u11); \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  sum2 = vis_fpadd16(sum2, v22); \
  data1 = dpSrc[1]; \
  v32 = vis_fpadd16(u20, u21); \
  data2 = dpSrc[2]; \
  sum0 = vis_fpadd16(sum0, v30); \
  data3 = dpSrc[3]; \
  row30 = vis_faligndata(data0, data1); \
  v00 = vis_fmul8sux16(sum0, xFilter0); \
  row31 = vis_faligndata(data1, data2); \
  row32 = vis_faligndata(data2, data3); \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  sum1 = vis_fpadd16(sum1, v31); \
  yFilter0 = yPtr[0]; \
  sum2 = vis_fpadd16(sum2, v32); \
  v01 = vis_fmul8ulx16(sum0, xFilter0); \
  yFilter1 = yPtr[1]; \
  v10 = vis_fmul8sux16(sum1, xFilter1); \
  yFilter2 = yPtr[2]; \
  v11 = vis_fmul8ulx16(sum1, xFilter1); \
  d0 = vis_fpadd16(v00, v01); \
  yFilter3 = yPtr[3]; \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \
  v20 = vis_fmul8sux16(sum2, xFilter2); \
  xFilter0 = xPtr[0]; \
  v21 = vis_fmul8ulx16(sum2, xFilter2); \
  d1 = vis_fpadd16(v10, v11); \
  xFilter1 = xPtr[1]; \
  d2 = vis_fpadd16(v20, v21); \
  xFilter2 = xPtr[2]; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)

/***************************************************************/
/*
 * 3 channels: reduce d0, d1, d2 left by BC_S16_3CH to one packed pixel in
 * f0.d (same shift-and-add sequence as the end of RESULT_3BC_S16_1PIXEL).
 * Leaves the vis_faligndata byte shift set to 2.
 */
#define FADD_3BC_S16() \
  vis_alignaddr((void*)6, 0); \
  d3 = vis_faligndata(d0, d1); \
  vis_alignaddr((void*)2, 0); \
  d4 = vis_faligndata(d1, d2); \
  d0 = vis_fpadd16(d0, d3); \
  d2 = vis_fpadd16(d2, d4); \
  d1 = vis_faligndata(d2, d2); \
  d0 = vis_fpadd16(d0, d1); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  f0.d = vis_fpackfix_pair(d2, d3)

/***************************************************************/
/*
 * Bicubic affine transform of a 3-channel mlib_s16 image.
 *
 * Results are written with three scalar 16-bit stores per pixel
 * (STORE_BC_S16_3CH_1PIXEL), so no destination alignment handling is
 * needed.  Per row: a pipelined section (>= 4 pixels; produces 4 + k),
 * then single pixels for the rest.
 *
 * DECLAREVAR_BC() and CLIP() are defined outside this file; see the note
 * on mlib_ImageAffine_s16_1ch_bc.  Always returns MLIB_SUCCESS.
 */
mlib_status mlib_ImageAffine_s16_3ch_bc (mlib_affine_param *param)
{
  DECLAREVAR_BC();
  mlib_s32 filterposx, filterposy;
  mlib_d64 data0, data1, data2, data3;
  mlib_d64 sum0, sum1, sum2;
  mlib_d64 row00, row10, row20, row30;
  mlib_d64 row01, row11, row21, row31;
  mlib_d64 row02, row12, row22, row32;
  mlib_d64 xFilter0, xFilter1, xFilter2;
  mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
  mlib_d64 v00, v01, v02, v10, v11, v12, v20, v21, v22, v30, v31, v32;
  mlib_d64 u00, u01, u10, u11, u20, u21;
  mlib_d64 d0, d1, d2, d3, d4;
  mlib_d64 *yPtr, *xPtr;
  mlib_d64 *dpSrc;
  mlib_s32 cols, i;
  mlib_f32 f_x01000100 = vis_to_float(0x01000100);
  /* Lets the packed 8-byte result be read back as four 16-bit samples. */
  union {
    mlib_s16 t[4];
    mlib_d64 d;
  } f0;
  const mlib_s16 *mlib_filters_table_3;
  const mlib_s16 *mlib_filters_table_4;

  /* Select the coefficient tables for the requested bicubic variant. */
  if (filter == MLIB_BICUBIC) {
    mlib_filters_table_3 = mlib_filters_s16_bc_3;
    mlib_filters_table_4 = mlib_filters_s16_bc_4;
  } else {
    mlib_filters_table_3 = mlib_filters_s16_bc2_3;
    mlib_filters_table_4 = mlib_filters_s16_bc2_4;
  }

  /* srcYStride is added to an mlib_s16 pointer below, so halve it
   * (presumably it arrives as a byte count - confirm). */
  srcYStride >>= 1;

  for (j = yStart; j <= yFinish; j++) {

    /* NOTE(review): per the VIS GSR layout this should select scale
     * factor 10 for vis_fpackfix_pair and clear the align field - confirm. */
    vis_write_gsr(10 << 3);

    CLIP(3);

    cols = xRight - xLeft + 1;

    i = 0;

    /* Pipelined section. */
    if (i <= cols - 4) {

      /* Prologue: f0 = pixel 0; d0..d2 = pixel 1; pixel 2 loaded. */
      NEXT_PIXEL_3BC_S16();
      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);

      NEXT_PIXEL_3BC_S16();

      BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
      FADD_3BC_S16();

      BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);

      /* Steady state: store one pixel, reduce the next, start another. */
#pragma pipeloop(0)
      for (; i < cols-4; i++) {
        STORE_BC_S16_3CH_1PIXEL();

        FADD_3BC_S16();
        BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
      }

      /* Epilogue: drain the four pixels still in the pipeline. */
      STORE_BC_S16_3CH_1PIXEL();

      FADD_3BC_S16();
      STORE_BC_S16_3CH_1PIXEL();

      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();

      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();
      i += 4;
    }

    /* Remaining pixels (rows shorter than four pixels), one at a time. */
    for (; i < cols; i++) {
      NEXT_PIXEL_3BC_S16();
      LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
      RESULT_3BC_S16_1PIXEL();
      STORE_BC_S16_3CH_1PIXEL();
    }
  }

  return MLIB_SUCCESS;
}

/***************************************************************/
#define NEXT_PIXEL_4BC_S16() \
  xSrc = (X >> MLIB_SHIFT)-1; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)

/***************************************************************/
#define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row00 = vis_faligndata(data0, data1); \
  row01 = vis_faligndata(data1, data2); \
  row02 = vis_faligndata(data2, data3); \
  row03 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 = dpSrc[2]; \
  data3 = dpSrc[3]; \
  data4 = dpSrc[4]; \
  row10 = vis_faligndata(data0, data1); \
  row11 = vis_faligndata(data1, data2); \
  row12 = vis_faligndata(data2, data3); \
  row13 = vis_faligndata(data3, data4); \
  sPtr += srcYStride; \
  dpSrc = vis_alignaddr(sPtr, 0); \
  data0 = dpSrc[0]; \
  data1 = dpSrc[1]; \
  data2 =
dpSrc[2]; \ 1042 data3 = dpSrc[3]; \ 1043 data4 = dpSrc[4]; \ 1044 row20 = vis_faligndata(data0, data1); \ 1045 row21 = vis_faligndata(data1, data2); \ 1046 row22 = vis_faligndata(data2, data3); \ 1047 row23 = vis_faligndata(data3, data4); \ 1048 sPtr += srcYStride; \ 1049 dpSrc = vis_alignaddr(sPtr, 0); \ 1050 data0 = dpSrc[0]; \ 1051 data1 = dpSrc[1]; \ 1052 data2 = dpSrc[2]; \ 1053 data3 = dpSrc[3]; \ 1054 data4 = dpSrc[4]; \ 1055 row30 = vis_faligndata(data0, data1); \ 1056 row31 = vis_faligndata(data1, data2); \ 1057 row32 = vis_faligndata(data2, data3); \ 1058 row33 = vis_faligndata(data3, data4); \ 1059 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ 1060 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 1061 yFilter0 = yPtr[0]; \ 1062 yFilter1 = yPtr[1]; \ 1063 yFilter2 = yPtr[2]; \ 1064 yFilter3 = yPtr[3]; \ 1065 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ 1066 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ 1067 xFilter0 = xPtr[0]; \ 1068 xFilter1 = xPtr[1]; \ 1069 xFilter2 = xPtr[2]; \ 1070 xFilter3 = xPtr[3]; \ 1071 X += dX; \ 1072 Y += dY 1073 1074/***************************************************************/ 1075#define RESULT_4BC_S16_1PIXEL() \ 1076 u00 = vis_fmul8sux16(row00, yFilter0); \ 1077 u01 = vis_fmul8ulx16(row00, yFilter0); \ 1078 u10 = vis_fmul8sux16(row01, yFilter0); \ 1079 u11 = vis_fmul8ulx16(row01, yFilter0); \ 1080 v00 = vis_fpadd16(u00, u01); \ 1081 u20 = vis_fmul8sux16(row02, yFilter0); \ 1082 v01 = vis_fpadd16(u10, u11); \ 1083 u21 = vis_fmul8ulx16(row02, yFilter0); \ 1084 u30 = vis_fmul8sux16(row03, yFilter0); \ 1085 u31 = vis_fmul8ulx16(row03, yFilter0); \ 1086 v02 = vis_fpadd16(u20, u21); \ 1087 u00 = vis_fmul8sux16(row10, yFilter1); \ 1088 u01 = vis_fmul8ulx16(row10, yFilter1); \ 1089 v03 = vis_fpadd16(u30, u31); \ 1090 u10 = vis_fmul8sux16(row11, yFilter1); \ 1091 u11 = vis_fmul8ulx16(row11, yFilter1); \ 1092 v10 = vis_fpadd16(u00, u01); \ 1093 u20 = vis_fmul8sux16(row12, 
yFilter1); \ 1094 v11 = vis_fpadd16(u10, u11); \ 1095 u21 = vis_fmul8ulx16(row12, yFilter1); \ 1096 u30 = vis_fmul8sux16(row13, yFilter1); \ 1097 u31 = vis_fmul8ulx16(row13, yFilter1); \ 1098 u00 = vis_fmul8sux16(row20, yFilter2); \ 1099 v12 = vis_fpadd16(u20, u21); \ 1100 u01 = vis_fmul8ulx16(row20, yFilter2); \ 1101 v13 = vis_fpadd16(u30, u31); \ 1102 u10 = vis_fmul8sux16(row21, yFilter2); \ 1103 u11 = vis_fmul8ulx16(row21, yFilter2); \ 1104 v20 = vis_fpadd16(u00, u01); \ 1105 u20 = vis_fmul8sux16(row22, yFilter2); \ 1106 sum0 = vis_fpadd16(v00, v10); \ 1107 u21 = vis_fmul8ulx16(row22, yFilter2); \ 1108 u30 = vis_fmul8sux16(row23, yFilter2); \ 1109 u31 = vis_fmul8ulx16(row23, yFilter2); \ 1110 u00 = vis_fmul8sux16(row30, yFilter3); \ 1111 u01 = vis_fmul8ulx16(row30, yFilter3); \ 1112 v21 = vis_fpadd16(u10, u11); \ 1113 sum1 = vis_fpadd16(v01, v11); \ 1114 u10 = vis_fmul8sux16(row31, yFilter3); \ 1115 sum2 = vis_fpadd16(v02, v12); \ 1116 sum3 = vis_fpadd16(v03, v13); \ 1117 v22 = vis_fpadd16(u20, u21); \ 1118 u11 = vis_fmul8ulx16(row31, yFilter3); \ 1119 sum0 = vis_fpadd16(sum0, v20); \ 1120 u20 = vis_fmul8sux16(row32, yFilter3); \ 1121 u21 = vis_fmul8ulx16(row32, yFilter3); \ 1122 v23 = vis_fpadd16(u30, u31); \ 1123 v30 = vis_fpadd16(u00, u01); \ 1124 sum1 = vis_fpadd16(sum1, v21); \ 1125 u30 = vis_fmul8sux16(row33, yFilter3); \ 1126 u31 = vis_fmul8ulx16(row33, yFilter3); \ 1127 v31 = vis_fpadd16(u10, u11); \ 1128 sum2 = vis_fpadd16(sum2, v22); \ 1129 sum3 = vis_fpadd16(sum3, v23); \ 1130 v32 = vis_fpadd16(u20, u21); \ 1131 sum0 = vis_fpadd16(sum0, v30); \ 1132 v33 = vis_fpadd16(u30, u31); \ 1133 v00 = vis_fmul8sux16(sum0, xFilter0); \ 1134 sum1 = vis_fpadd16(sum1, v31); \ 1135 sum2 = vis_fpadd16(sum2, v32); \ 1136 v01 = vis_fmul8ulx16(sum0, xFilter0); \ 1137 v10 = vis_fmul8sux16(sum1, xFilter1); \ 1138 sum3 = vis_fpadd16(sum3, v33); \ 1139 v11 = vis_fmul8ulx16(sum1, xFilter1); \ 1140 d0 = vis_fpadd16(v00, v01); \ 1141 v20 = vis_fmul8sux16(sum2, xFilter2); \ 1142 
v21 = vis_fmul8ulx16(sum2, xFilter2); \ 1143 d1 = vis_fpadd16(v10, v11); \ 1144 v30 = vis_fmul8sux16(sum3, xFilter3); \ 1145 v31 = vis_fmul8ulx16(sum3, xFilter3); \ 1146 d2 = vis_fpadd16(v20, v21); \ 1147 d3 = vis_fpadd16(v30, v31); \ 1148 d0 = vis_fpadd16(d0, d1); \ 1149 d2 = vis_fpadd16(d2, d3); \ 1150 d0 = vis_fpadd16(d0, d2); \ 1151 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ 1152 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ 1153 res = vis_fpackfix_pair(d2, d3) 1154 1155/***************************************************************/ 1156#define BC_S16_4CH(mlib_filters_s16_4) \ 1157 u00 = vis_fmul8sux16(row00, yFilter0); \ 1158 u01 = vis_fmul8ulx16(row00, yFilter0); \ 1159 u10 = vis_fmul8sux16(row01, yFilter0); \ 1160 u11 = vis_fmul8ulx16(row01, yFilter0); \ 1161 v00 = vis_fpadd16(u00, u01); \ 1162 u20 = vis_fmul8sux16(row02, yFilter0); \ 1163 v01 = vis_fpadd16(u10, u11); \ 1164 u21 = vis_fmul8ulx16(row02, yFilter0); \ 1165 u30 = vis_fmul8sux16(row03, yFilter0); \ 1166 u31 = vis_fmul8ulx16(row03, yFilter0); \ 1167 v02 = vis_fpadd16(u20, u21); \ 1168 dpSrc = vis_alignaddr(sPtr, 0); \ 1169 u00 = vis_fmul8sux16(row10, yFilter1); \ 1170 u01 = vis_fmul8ulx16(row10, yFilter1); \ 1171 data0 = dpSrc[0]; \ 1172 filterposy = (Y >> FILTER_SHIFT); \ 1173 v03 = vis_fpadd16(u30, u31); \ 1174 data1 = dpSrc[1]; \ 1175 u10 = vis_fmul8sux16(row11, yFilter1); \ 1176 data2 = dpSrc[2]; \ 1177 u11 = vis_fmul8ulx16(row11, yFilter1); \ 1178 v10 = vis_fpadd16(u00, u01); \ 1179 data3 = dpSrc[3]; \ 1180 u20 = vis_fmul8sux16(row12, yFilter1); \ 1181 v11 = vis_fpadd16(u10, u11); \ 1182 data4 = dpSrc[4]; \ 1183 u21 = vis_fmul8ulx16(row12, yFilter1); \ 1184 row00 = vis_faligndata(data0, data1); \ 1185 u30 = vis_fmul8sux16(row13, yFilter1); \ 1186 row01 = vis_faligndata(data1, data2); \ 1187 u31 = vis_fmul8ulx16(row13, yFilter1); \ 1188 row02 = vis_faligndata(data2, data3); \ 1189 u00 = vis_fmul8sux16(row20, yFilter2); \ 1190 row03 = vis_faligndata(data3, data4); \ 1191 
filterposx = (X >> FILTER_SHIFT); \ 1192 sPtr += srcYStride; \ 1193 v12 = vis_fpadd16(u20, u21); \ 1194 dpSrc = vis_alignaddr(sPtr, 0); \ 1195 u01 = vis_fmul8ulx16(row20, yFilter2); \ 1196 v13 = vis_fpadd16(u30, u31); \ 1197 data0 = dpSrc[0]; \ 1198 u10 = vis_fmul8sux16(row21, yFilter2); \ 1199 X += dX; \ 1200 data1 = dpSrc[1]; \ 1201 u11 = vis_fmul8ulx16(row21, yFilter2); \ 1202 v20 = vis_fpadd16(u00, u01); \ 1203 data2 = dpSrc[2]; \ 1204 u20 = vis_fmul8sux16(row22, yFilter2); \ 1205 sum0 = vis_fpadd16(v00, v10); \ 1206 data3 = dpSrc[3]; \ 1207 u21 = vis_fmul8ulx16(row22, yFilter2); \ 1208 data4 = dpSrc[4]; \ 1209 row10 = vis_faligndata(data0, data1); \ 1210 u30 = vis_fmul8sux16(row23, yFilter2); \ 1211 row11 = vis_faligndata(data1, data2); \ 1212 u31 = vis_fmul8ulx16(row23, yFilter2); \ 1213 row12 = vis_faligndata(data2, data3); \ 1214 u00 = vis_fmul8sux16(row30, yFilter3); \ 1215 row13 = vis_faligndata(data3, data4); \ 1216 sPtr += srcYStride; \ 1217 dpSrc = vis_alignaddr(sPtr, 0); \ 1218 u01 = vis_fmul8ulx16(row30, yFilter3); \ 1219 v21 = vis_fpadd16(u10, u11); \ 1220 Y += dY; \ 1221 xSrc = (X >> MLIB_SHIFT)-1; \ 1222 sum1 = vis_fpadd16(v01, v11); \ 1223 data0 = dpSrc[0]; \ 1224 u10 = vis_fmul8sux16(row31, yFilter3); \ 1225 sum2 = vis_fpadd16(v02, v12); \ 1226 sum3 = vis_fpadd16(v03, v13); \ 1227 ySrc = (Y >> MLIB_SHIFT)-1; \ 1228 data1 = dpSrc[1]; \ 1229 v22 = vis_fpadd16(u20, u21); \ 1230 u11 = vis_fmul8ulx16(row31, yFilter3); \ 1231 data2 = dpSrc[2]; \ 1232 sum0 = vis_fpadd16(sum0, v20); \ 1233 u20 = vis_fmul8sux16(row32, yFilter3); \ 1234 data3 = dpSrc[3]; \ 1235 u21 = vis_fmul8ulx16(row32, yFilter3); \ 1236 v23 = vis_fpadd16(u30, u31); \ 1237 data4 = dpSrc[4]; \ 1238 v30 = vis_fpadd16(u00, u01); \ 1239 filterposy &= FILTER_MASK; \ 1240 row20 = vis_faligndata(data0, data1); \ 1241 sum1 = vis_fpadd16(sum1, v21); \ 1242 u30 = vis_fmul8sux16(row33, yFilter3); \ 1243 row21 = vis_faligndata(data1, data2); \ 1244 u31 = vis_fmul8ulx16(row33, yFilter3); \ 1245 
row22 = vis_faligndata(data2, data3); \ 1246 row23 = vis_faligndata(data3, data4); \ 1247 sPtr += srcYStride; \ 1248 filterposx &= FILTER_MASK; \ 1249 v31 = vis_fpadd16(u10, u11); \ 1250 dpSrc = vis_alignaddr(sPtr, 0); \ 1251 data0 = dpSrc[0]; \ 1252 sum2 = vis_fpadd16(sum2, v22); \ 1253 sum3 = vis_fpadd16(sum3, v23); \ 1254 data1 = dpSrc[1]; \ 1255 v32 = vis_fpadd16(u20, u21); \ 1256 data2 = dpSrc[2]; \ 1257 sum0 = vis_fpadd16(sum0, v30); \ 1258 data3 = dpSrc[3]; \ 1259 v33 = vis_fpadd16(u30, u31); \ 1260 data4 = dpSrc[4]; \ 1261 row30 = vis_faligndata(data0, data1); \ 1262 v00 = vis_fmul8sux16(sum0, xFilter0); \ 1263 row31 = vis_faligndata(data1, data2); \ 1264 row32 = vis_faligndata(data2, data3); \ 1265 row33 = vis_faligndata(data3, data4); \ 1266 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ 1267 sum1 = vis_fpadd16(sum1, v31); \ 1268 yFilter0 = yPtr[0]; \ 1269 sum2 = vis_fpadd16(sum2, v32); \ 1270 v01 = vis_fmul8ulx16(sum0, xFilter0); \ 1271 yFilter1 = yPtr[1]; \ 1272 v10 = vis_fmul8sux16(sum1, xFilter1); \ 1273 sum3 = vis_fpadd16(sum3, v33); \ 1274 yFilter2 = yPtr[2]; \ 1275 v11 = vis_fmul8ulx16(sum1, xFilter1); \ 1276 d0 = vis_fpadd16(v00, v01); \ 1277 yFilter3 = yPtr[3]; \ 1278 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ 1279 v20 = vis_fmul8sux16(sum2, xFilter2); \ 1280 xFilter0 = xPtr[0]; \ 1281 v21 = vis_fmul8ulx16(sum2, xFilter2); \ 1282 d1 = vis_fpadd16(v10, v11); \ 1283 xFilter1 = xPtr[1]; \ 1284 v30 = vis_fmul8sux16(sum3, xFilter3); \ 1285 v31 = vis_fmul8ulx16(sum3, xFilter3); \ 1286 d2 = vis_fpadd16(v20, v21); \ 1287 xFilter2 = xPtr[2]; \ 1288 d3 = vis_fpadd16(v30, v31); \ 1289 xFilter3 = xPtr[3]; \ 1290 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2) 1291 1292/***************************************************************/ 1293#define FADD_4BC_S16() \ 1294 d0 = vis_fpadd16(d0, d1); \ 1295 d2 = vis_fpadd16(d2, d3); \ 1296 d0 = vis_fpadd16(d0, d2); \ 1297 d2 = vis_fmuld8sux16(f_x01000100, 
vis_read_hi(d0)); \ 1298 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ 1299 res = vis_fpackfix_pair(d2, d3) 1300 1301/***************************************************************/ 1302mlib_status mlib_ImageAffine_s16_4ch_bc (mlib_affine_param *param) 1303{ 1304 DECLAREVAR_BC(); 1305 DTYPE *dstLineEnd; 1306 mlib_s32 filterposx, filterposy; 1307 mlib_d64 data0, data1, data2, data3, data4; 1308 mlib_d64 sum0, sum1, sum2, sum3; 1309 mlib_d64 row00, row10, row20, row30; 1310 mlib_d64 row01, row11, row21, row31; 1311 mlib_d64 row02, row12, row22, row32; 1312 mlib_d64 row03, row13, row23, row33; 1313 mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; 1314 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; 1315 mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13; 1316 mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33; 1317 mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31; 1318 mlib_d64 d0, d1, d2, d3; 1319 mlib_d64 *yPtr, *xPtr; 1320 mlib_d64 *dp, *dpSrc; 1321 mlib_s32 cols, i, mask, gsrd; 1322 mlib_d64 res; 1323 mlib_f32 f_x01000100 = vis_to_float(0x01000100); 1324 const mlib_s16 *mlib_filters_table_4; 1325 1326 if (filter == MLIB_BICUBIC) { 1327 mlib_filters_table_4 = mlib_filters_s16_bc_4; 1328 } else { 1329 mlib_filters_table_4 = mlib_filters_s16_bc2_4; 1330 } 1331 1332 srcYStride >>= 1; 1333 1334 for (j = yStart; j <= yFinish; j++) { 1335 1336 vis_write_gsr(10 << 3); 1337 1338 CLIP(4); 1339 dstLineEnd = (DTYPE*)dstData + 4 * xRight; 1340 1341 cols = xRight - xLeft + 1; 1342 dp = vis_alignaddr(dstPixelPtr, 0); 1343 dstLineEnd += 3; 1344 mask = vis_edge16(dstPixelPtr, dstLineEnd); 1345 gsrd = ((8 - (mlib_addr)dstPixelPtr) & 7); 1346 1347 i = 0; 1348 1349 if (i <= cols - 4) { 1350 1351 NEXT_PIXEL_4BC_S16(); 1352 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); 1353 1354 NEXT_PIXEL_4BC_S16(); 1355 1356 BC_S16_4CH(mlib_filters_table_4); 1357 FADD_4BC_S16(); 1358 1359 BC_S16_4CH(mlib_filters_table_4); 1360 1361#pragma pipeloop(0) 1362 for (; i < cols-4; i++) { 1363 
vis_alignaddr((void *)gsrd, 0); 1364 res = vis_faligndata(res, res); 1365 1366 vis_pst_16(res, dp++, mask); 1367 vis_pst_16(res, dp, ~mask); 1368 1369 FADD_4BC_S16(); 1370 BC_S16_4CH(mlib_filters_table_4); 1371 } 1372 1373 vis_alignaddr((void *)gsrd, 0); 1374 res = vis_faligndata(res, res); 1375 vis_pst_16(res, dp++, mask); 1376 vis_pst_16(res, dp, ~mask); 1377 1378 FADD_4BC_S16(); 1379 vis_alignaddr((void *)gsrd, 0); 1380 res = vis_faligndata(res, res); 1381 vis_pst_16(res, dp++, mask); 1382 vis_pst_16(res, dp, ~mask); 1383 1384 RESULT_4BC_S16_1PIXEL(); 1385 vis_alignaddr((void *)gsrd, 0); 1386 res = vis_faligndata(res, res); 1387 vis_pst_16(res, dp++, mask); 1388 vis_pst_16(res, dp, ~mask); 1389 1390 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); 1391 RESULT_4BC_S16_1PIXEL(); 1392 vis_alignaddr((void *)gsrd, 0); 1393 res = vis_faligndata(res, res); 1394 vis_pst_16(res, dp++, mask); 1395 vis_pst_16(res, dp, ~mask); 1396 i += 4; 1397 } 1398 1399#pragma pipeloop(0) 1400 for (; i < cols; i++) { 1401 NEXT_PIXEL_4BC_S16(); 1402 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4); 1403 RESULT_4BC_S16_1PIXEL(); 1404 vis_alignaddr((void *)gsrd, 0); 1405 res = vis_faligndata(res, res); 1406 vis_pst_16(res, dp++, mask); 1407 vis_pst_16(res, dp, ~mask); 1408 } 1409 } 1410 1411 return MLIB_SUCCESS; 1412} 1413 1414/***************************************************************/ 1415