1/* 2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28#include "vis_proto.h" 29#include "mlib_image.h" 30#include "mlib_v_ImageLookUpFunc.h" 31 32/***************************************************************/ 33static void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src, 34 mlib_u8 *dst, 35 mlib_s32 xsize, 36 const mlib_u8 **table); 37 38static void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src, 39 mlib_u8 *dst, 40 mlib_s32 xsize, 41 const mlib_u8 **table); 42 43static void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src, 44 mlib_u8 *dst, 45 mlib_s32 xsize, 46 const mlib_u8 **table); 47 48static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src, 49 mlib_u8 *dst, 50 mlib_s32 xsize, 51 const mlib_u8 **table); 52 53static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src, 54 mlib_u8 *dst, 55 mlib_s32 xsize, 56 const mlib_u8 **table); 57 58static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src, 59 mlib_u8 *dst, 60 mlib_s32 xsize, 61 const mlib_u8 **table); 62 63static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src, 64 mlib_u8 *dst, 65 mlib_s32 xsize, 66 const mlib_u8 **table); 67 68/***************************************************************/ 69#define VIS_LD_U8_I(X, Y) vis_ld_u8_i((void *)(X), (Y)) 70 71/***************************************************************/ 72void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src, 73 mlib_u8 *dst, 74 mlib_s32 xsize, 75 const mlib_u8 **table) 76{ 77 mlib_s32 *sp; /* pointer to source data */ 78 mlib_s32 s0, s1, s2, s3; /* source data */ 79 mlib_u16 *dl; /* pointer to start of destination */ 80 mlib_u16 *dend; /* pointer to end of destination */ 81 mlib_d64 *dp; /* aligned pointer to destination */ 82 mlib_d64 t0, t1, t2; /* destination data */ 83 mlib_d64 t3, t4, t5; /* destination data */ 84 mlib_d64 t6, t7, acc; /* destination data */ 85 mlib_s32 emask; /* edge mask */ 86 mlib_s32 i, num; /* loop variable */ 87 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 88 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 89 90 sp = (void *)src; 91 dl = (mlib_u16 *) dst; 92 dp = (mlib_d64 *) dl; 93 dend = dl + xsize - 1; 94 95 vis_alignaddr((void *)0, 7); 96 97 if (xsize >= 4) { 98 99 s0 = sp[0]; 100 s1 = sp[1]; 101 s2 = sp[2]; 102 s3 = sp[3]; 103 sp += 4; 104 105#pragma pipeloop(0) 106 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 107 t7 = VIS_LD_U8_I(tab1, s3); 108 t6 = VIS_LD_U8_I(tab0, s3); 109 t5 = VIS_LD_U8_I(tab1, s2); 110 t4 = VIS_LD_U8_I(tab0, s2); 111 t3 = VIS_LD_U8_I(tab1, s1); 112 t2 = VIS_LD_U8_I(tab0, s1); 113 t1 = VIS_LD_U8_I(tab1, s0); 114 t0 = VIS_LD_U8_I(tab0, s0); 115 acc = vis_faligndata(t7, acc); 116 acc = vis_faligndata(t6, acc); 117 acc = vis_faligndata(t5, acc); 118 acc = vis_faligndata(t4, acc); 119 acc = vis_faligndata(t3, acc); 120 acc = vis_faligndata(t2, acc); 121 acc = vis_faligndata(t1, acc); 122 acc = vis_faligndata(t0, acc); 123 s0 = sp[0]; 124 s1 = sp[1]; 125 s2 = sp[2]; 126 s3 = sp[3]; 127 *dp++ = acc; 128 } 129 130 t7 = VIS_LD_U8_I(tab1, s3); 131 t6 = VIS_LD_U8_I(tab0, s3); 132 t5 = VIS_LD_U8_I(tab1, s2); 133 t4 = VIS_LD_U8_I(tab0, s2); 134 t3 = VIS_LD_U8_I(tab1, s1); 135 t2 = VIS_LD_U8_I(tab0, s1); 136 t1 = VIS_LD_U8_I(tab1, s0); 137 t0 = VIS_LD_U8_I(tab0, s0); 138 acc = vis_faligndata(t7, acc); 139 acc = vis_faligndata(t6, acc); 140 acc = vis_faligndata(t5, acc); 141 acc = vis_faligndata(t4, acc); 142 acc = vis_faligndata(t3, acc); 143 acc = vis_faligndata(t2, acc); 144 acc = vis_faligndata(t1, acc); 145 acc = vis_faligndata(t0, acc); 146 *dp++ = acc; 147 } 148 149 if ((mlib_addr) dp <= (mlib_addr) dend) { 150 151 num = (mlib_s32) ((mlib_u16 *) dend - (mlib_u16 *) dp); 152 sp += num; 153 num++; 154#pragma pipeloop(0) 155 for (i = 0; i < num; i++) { 156 s0 = *sp; 157 sp--; 158 159 t0 = VIS_LD_U8_I(tab1, s0); 160 acc = vis_faligndata(t0, acc); 161 162 t0 = VIS_LD_U8_I(tab0, s0); 163 acc = vis_faligndata(t0, acc); 164 } 165 166 emask = vis_edge16(dp, dend); 167 vis_pst_16(acc, dp, emask); 168 } 169} 170 171/***************************************************************/ 172void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src, 173 mlib_u8 *dst, 174 mlib_s32 xsize, 175 const mlib_u8 **table) 176{ 177 mlib_s32 *sp; /* pointer to source data */ 178 mlib_s32 s0, s1, s2, s3, s4; /* source data */ 179 mlib_u8 *dl; /* pointer to start of destination */ 180 mlib_u8 *dend; /* pointer to end of destination */ 181 mlib_d64 *dp; /* aligned pointer to destination */ 182 mlib_d64 t0, t1, t2; /* destination data */ 183 mlib_d64 t3, t4, t5; /* destination data */ 184 mlib_d64 t6, t7, acc; /* destination data */ 185 mlib_s32 emask; /* edge mask */ 186 mlib_s32 i, num; /* loop variable */ 187 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 188 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 189 190 sp = (void *)src; 191 dl = dst; 192 193 dend = dl + 2 * xsize - 1; 194 195 vis_alignaddr((void *)0, 7); 196 197 s0 = *sp++; 198 *dl++ = tab0[s0]; 199 dp = (mlib_d64 *) dl; 200 xsize--; 201 202 if (xsize >= 4) { 203 204 s1 = sp[0]; 205 s2 = sp[1]; 206 s3 = sp[2]; 207 s4 = sp[3]; 208 sp += 4; 209 210#pragma pipeloop(0) 211 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 212 t7 = VIS_LD_U8_I(tab0, s4); 213 t6 = VIS_LD_U8_I(tab1, s3); 214 t5 = VIS_LD_U8_I(tab0, s3); 215 t4 = VIS_LD_U8_I(tab1, s2); 216 t3 = VIS_LD_U8_I(tab0, s2); 217 t2 = VIS_LD_U8_I(tab1, s1); 218 t1 = VIS_LD_U8_I(tab0, s1); 219 t0 = VIS_LD_U8_I(tab1, s0); 220 acc = vis_faligndata(t7, acc); 221 acc = vis_faligndata(t6, acc); 222 acc = vis_faligndata(t5, acc); 223 acc = vis_faligndata(t4, acc); 224 acc = vis_faligndata(t3, acc); 225 acc = vis_faligndata(t2, acc); 226 acc = vis_faligndata(t1, acc); 227 acc = vis_faligndata(t0, acc); 228 s0 = s4; 229 s1 = sp[0]; 230 s2 = sp[1]; 231 s3 = sp[2]; 232 s4 = sp[3]; 233 *dp++ = acc; 234 } 235 236 t7 = VIS_LD_U8_I(tab0, s4); 237 t6 = VIS_LD_U8_I(tab1, s3); 238 t5 = VIS_LD_U8_I(tab0, s3); 239 t4 = VIS_LD_U8_I(tab1, s2); 240 t3 = VIS_LD_U8_I(tab0, s2); 241 t2 = VIS_LD_U8_I(tab1, s1); 242 t1 = VIS_LD_U8_I(tab0, s1); 243 t0 = VIS_LD_U8_I(tab1, s0); 244 acc = vis_faligndata(t7, acc); 245 acc = vis_faligndata(t6, acc); 246 acc = vis_faligndata(t5, acc); 247 acc = vis_faligndata(t4, acc); 248 acc = vis_faligndata(t3, acc); 249 acc = vis_faligndata(t2, acc); 250 acc = vis_faligndata(t1, acc); 251 acc = vis_faligndata(t0, acc); 252 s0 = s4; 253 *dp++ = acc; 254 } 255 256 num = (mlib_s32) (((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1); 257 sp += num - 1; 258 259#pragma pipeloop(0) 260 for (i = 0; i < num; i++) { 261 s1 = *sp; 262 sp--; 263 264 t0 = VIS_LD_U8_I(tab1, s1); 265 acc = vis_faligndata(t0, acc); 266 267 t0 = VIS_LD_U8_I(tab0, s1); 268 acc = vis_faligndata(t0, acc); 269 } 270 271 t0 = VIS_LD_U8_I(tab1, s0); 272 acc = vis_faligndata(t0, acc); 273 emask = vis_edge8(dp, dend); 274 vis_pst_8(acc, dp, emask); 275} 276 277/***************************************************************/ 278void mlib_v_ImageLookUpSI_S32_U8_2(const mlib_s32 *src, 279 mlib_s32 slb, 280 mlib_u8 *dst, 281 mlib_s32 dlb, 282 mlib_s32 xsize, 283 mlib_s32 ysize, 284 const mlib_u8 **table) 285{ 286 mlib_s32 *sl; 287 mlib_u8 *dl; 288 mlib_s32 i, j; 289 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 290 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 291 292 sl = (void *)src; 293 dl = dst; 294 295 /* row loop */ 296 for (j = 0; j < ysize; j++) { 297 mlib_s32 *sp = sl; 298 mlib_u8 *dp = dl; 299 mlib_s32 off, s0, size = xsize; 300 301 off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1); 302 off = (off < size) ? off : size; 303 304 for (i = 0; i < off; i++) { 305 s0 = *sp++; 306 *dp++ = tab0[s0]; 307 *dp++ = tab1[s0]; 308 size--; 309 } 310 311 if (size > 0) { 312 313 if (((mlib_addr) dp & 1) == 0) { 314 mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(sp, dp, size, table); 315 } 316 else { 317 mlib_v_ImageLookUpSI_S32_U8_2_D1(sp, dp, size, table); 318 } 319 } 320 321 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 322 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 323 } 324} 325 326/***************************************************************/ 327void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src, 328 mlib_u8 *dst, 329 mlib_s32 xsize, 330 const mlib_u8 **table) 331{ 332 mlib_s32 *sp; /* pointer to source data */ 333 mlib_u8 *dl; /* pointer to start of destination */ 334 mlib_d64 *dp; /* aligned pointer to destination */ 335 mlib_d64 t0, t1, t2; /* destination data */ 336 mlib_d64 t3, t4, t5; /* destination data */ 337 mlib_d64 t6, t7; /* destination data */ 338 mlib_d64 acc0, acc1, acc2; /* destination data */ 339 mlib_s32 i; /* loop variable */ 340 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 341 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 342 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 343 mlib_s32 s00, s01, s02, s03; 344 mlib_s32 s10, s11, s12, s13; 345 346 sp = (void *)src; 347 dl = dst; 348 dp = (mlib_d64 *) dl; 349 350 vis_alignaddr((void *)0, 7); 351 352 i = 0; 353 354 if (xsize >= 8) { 355 356 s00 = sp[0]; 357 s01 = sp[1]; 358 s02 = sp[2]; 359 s03 = sp[3]; 360 s10 = sp[4]; 361 s11 = sp[5]; 362 s12 = sp[6]; 363 s13 = sp[7]; 364 sp += 8; 365 366#pragma pipeloop(0) 367 for (i = 0; i <= xsize - 16; i += 8, sp += 8) { 368 t7 = VIS_LD_U8_I(tab1, s02); 369 t6 = VIS_LD_U8_I(tab0, s02); 370 t5 = VIS_LD_U8_I(tab2, s01); 371 t4 = VIS_LD_U8_I(tab1, s01); 372 t3 = VIS_LD_U8_I(tab0, s01); 373 t2 = VIS_LD_U8_I(tab2, s00); 374 t1 = VIS_LD_U8_I(tab1, s00); 375 t0 = VIS_LD_U8_I(tab0, s00); 376 acc0 = vis_faligndata(t7, acc0); 377 acc0 = vis_faligndata(t6, acc0); 378 acc0 = vis_faligndata(t5, acc0); 379 acc0 = vis_faligndata(t4, acc0); 380 acc0 = vis_faligndata(t3, acc0); 381 acc0 = vis_faligndata(t2, acc0); 382 acc0 = vis_faligndata(t1, acc0); 383 acc0 = vis_faligndata(t0, acc0); 384 t7 = VIS_LD_U8_I(tab0, s11); 385 t6 = VIS_LD_U8_I(tab2, s10); 386 t5 = VIS_LD_U8_I(tab1, s10); 387 t4 = VIS_LD_U8_I(tab0, s10); 388 t3 = VIS_LD_U8_I(tab2, s03); 389 t2 = VIS_LD_U8_I(tab1, s03); 390 t1 = VIS_LD_U8_I(tab0, s03); 391 t0 = VIS_LD_U8_I(tab2, s02); 392 acc1 = vis_faligndata(t7, acc1); 393 acc1 = vis_faligndata(t6, acc1); 394 acc1 = vis_faligndata(t5, acc1); 395 acc1 = vis_faligndata(t4, acc1); 396 acc1 = vis_faligndata(t3, acc1); 397 acc1 = vis_faligndata(t2, acc1); 398 acc1 = vis_faligndata(t1, acc1); 399 acc1 = vis_faligndata(t0, acc1); 400 t7 = VIS_LD_U8_I(tab2, s13); 401 t6 = VIS_LD_U8_I(tab1, s13); 402 t5 = VIS_LD_U8_I(tab0, s13); 403 t4 = VIS_LD_U8_I(tab2, s12); 404 t3 = VIS_LD_U8_I(tab1, s12); 405 t2 = VIS_LD_U8_I(tab0, s12); 406 t1 = VIS_LD_U8_I(tab2, s11); 407 t0 = VIS_LD_U8_I(tab1, s11); 408 acc2 = vis_faligndata(t7, acc2); 409 acc2 = vis_faligndata(t6, acc2); 410 acc2 = vis_faligndata(t5, acc2); 411 acc2 = vis_faligndata(t4, acc2); 412 acc2 = vis_faligndata(t3, acc2); 413 acc2 = vis_faligndata(t2, acc2); 414 acc2 = vis_faligndata(t1, acc2); 415 acc2 = vis_faligndata(t0, acc2); 416 s00 = sp[0]; 417 s01 = sp[1]; 418 s02 = sp[2]; 419 s03 = sp[3]; 420 s10 = sp[4]; 421 s11 = sp[5]; 422 s12 = sp[6]; 423 s13 = sp[7]; 424 *dp++ = acc0; 425 *dp++ = acc1; 426 *dp++ = acc2; 427 } 428 429 t7 = VIS_LD_U8_I(tab1, s02); 430 t6 = VIS_LD_U8_I(tab0, s02); 431 t5 = VIS_LD_U8_I(tab2, s01); 432 t4 = VIS_LD_U8_I(tab1, s01); 433 t3 = VIS_LD_U8_I(tab0, s01); 434 t2 = VIS_LD_U8_I(tab2, s00); 435 t1 = VIS_LD_U8_I(tab1, s00); 436 t0 = VIS_LD_U8_I(tab0, s00); 437 acc0 = vis_faligndata(t7, acc0); 438 acc0 = vis_faligndata(t6, acc0); 439 acc0 = vis_faligndata(t5, acc0); 440 acc0 = vis_faligndata(t4, acc0); 441 acc0 = vis_faligndata(t3, acc0); 442 acc0 = vis_faligndata(t2, acc0); 443 acc0 = vis_faligndata(t1, acc0); 444 acc0 = vis_faligndata(t0, acc0); 445 t7 = VIS_LD_U8_I(tab0, s11); 446 t6 = VIS_LD_U8_I(tab2, s10); 447 t5 = VIS_LD_U8_I(tab1, s10); 448 t4 = VIS_LD_U8_I(tab0, s10); 449 t3 = VIS_LD_U8_I(tab2, s03); 450 t2 = VIS_LD_U8_I(tab1, s03); 451 t1 = VIS_LD_U8_I(tab0, s03); 452 t0 = VIS_LD_U8_I(tab2, s02); 453 acc1 = vis_faligndata(t7, acc1); 454 acc1 = vis_faligndata(t6, acc1); 455 acc1 = vis_faligndata(t5, acc1); 456 acc1 = vis_faligndata(t4, acc1); 457 acc1 = vis_faligndata(t3, acc1); 458 acc1 = vis_faligndata(t2, acc1); 459 acc1 = vis_faligndata(t1, acc1); 460 acc1 = vis_faligndata(t0, acc1); 461 t7 = VIS_LD_U8_I(tab2, s13); 462 t6 = VIS_LD_U8_I(tab1, s13); 463 t5 = VIS_LD_U8_I(tab0, s13); 464 t4 = VIS_LD_U8_I(tab2, s12); 465 t3 = VIS_LD_U8_I(tab1, s12); 466 t2 = VIS_LD_U8_I(tab0, s12); 467 t1 = VIS_LD_U8_I(tab2, s11); 468 t0 = VIS_LD_U8_I(tab1, s11); 469 acc2 = vis_faligndata(t7, acc2); 470 acc2 = vis_faligndata(t6, acc2); 471 acc2 = vis_faligndata(t5, acc2); 472 acc2 = vis_faligndata(t4, acc2); 473 acc2 = vis_faligndata(t3, acc2); 474 acc2 = vis_faligndata(t2, acc2); 475 acc2 = vis_faligndata(t1, acc2); 476 acc2 = vis_faligndata(t0, acc2); 477 *dp++ = acc0; 478 *dp++ = acc1; 479 *dp++ = acc2; 480 i += 8; 481 } 482 483 dl = (mlib_u8 *) dp; 484 485#pragma pipeloop(0) 486 for (; i < xsize; i++) { 487 s00 = sp[0]; 488 dl[0] = tab0[s00]; 489 dl[1] = tab1[s00]; 490 dl[2] = tab2[s00]; 491 dl += 3; 492 sp++; 493 } 494} 495 496/***************************************************************/ 497void mlib_v_ImageLookUpSI_S32_U8_3(const mlib_s32 *src, 498 mlib_s32 slb, 499 mlib_u8 *dst, 500 mlib_s32 dlb, 501 mlib_s32 xsize, 502 mlib_s32 ysize, 503 const mlib_u8 **table) 504{ 505 mlib_s32 *sl; 506 mlib_u8 *dl; 507 mlib_s32 i, j; 508 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 509 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 510 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 511 512 sl = (void *)src; 513 dl = dst; 514 515 /* row loop */ 516 for (j = 0; j < ysize; j++) { 517 mlib_s32 *sp = sl; 518 mlib_u8 *dp = dl; 519 mlib_s32 off, s0, size = xsize; 520 521 off = (mlib_s32) ((mlib_addr) dp & 7); 522 off = (off * 5) & 7; 523 off = (off < size) ? off : size; 524 525 for (i = 0; i < off; i++) { 526 s0 = *sp++; 527 *dp++ = tab0[s0]; 528 *dp++ = tab1[s0]; 529 *dp++ = tab2[s0]; 530 size--; 531 } 532 533 if (size > 0) { 534 mlib_v_ImageLookUpSI_S32_U8_3_D1(sp, dp, size, table); 535 } 536 537 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 538 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 539 } 540} 541 542/***************************************************************/ 543void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src, 544 mlib_u8 *dst, 545 mlib_s32 xsize, 546 const mlib_u8 **table) 547{ 548 mlib_s32 *sp; /* pointer to source data */ 549 mlib_s32 s0, s1; /* source data */ 550 mlib_u8 *dl; /* pointer to start of destination */ 551 mlib_d64 *dp; /* aligned pointer to destination */ 552 mlib_d64 t0, t1, t2; /* destination data */ 553 mlib_d64 t3, t4, t5; /* destination data */ 554 mlib_d64 t6, t7, acc; /* destination data */ 555 mlib_s32 i; /* loop variable */ 556 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 557 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 558 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 559 const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u]; 560 561 sp = (void *)src; 562 dl = dst; 563 dp = (mlib_d64 *) dl; 564 565 vis_alignaddr((void *)0, 7); 566 567 if (xsize >= 2) { 568 569 s0 = sp[0]; 570 s1 = sp[1]; 571 sp += 2; 572 573#pragma pipeloop(0) 574 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 575 t7 = VIS_LD_U8_I(tab3, s1); 576 t6 = VIS_LD_U8_I(tab2, s1); 577 t5 = VIS_LD_U8_I(tab1, s1); 578 t4 = VIS_LD_U8_I(tab0, s1); 579 t3 = VIS_LD_U8_I(tab3, s0); 580 t2 = VIS_LD_U8_I(tab2, s0); 581 t1 = VIS_LD_U8_I(tab1, s0); 582 t0 = VIS_LD_U8_I(tab0, s0); 583 acc = vis_faligndata(t7, acc); 584 acc = vis_faligndata(t6, acc); 585 acc = vis_faligndata(t5, acc); 586 acc = vis_faligndata(t4, acc); 587 acc = vis_faligndata(t3, acc); 588 acc = vis_faligndata(t2, acc); 589 acc = vis_faligndata(t1, acc); 590 acc = vis_faligndata(t0, acc); 591 s0 = sp[0]; 592 s1 = sp[1]; 593 *dp++ = acc; 594 } 595 596 t7 = VIS_LD_U8_I(tab3, s1); 597 t6 = VIS_LD_U8_I(tab2, s1); 598 t5 = VIS_LD_U8_I(tab1, s1); 599 t4 = VIS_LD_U8_I(tab0, s1); 600 t3 = VIS_LD_U8_I(tab3, s0); 601 t2 = VIS_LD_U8_I(tab2, s0); 602 t1 = VIS_LD_U8_I(tab1, s0); 603 t0 = VIS_LD_U8_I(tab0, s0); 604 acc = vis_faligndata(t7, acc); 605 acc = vis_faligndata(t6, acc); 606 acc = vis_faligndata(t5, acc); 607 acc = vis_faligndata(t4, acc); 608 acc = vis_faligndata(t3, acc); 609 acc = vis_faligndata(t2, acc); 610 acc = vis_faligndata(t1, acc); 611 acc = vis_faligndata(t0, acc); 612 *dp++ = acc; 613 } 614 615 if ((xsize & 1) != 0) { 616 s0 = sp[0]; 617 t7 = VIS_LD_U8_I(tab3, s0); 618 t6 = VIS_LD_U8_I(tab2, s0); 619 t5 = VIS_LD_U8_I(tab1, s0); 620 t4 = VIS_LD_U8_I(tab0, s0); 621 acc = vis_faligndata(t7, acc); 622 acc = vis_faligndata(t6, acc); 623 acc = vis_faligndata(t5, acc); 624 acc = vis_faligndata(t4, acc); 625 *(mlib_f32 *) dp = vis_read_hi(acc); 626 } 627} 628 629/***************************************************************/ 630void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src, 631 mlib_u8 *dst, 632 mlib_s32 xsize, 633 const mlib_u8 **table) 634{ 635 mlib_s32 *sp; /* pointer to source data */ 636 mlib_s32 s0, s1, s2; /* source data */ 637 mlib_u8 *dl; /* pointer to start of destination */ 638 mlib_d64 *dp; /* aligned pointer to destination */ 639 mlib_d64 t0, t1, t2; /* destination data */ 640 mlib_d64 t3, t4, t5; /* destination data */ 641 mlib_d64 t6, t7, acc; /* destination data */ 642 mlib_s32 i; /* loop variable */ 643 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 644 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 645 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 646 const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u]; 647 648 sp = (void *)src; 649 dl = dst; 650 dp = (mlib_d64 *) dl; 651 652 vis_alignaddr((void *)0, 7); 653 654 s0 = *sp++; 655 656 if (xsize >= 2) { 657 658 s1 = sp[0]; 659 s2 = sp[1]; 660 sp += 2; 661 662#pragma pipeloop(0) 663 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 664 t7 = VIS_LD_U8_I(tab0, s2); 665 t6 = VIS_LD_U8_I(tab3, s1); 666 t5 = VIS_LD_U8_I(tab2, s1); 667 t4 = VIS_LD_U8_I(tab1, s1); 668 t3 = VIS_LD_U8_I(tab0, s1); 669 t2 = VIS_LD_U8_I(tab3, s0); 670 t1 = VIS_LD_U8_I(tab2, s0); 671 t0 = VIS_LD_U8_I(tab1, s0); 672 acc = vis_faligndata(t7, acc); 673 acc = vis_faligndata(t6, acc); 674 acc = vis_faligndata(t5, acc); 675 acc = vis_faligndata(t4, acc); 676 acc = vis_faligndata(t3, acc); 677 acc = vis_faligndata(t2, acc); 678 acc = vis_faligndata(t1, acc); 679 acc = vis_faligndata(t0, acc); 680 s0 = s2; 681 s1 = sp[0]; 682 s2 = sp[1]; 683 *dp++ = acc; 684 } 685 686 t7 = VIS_LD_U8_I(tab0, s2); 687 t6 = VIS_LD_U8_I(tab3, s1); 688 t5 = VIS_LD_U8_I(tab2, s1); 689 t4 = VIS_LD_U8_I(tab1, s1); 690 t3 = VIS_LD_U8_I(tab0, s1); 691 t2 = VIS_LD_U8_I(tab3, s0); 692 t1 = VIS_LD_U8_I(tab2, s0); 693 t0 = VIS_LD_U8_I(tab1, s0); 694 acc = vis_faligndata(t7, acc); 695 acc = vis_faligndata(t6, acc); 696 acc = vis_faligndata(t5, acc); 697 acc = vis_faligndata(t4, acc); 698 acc = vis_faligndata(t3, acc); 699 acc = vis_faligndata(t2, acc); 700 acc = vis_faligndata(t1, acc); 701 acc = vis_faligndata(t0, acc); 702 s0 = s2; 703 *dp++ = acc; 704 } 705 706 dl = (mlib_u8 *) dp; 707 708 if ((xsize & 1) != 0) { 709 s1 = sp[0]; 710 t7 = VIS_LD_U8_I(tab0, s1); 711 t6 = VIS_LD_U8_I(tab3, s0); 712 t5 = VIS_LD_U8_I(tab2, s0); 713 t4 = VIS_LD_U8_I(tab1, s0); 714 acc = vis_faligndata(t7, acc); 715 acc = vis_faligndata(t6, acc); 716 acc = vis_faligndata(t5, acc); 717 acc = vis_faligndata(t4, acc); 718 *(mlib_f32 *) dl = vis_read_hi(acc); 719 dl += 4; 720 s0 = s1; 721 } 722 723 dl[0] = tab1[s0]; 724 dl[1] = tab2[s0]; 725 dl[2] = tab3[s0]; 726} 727 728/***************************************************************/ 729void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src, 730 mlib_u8 *dst, 731 mlib_s32 xsize, 732 const mlib_u8 **table) 733{ 734 mlib_s32 *sp; /* pointer to source data */ 735 mlib_s32 s0, s1, s2; /* source data */ 736 mlib_u8 *dl; /* pointer to start of destination */ 737 mlib_d64 *dp; /* aligned pointer to destination */ 738 mlib_d64 t0, t1, t2; /* destination data */ 739 mlib_d64 t3, t4, t5; /* destination data */ 740 mlib_d64 t6, t7, acc; /* destination data */ 741 mlib_s32 i; /* loop variable */ 742 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 743 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 744 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 745 const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u]; 746 747 sp = (void *)src; 748 dl = dst; 749 dp = (mlib_d64 *) dl; 750 751 vis_alignaddr((void *)0, 7); 752 753 s0 = *sp++; 754 755 if (xsize >= 2) { 756 757 s1 = sp[0]; 758 s2 = sp[1]; 759 sp += 2; 760 761#pragma pipeloop(0) 762 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 763 t7 = VIS_LD_U8_I(tab1, s2); 764 t6 = VIS_LD_U8_I(tab0, s2); 765 t5 = VIS_LD_U8_I(tab3, s1); 766 t4 = VIS_LD_U8_I(tab2, s1); 767 t3 = VIS_LD_U8_I(tab1, s1); 768 t2 = VIS_LD_U8_I(tab0, s1); 769 t1 = VIS_LD_U8_I(tab3, s0); 770 t0 = VIS_LD_U8_I(tab2, s0); 771 acc = vis_faligndata(t7, acc); 772 acc = vis_faligndata(t6, acc); 773 acc = vis_faligndata(t5, acc); 774 acc = vis_faligndata(t4, acc); 775 acc = vis_faligndata(t3, acc); 776 acc = vis_faligndata(t2, acc); 777 acc = vis_faligndata(t1, acc); 778 acc = vis_faligndata(t0, acc); 779 s0 = s2; 780 s1 = sp[0]; 781 s2 = sp[1]; 782 *dp++ = acc; 783 } 784 785 t7 = VIS_LD_U8_I(tab1, s2); 786 t6 = VIS_LD_U8_I(tab0, s2); 787 t5 = VIS_LD_U8_I(tab3, s1); 788 t4 = VIS_LD_U8_I(tab2, s1); 789 t3 = VIS_LD_U8_I(tab1, s1); 790 t2 = VIS_LD_U8_I(tab0, s1); 791 t1 = VIS_LD_U8_I(tab3, s0); 792 t0 = VIS_LD_U8_I(tab2, s0); 793 acc = vis_faligndata(t7, acc); 794 acc = vis_faligndata(t6, acc); 795 acc = vis_faligndata(t5, acc); 796 acc = vis_faligndata(t4, acc); 797 acc = vis_faligndata(t3, acc); 798 acc = vis_faligndata(t2, acc); 799 acc = vis_faligndata(t1, acc); 800 acc = vis_faligndata(t0, acc); 801 s0 = s2; 802 *dp++ = acc; 803 } 804 805 dl = (mlib_u8 *) dp; 806 807 if ((xsize & 1) != 0) { 808 s1 = sp[0]; 809 t7 = VIS_LD_U8_I(tab1, s1); 810 t6 = VIS_LD_U8_I(tab0, s1); 811 t5 = VIS_LD_U8_I(tab3, s0); 812 t4 = VIS_LD_U8_I(tab2, s0); 813 acc = vis_faligndata(t7, acc); 814 acc = vis_faligndata(t6, acc); 815 acc = vis_faligndata(t5, acc); 816 acc = vis_faligndata(t4, acc); 817 *(mlib_f32 *) dl = vis_read_hi(acc); 818 dl += 4; 819 s0 = s1; 820 } 821 822 dl[0] = tab2[s0]; 823 dl[1] = tab3[s0]; 824} 825 826/***************************************************************/ 827void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src, 828 mlib_u8 *dst, 829 mlib_s32 xsize, 830 const mlib_u8 **table) 831{ 832 mlib_s32 *sp; /* pointer to source data */ 833 mlib_s32 s0, s1, s2; /* source data */ 834 mlib_u8 *dl; /* pointer to start of destination */ 835 mlib_d64 *dp; /* aligned pointer to destination */ 836 mlib_d64 t0, t1, t2; /* destination data */ 837 mlib_d64 t3, t4, t5; /* destination data */ 838 mlib_d64 t6, t7, acc; /* destination data */ 839 mlib_s32 i; /* loop variable */ 840 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 841 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 842 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 843 const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u]; 844 845 sp = (void *)src; 846 dl = dst; 847 dp = (mlib_d64 *) dl; 848 849 vis_alignaddr((void *)0, 7); 850 851 s0 = *sp++; 852 853 if (xsize >= 2) { 854 855 s1 = sp[0]; 856 s2 = sp[1]; 857 sp += 2; 858 859#pragma pipeloop(0) 860 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 861 t7 = VIS_LD_U8_I(tab2, s2); 862 t6 = VIS_LD_U8_I(tab1, s2); 863 t5 = VIS_LD_U8_I(tab0, s2); 864 t4 = VIS_LD_U8_I(tab3, s1); 865 t3 = VIS_LD_U8_I(tab2, s1); 866 t2 = VIS_LD_U8_I(tab1, s1); 867 t1 = VIS_LD_U8_I(tab0, s1); 868 t0 = VIS_LD_U8_I(tab3, s0); 869 acc = vis_faligndata(t7, acc); 870 acc = vis_faligndata(t6, acc); 871 acc = vis_faligndata(t5, acc); 872 acc = vis_faligndata(t4, acc); 873 acc = vis_faligndata(t3, acc); 874 acc = vis_faligndata(t2, acc); 875 acc = vis_faligndata(t1, acc); 876 acc = vis_faligndata(t0, acc); 877 s0 = s2; 878 s1 = sp[0]; 879 s2 = sp[1]; 880 *dp++ = acc; 881 } 882 883 t7 = VIS_LD_U8_I(tab2, s2); 884 t6 = VIS_LD_U8_I(tab1, s2); 885 t5 = VIS_LD_U8_I(tab0, s2); 886 t4 = VIS_LD_U8_I(tab3, s1); 887 t3 = VIS_LD_U8_I(tab2, s1); 888 t2 = VIS_LD_U8_I(tab1, s1); 889 t1 = VIS_LD_U8_I(tab0, s1); 890 t0 = VIS_LD_U8_I(tab3, s0); 891 acc = vis_faligndata(t7, acc); 892 acc = vis_faligndata(t6, acc); 893 acc = vis_faligndata(t5, acc); 894 acc = vis_faligndata(t4, acc); 895 acc = vis_faligndata(t3, acc); 896 acc = vis_faligndata(t2, acc); 897 acc = vis_faligndata(t1, acc); 898 acc = vis_faligndata(t0, acc); 899 s0 = s2; 900 *dp++ = acc; 901 } 902 903 dl = (mlib_u8 *) dp; 904 905 if ((xsize & 1) != 0) { 906 s1 = sp[0]; 907 t7 = VIS_LD_U8_I(tab2, s1); 908 t6 = VIS_LD_U8_I(tab1, s1); 909 t5 = VIS_LD_U8_I(tab0, s1); 910 t4 = VIS_LD_U8_I(tab3, s0); 911 acc = vis_faligndata(t7, acc); 912 acc = vis_faligndata(t6, acc); 913 acc = vis_faligndata(t5, acc); 914 acc = vis_faligndata(t4, acc); 915 *(mlib_f32 *) dl = vis_read_hi(acc); 916 dl += 4; 917 s0 = s1; 918 } 919 920 dl[0] = tab3[s0]; 921} 922 923/***************************************************************/ 924void mlib_v_ImageLookUpSI_S32_U8_4(const mlib_s32 *src, 925 mlib_s32 slb, 926 mlib_u8 *dst, 927 mlib_s32 dlb, 928 mlib_s32 xsize, 929 mlib_s32 ysize, 930 const mlib_u8 **table) 931{ 932 mlib_s32 *sl; 933 mlib_u8 *dl; 934 mlib_s32 j; 935 const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u]; 936 const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u]; 937 const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u]; 938 const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u]; 939 940 sl = (void *)src; 941 dl = dst; 942 943 /* row loop */ 944 for (j = 0; j < ysize; j++) { 945 mlib_s32 *sp = sl; 946 mlib_u8 *dp = dl; 947 mlib_s32 off, s0, size = xsize; 948 949 off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7); 950 951 if ((off >= 4) && (size > 0)) { 952 s0 = *sp++; 953 *dp++ = tab0[s0]; 954 *dp++ = tab1[s0]; 955 *dp++ = tab2[s0]; 956 *dp++ = tab3[s0]; 957 size--; 958 } 959 960 if (size > 0) { 961 off = (mlib_s32) ((4 - ((mlib_addr) dp & 3)) & 3); 962 963 if (off == 0) { 964 mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(sp, dp, size, table); 965 } 966 else if (off == 1) { 967 s0 = *sp; 968 *dp++ = tab0[s0]; 969 size--; 970 mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(sp, dp, size, table); 971 } 972 else if (off == 2) { 973 s0 = *sp; 974 *dp++ = tab0[s0]; 975 *dp++ = tab1[s0]; 976 size--; 977 mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(sp, dp, size, table); 978 } 979 else if (off == 3) { 980 s0 = *sp; 981 *dp++ = tab0[s0]; 982 *dp++ = tab1[s0]; 983 *dp++ = tab2[s0]; 984 size--; 985 mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(sp, dp, size, table); 986 } 987 } 988 989 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 990 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 991 } 992} 993 994/***************************************************************/ 995