1/* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28#include "vis_proto.h" 29#include "mlib_image.h" 30#include "mlib_v_ImageLookUpFunc.h" 31 32/***************************************************************/ 33static void mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(const mlib_u16 *src, 34 mlib_u8 *dst, 35 mlib_s32 xsize, 36 const mlib_u8 **table); 37 38static void mlib_v_ImageLookUpSI_U16_U8_2_D1(const mlib_u16 *src, 39 mlib_u8 *dst, 40 mlib_s32 xsize, 41 const mlib_u8 **table); 42 43static void mlib_v_ImageLookUpSI_U16_U8_3_D1(const mlib_u16 *src, 44 mlib_u8 *dst, 45 mlib_s32 xsize, 46 const mlib_u8 **table); 47 48static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(const mlib_u16 *src, 49 mlib_u8 *dst, 50 mlib_s32 xsize, 51 const mlib_u8 **table); 52 53static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(const mlib_u16 *src, 54 mlib_u8 *dst, 55 mlib_s32 xsize, 56 const mlib_u8 **table); 57 58static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(const mlib_u16 *src, 59 mlib_u8 *dst, 60 mlib_s32 xsize, 61 const mlib_u8 **table); 62 63static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(const mlib_u16 *src, 64 mlib_u8 *dst, 65 mlib_s32 xsize, 66 const mlib_u8 **table); 67 68/***************************************************************/ 69#define VIS_LD_U8_I(X, Y) vis_ld_u8_i((void *)(X), (Y)) 70 71/***************************************************************/ 72void mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(const mlib_u16 *src, 73 mlib_u8 *dst, 74 mlib_s32 xsize, 75 const mlib_u8 **table) 76{ 77 mlib_u16 *sp; /* pointer to source data */ 78 mlib_s32 s0, s1, s2, s3; /* source data */ 79 mlib_u16 *dl; /* pointer to start of destination */ 80 mlib_u16 *dend; /* pointer to end of destination */ 81 mlib_d64 *dp; /* aligned pointer to destination */ 82 mlib_d64 t0, t1, t2; /* destination data */ 83 mlib_d64 t3, t4, t5; /* destination data */ 84 mlib_d64 t6, t7, acc; /* destination data */ 85 mlib_s32 emask; /* edge mask */ 86 mlib_s32 i, num; /* loop variable */ 87 const mlib_u8 *tab0 = &table[0][0]; 88 const mlib_u8 *tab1 = &table[1][0]; 89 90 sp = (void *)src; 91 dl = (mlib_u16 *) dst; 92 dp = (mlib_d64 *) dl; 93 dend = dl + xsize - 1; 94 95 vis_alignaddr((void *)0, 7); 96 97 if (xsize >= 4) { 98 99 s0 = sp[0]; 100 s1 = sp[1]; 101 s2 = sp[2]; 102 s3 = sp[3]; 103 sp += 4; 104 105#pragma pipeloop(0) 106 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 107 t7 = VIS_LD_U8_I(tab1, s3); 108 t6 = VIS_LD_U8_I(tab0, s3); 109 t5 = VIS_LD_U8_I(tab1, s2); 110 t4 = VIS_LD_U8_I(tab0, s2); 111 t3 = VIS_LD_U8_I(tab1, s1); 112 t2 = VIS_LD_U8_I(tab0, s1); 113 t1 = VIS_LD_U8_I(tab1, s0); 114 t0 = VIS_LD_U8_I(tab0, s0); 115 acc = vis_faligndata(t7, acc); 116 acc = vis_faligndata(t6, acc); 117 acc = vis_faligndata(t5, acc); 118 acc = vis_faligndata(t4, acc); 119 acc = vis_faligndata(t3, acc); 120 acc = vis_faligndata(t2, acc); 121 acc = vis_faligndata(t1, acc); 122 acc = vis_faligndata(t0, acc); 123 s0 = sp[0]; 124 s1 = sp[1]; 125 s2 = sp[2]; 126 s3 = sp[3]; 127 *dp++ = acc; 128 } 129 130 t7 = VIS_LD_U8_I(tab1, s3); 131 t6 = VIS_LD_U8_I(tab0, s3); 132 t5 = VIS_LD_U8_I(tab1, s2); 133 t4 = VIS_LD_U8_I(tab0, s2); 134 t3 = VIS_LD_U8_I(tab1, s1); 135 t2 = VIS_LD_U8_I(tab0, s1); 136 t1 = VIS_LD_U8_I(tab1, s0); 137 t0 = VIS_LD_U8_I(tab0, s0); 138 acc = vis_faligndata(t7, acc); 139 acc = vis_faligndata(t6, acc); 140 acc = vis_faligndata(t5, acc); 141 acc = vis_faligndata(t4, acc); 142 acc = vis_faligndata(t3, acc); 143 acc = vis_faligndata(t2, acc); 144 acc = vis_faligndata(t1, acc); 145 acc = vis_faligndata(t0, acc); 146 *dp++ = acc; 147 } 148 149 if ((mlib_addr) dp <= (mlib_addr) dend) { 150 151 num = (mlib_u16 *) dend - (mlib_u16 *) dp; 152 sp += num; 153 num++; 154#pragma pipeloop(0) 155 for (i = 0; i < num; i++) { 156 s0 = (mlib_s32) * sp; 157 sp--; 158 159 t0 = VIS_LD_U8_I(tab1, s0); 160 acc = vis_faligndata(t0, acc); 161 162 t0 = VIS_LD_U8_I(tab0, s0); 163 acc = vis_faligndata(t0, acc); 164 } 165 166 emask = vis_edge16(dp, dend); 167 vis_pst_16(acc, dp, emask); 168 } 169} 170 171/***************************************************************/ 172void mlib_v_ImageLookUpSI_U16_U8_2_D1(const mlib_u16 *src, 173 mlib_u8 *dst, 174 mlib_s32 xsize, 175 const mlib_u8 **table) 176{ 177 mlib_u16 *sp; /* pointer to source data */ 178 mlib_s32 s0, s1, s2, s3, s4; /* source data */ 179 mlib_u8 *dl; /* pointer to start of destination */ 180 mlib_u8 *dend; /* pointer to end of destination */ 181 mlib_d64 *dp; /* aligned pointer to destination */ 182 mlib_d64 t0, t1, t2; /* destination data */ 183 mlib_d64 t3, t4, t5; /* destination data */ 184 mlib_d64 t6, t7, acc; /* destination data */ 185 mlib_s32 emask; /* edge mask */ 186 mlib_s32 i, num; /* loop variable */ 187 const mlib_u8 *tab0 = &table[0][0]; 188 const mlib_u8 *tab1 = &table[1][0]; 189 190 sp = (void *)src; 191 dl = dst; 192 193 dend = dl + 2 * xsize - 1; 194 195 vis_alignaddr((void *)0, 7); 196 197 s0 = *sp++; 198 *dl++ = tab0[s0]; 199 dp = (mlib_d64 *) dl; 200 xsize--; 201 202 if (xsize >= 4) { 203 204 s1 = sp[0]; 205 s2 = sp[1]; 206 s3 = sp[2]; 207 s4 = sp[3]; 208 sp += 4; 209 210#pragma pipeloop(0) 211 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 212 t7 = VIS_LD_U8_I(tab0, s4); 213 t6 = VIS_LD_U8_I(tab1, s3); 214 t5 = VIS_LD_U8_I(tab0, s3); 215 t4 = VIS_LD_U8_I(tab1, s2); 216 t3 = VIS_LD_U8_I(tab0, s2); 217 t2 = VIS_LD_U8_I(tab1, s1); 218 t1 = VIS_LD_U8_I(tab0, s1); 219 t0 = VIS_LD_U8_I(tab1, s0); 220 acc = vis_faligndata(t7, acc); 221 acc = vis_faligndata(t6, acc); 222 acc = vis_faligndata(t5, acc); 223 acc = vis_faligndata(t4, acc); 224 acc = vis_faligndata(t3, acc); 225 acc = vis_faligndata(t2, acc); 226 acc = vis_faligndata(t1, acc); 227 acc = vis_faligndata(t0, acc); 228 s0 = s4; 229 s1 = sp[0]; 230 s2 = sp[1]; 231 s3 = sp[2]; 232 s4 = sp[3]; 233 *dp++ = acc; 234 } 235 236 t7 = VIS_LD_U8_I(tab0, s4); 237 t6 = VIS_LD_U8_I(tab1, s3); 238 t5 = VIS_LD_U8_I(tab0, s3); 239 t4 = VIS_LD_U8_I(tab1, s2); 240 t3 = VIS_LD_U8_I(tab0, s2); 241 t2 = VIS_LD_U8_I(tab1, s1); 242 t1 = VIS_LD_U8_I(tab0, s1); 243 t0 = VIS_LD_U8_I(tab1, s0); 244 acc = vis_faligndata(t7, acc); 245 acc = vis_faligndata(t6, acc); 246 acc = vis_faligndata(t5, acc); 247 acc = vis_faligndata(t4, acc); 248 acc = vis_faligndata(t3, acc); 249 acc = vis_faligndata(t2, acc); 250 acc = vis_faligndata(t1, acc); 251 acc = vis_faligndata(t0, acc); 252 s0 = s4; 253 *dp++ = acc; 254 } 255 256 num = ((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1; 257 sp += num; 258 num++; 259 260#pragma pipeloop(0) 261 for (i = 0; i < num; i++) { 262 s1 = (mlib_s32) * sp; 263 sp--; 264 265 t0 = VIS_LD_U8_I(tab1, s1); 266 acc = vis_faligndata(t0, acc); 267 268 t0 = VIS_LD_U8_I(tab0, s1); 269 acc = vis_faligndata(t0, acc); 270 } 271 272 t0 = VIS_LD_U8_I(tab1, s0); 273 acc = vis_faligndata(t0, acc); 274 emask = vis_edge8(dp, dend); 275 vis_pst_8(acc, dp, emask); 276} 277 278/***************************************************************/ 279void mlib_v_ImageLookUpSI_U16_U8_2(const mlib_u16 *src, 280 mlib_s32 slb, 281 mlib_u8 *dst, 282 mlib_s32 dlb, 283 mlib_s32 xsize, 284 mlib_s32 ysize, 285 const mlib_u8 **table) 286{ 287 mlib_u16 *sl; 288 mlib_u8 *dl; 289 mlib_s32 i, j; 290 const mlib_u8 *tab0 = &table[0][0]; 291 const mlib_u8 *tab1 = &table[1][0]; 292 293 sl = (void *)src; 294 dl = dst; 295 296 /* row loop */ 297 for (j = 0; j < ysize; j++) { 298 mlib_u16 *sp = sl; 299 mlib_u8 *dp = dl; 300 mlib_s32 off, s0, size = xsize; 301 302 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 303 off = (off < size) ? off : size; 304 305 for (i = 0; i < off; i++) { 306 s0 = *sp++; 307 *dp++ = tab0[s0]; 308 *dp++ = tab1[s0]; 309 size--; 310 } 311 312 if (size > 0) { 313 314 if (((mlib_addr) dp & 1) == 0) { 315 mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(sp, dp, size, table); 316 } 317 else { 318 mlib_v_ImageLookUpSI_U16_U8_2_D1(sp, dp, size, table); 319 } 320 } 321 322 sl = (mlib_u16 *) ((mlib_u8 *) sl + slb); 323 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 324 } 325} 326 327/***************************************************************/ 328void mlib_v_ImageLookUpSI_U16_U8_3_D1(const mlib_u16 *src, 329 mlib_u8 *dst, 330 mlib_s32 xsize, 331 const mlib_u8 **table) 332{ 333 mlib_u16 *sp; /* pointer to source data */ 334 mlib_u8 *dl; /* pointer to start of destination */ 335 mlib_d64 *dp; /* aligned pointer to destination */ 336 mlib_d64 t0, t1, t2; /* destination data */ 337 mlib_d64 t3, t4, t5; /* destination data */ 338 mlib_d64 t6, t7; /* destination data */ 339 mlib_d64 acc0, acc1, acc2; /* destination data */ 340 mlib_s32 i; /* loop variable */ 341 const mlib_u8 *tab0 = &table[0][0]; 342 const mlib_u8 *tab1 = &table[1][0]; 343 const mlib_u8 *tab2 = &table[2][0]; 344 mlib_s32 s00, s01, s02, s03; 345 mlib_s32 s10, s11, s12, s13; 346 347 sp = (void *)src; 348 dl = dst; 349 dp = (mlib_d64 *) dl; 350 351 vis_alignaddr((void *)0, 7); 352 353 i = 0; 354 355 if (xsize >= 8) { 356 357 s00 = sp[0]; 358 s01 = sp[1]; 359 s02 = sp[2]; 360 s03 = sp[3]; 361 s10 = sp[4]; 362 s11 = sp[5]; 363 s12 = sp[6]; 364 s13 = sp[7]; 365 sp += 8; 366 367#pragma pipeloop(0) 368 for (i = 0; i <= xsize - 16; i += 8, sp += 8) { 369 t7 = VIS_LD_U8_I(tab1, s02); 370 t6 = VIS_LD_U8_I(tab0, s02); 371 t5 = VIS_LD_U8_I(tab2, s01); 372 t4 = VIS_LD_U8_I(tab1, s01); 373 t3 = VIS_LD_U8_I(tab0, s01); 374 t2 = VIS_LD_U8_I(tab2, s00); 375 t1 = VIS_LD_U8_I(tab1, s00); 376 t0 = VIS_LD_U8_I(tab0, s00); 377 acc0 = vis_faligndata(t7, acc0); 378 acc0 = vis_faligndata(t6, acc0); 379 acc0 = vis_faligndata(t5, acc0); 380 acc0 = vis_faligndata(t4, acc0); 381 acc0 = vis_faligndata(t3, acc0); 382 acc0 = vis_faligndata(t2, acc0); 383 acc0 = vis_faligndata(t1, acc0); 384 acc0 = vis_faligndata(t0, acc0); 385 t7 = VIS_LD_U8_I(tab0, s11); 386 t6 = VIS_LD_U8_I(tab2, s10); 387 t5 = VIS_LD_U8_I(tab1, s10); 388 t4 = VIS_LD_U8_I(tab0, s10); 389 t3 = VIS_LD_U8_I(tab2, s03); 390 t2 = VIS_LD_U8_I(tab1, s03); 391 t1 = VIS_LD_U8_I(tab0, s03); 392 t0 = VIS_LD_U8_I(tab2, s02); 393 acc1 = vis_faligndata(t7, acc1); 394 acc1 = vis_faligndata(t6, acc1); 395 acc1 = vis_faligndata(t5, acc1); 396 acc1 = vis_faligndata(t4, acc1); 397 acc1 = vis_faligndata(t3, acc1); 398 acc1 = vis_faligndata(t2, acc1); 399 acc1 = vis_faligndata(t1, acc1); 400 acc1 = vis_faligndata(t0, acc1); 401 t7 = VIS_LD_U8_I(tab2, s13); 402 t6 = VIS_LD_U8_I(tab1, s13); 403 t5 = VIS_LD_U8_I(tab0, s13); 404 t4 = VIS_LD_U8_I(tab2, s12); 405 t3 = VIS_LD_U8_I(tab1, s12); 406 t2 = VIS_LD_U8_I(tab0, s12); 407 t1 = VIS_LD_U8_I(tab2, s11); 408 t0 = VIS_LD_U8_I(tab1, s11); 409 acc2 = vis_faligndata(t7, acc2); 410 acc2 = vis_faligndata(t6, acc2); 411 acc2 = vis_faligndata(t5, acc2); 412 acc2 = vis_faligndata(t4, acc2); 413 acc2 = vis_faligndata(t3, acc2); 414 acc2 = vis_faligndata(t2, acc2); 415 acc2 = vis_faligndata(t1, acc2); 416 acc2 = vis_faligndata(t0, acc2); 417 s00 = sp[0]; 418 s01 = sp[1]; 419 s02 = sp[2]; 420 s03 = sp[3]; 421 s10 = sp[4]; 422 s11 = sp[5]; 423 s12 = sp[6]; 424 s13 = sp[7]; 425 *dp++ = acc0; 426 *dp++ = acc1; 427 *dp++ = acc2; 428 } 429 430 t7 = VIS_LD_U8_I(tab1, s02); 431 t6 = VIS_LD_U8_I(tab0, s02); 432 t5 = VIS_LD_U8_I(tab2, s01); 433 t4 = VIS_LD_U8_I(tab1, s01); 434 t3 = VIS_LD_U8_I(tab0, s01); 435 t2 = VIS_LD_U8_I(tab2, s00); 436 t1 = VIS_LD_U8_I(tab1, s00); 437 t0 = VIS_LD_U8_I(tab0, s00); 438 acc0 = vis_faligndata(t7, acc0); 439 acc0 = vis_faligndata(t6, acc0); 440 acc0 = vis_faligndata(t5, acc0); 441 acc0 = vis_faligndata(t4, acc0); 442 acc0 = vis_faligndata(t3, acc0); 443 acc0 = vis_faligndata(t2, acc0); 444 acc0 = vis_faligndata(t1, acc0); 445 acc0 = vis_faligndata(t0, acc0); 446 t7 = VIS_LD_U8_I(tab0, s11); 447 t6 = VIS_LD_U8_I(tab2, s10); 448 t5 = VIS_LD_U8_I(tab1, s10); 449 t4 = VIS_LD_U8_I(tab0, s10); 450 t3 = VIS_LD_U8_I(tab2, s03); 451 t2 = VIS_LD_U8_I(tab1, s03); 452 t1 = VIS_LD_U8_I(tab0, s03); 453 t0 = VIS_LD_U8_I(tab2, s02); 454 acc1 = vis_faligndata(t7, acc1); 455 acc1 = vis_faligndata(t6, acc1); 456 acc1 = vis_faligndata(t5, acc1); 457 acc1 = vis_faligndata(t4, acc1); 458 acc1 = vis_faligndata(t3, acc1); 459 acc1 = vis_faligndata(t2, acc1); 460 acc1 = vis_faligndata(t1, acc1); 461 acc1 = vis_faligndata(t0, acc1); 462 t7 = VIS_LD_U8_I(tab2, s13); 463 t6 = VIS_LD_U8_I(tab1, s13); 464 t5 = VIS_LD_U8_I(tab0, s13); 465 t4 = VIS_LD_U8_I(tab2, s12); 466 t3 = VIS_LD_U8_I(tab1, s12); 467 t2 = VIS_LD_U8_I(tab0, s12); 468 t1 = VIS_LD_U8_I(tab2, s11); 469 t0 = VIS_LD_U8_I(tab1, s11); 470 acc2 = vis_faligndata(t7, acc2); 471 acc2 = vis_faligndata(t6, acc2); 472 acc2 = vis_faligndata(t5, acc2); 473 acc2 = vis_faligndata(t4, acc2); 474 acc2 = vis_faligndata(t3, acc2); 475 acc2 = vis_faligndata(t2, acc2); 476 acc2 = vis_faligndata(t1, acc2); 477 acc2 = vis_faligndata(t0, acc2); 478 *dp++ = acc0; 479 *dp++ = acc1; 480 *dp++ = acc2; 481 i += 8; 482 } 483 484 dl = (mlib_u8 *) dp; 485 486#pragma pipeloop(0) 487 for (; i < xsize; i++) { 488 s00 = sp[0]; 489 dl[0] = tab0[s00]; 490 dl[1] = tab1[s00]; 491 dl[2] = tab2[s00]; 492 dl += 3; 493 sp++; 494 } 495} 496 497/***************************************************************/ 498void mlib_v_ImageLookUpSI_U16_U8_3(const mlib_u16 *src, 499 mlib_s32 slb, 500 mlib_u8 *dst, 501 mlib_s32 dlb, 502 mlib_s32 xsize, 503 mlib_s32 ysize, 504 const mlib_u8 **table) 505{ 506 mlib_u16 *sl; 507 mlib_u8 *dl; 508 mlib_s32 i, j; 509 const mlib_u8 *tab0 = &table[0][0]; 510 const mlib_u8 *tab1 = &table[1][0]; 511 const mlib_u8 *tab2 = &table[2][0]; 512 513 sl = (void *)src; 514 dl = dst; 515 516 /* row loop */ 517 for (j = 0; j < ysize; j++) { 518 mlib_u16 *sp = sl; 519 mlib_u8 *dp = dl; 520 mlib_s32 off, s0, size = xsize; 521 522 off = (mlib_addr) dp & 7; 523 off = (off * 5) & 7; 524 off = (off < size) ? off : size; 525 526 for (i = 0; i < off; i++) { 527 s0 = *sp++; 528 *dp++ = tab0[s0]; 529 *dp++ = tab1[s0]; 530 *dp++ = tab2[s0]; 531 size--; 532 } 533 534 if (size > 0) { 535 mlib_v_ImageLookUpSI_U16_U8_3_D1(sp, dp, size, table); 536 } 537 538 sl = (mlib_u16 *) ((mlib_u8 *) sl + slb); 539 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 540 } 541} 542 543/***************************************************************/ 544void mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(const mlib_u16 *src, 545 mlib_u8 *dst, 546 mlib_s32 xsize, 547 const mlib_u8 **table) 548{ 549 mlib_u16 *sp; /* pointer to source data */ 550 mlib_s32 s0, s1; /* source data */ 551 mlib_u8 *dl; /* pointer to start of destination */ 552 mlib_d64 *dp; /* aligned pointer to destination */ 553 mlib_d64 t0, t1, t2; /* destination data */ 554 mlib_d64 t3, t4, t5; /* destination data */ 555 mlib_d64 t6, t7, acc; /* destination data */ 556 mlib_s32 i; /* loop variable */ 557 const mlib_u8 *tab0 = &table[0][0]; 558 const mlib_u8 *tab1 = &table[1][0]; 559 const mlib_u8 *tab2 = &table[2][0]; 560 const mlib_u8 *tab3 = &table[3][0]; 561 562 sp = (void *)src; 563 dl = dst; 564 dp = (mlib_d64 *) dl; 565 566 vis_alignaddr((void *)0, 7); 567 568 if (xsize >= 2) { 569 570 s0 = sp[0]; 571 s1 = sp[1]; 572 sp += 2; 573 574#pragma pipeloop(0) 575 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 576 t7 = VIS_LD_U8_I(tab3, s1); 577 t6 = VIS_LD_U8_I(tab2, s1); 578 t5 = VIS_LD_U8_I(tab1, s1); 579 t4 = VIS_LD_U8_I(tab0, s1); 580 t3 = VIS_LD_U8_I(tab3, s0); 581 t2 = VIS_LD_U8_I(tab2, s0); 582 t1 = VIS_LD_U8_I(tab1, s0); 583 t0 = VIS_LD_U8_I(tab0, s0); 584 acc = vis_faligndata(t7, acc); 585 acc = vis_faligndata(t6, acc); 586 acc = vis_faligndata(t5, acc); 587 acc = vis_faligndata(t4, acc); 588 acc = vis_faligndata(t3, acc); 589 acc = vis_faligndata(t2, acc); 590 acc = vis_faligndata(t1, acc); 591 acc = vis_faligndata(t0, acc); 592 s0 = sp[0]; 593 s1 = sp[1]; 594 *dp++ = acc; 595 } 596 597 t7 = VIS_LD_U8_I(tab3, s1); 598 t6 = VIS_LD_U8_I(tab2, s1); 599 t5 = VIS_LD_U8_I(tab1, s1); 600 t4 = VIS_LD_U8_I(tab0, s1); 601 t3 = VIS_LD_U8_I(tab3, s0); 602 t2 = VIS_LD_U8_I(tab2, s0); 603 t1 = VIS_LD_U8_I(tab1, s0); 604 t0 = VIS_LD_U8_I(tab0, s0); 605 acc = vis_faligndata(t7, acc); 606 acc = vis_faligndata(t6, acc); 607 acc = vis_faligndata(t5, acc); 608 acc = vis_faligndata(t4, acc); 609 acc = vis_faligndata(t3, acc); 610 acc = vis_faligndata(t2, acc); 611 acc = vis_faligndata(t1, acc); 612 acc = vis_faligndata(t0, acc); 613 *dp++ = acc; 614 } 615 616 if ((xsize & 1) != 0) { 617 s0 = sp[0]; 618 t7 = VIS_LD_U8_I(tab3, s0); 619 t6 = VIS_LD_U8_I(tab2, s0); 620 t5 = VIS_LD_U8_I(tab1, s0); 621 t4 = VIS_LD_U8_I(tab0, s0); 622 acc = vis_faligndata(t7, acc); 623 acc = vis_faligndata(t6, acc); 624 acc = vis_faligndata(t5, acc); 625 acc = vis_faligndata(t4, acc); 626 *(mlib_f32 *) dp = vis_read_hi(acc); 627 } 628} 629 630/***************************************************************/ 631void mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(const mlib_u16 *src, 632 mlib_u8 *dst, 633 mlib_s32 xsize, 634 const mlib_u8 **table) 635{ 636 mlib_u16 *sp; /* pointer to source data */ 637 mlib_s32 s0, s1, s2; /* source data */ 638 mlib_u8 *dl; /* pointer to start of destination */ 639 mlib_d64 *dp; /* aligned pointer to destination */ 640 mlib_d64 t0, t1, t2; /* destination data */ 641 mlib_d64 t3, t4, t5; /* destination data */ 642 mlib_d64 t6, t7, acc; /* destination data */ 643 mlib_s32 i; /* loop variable */ 644 const mlib_u8 *tab0 = &table[0][0]; 645 const mlib_u8 *tab1 = &table[1][0]; 646 const mlib_u8 *tab2 = &table[2][0]; 647 const mlib_u8 *tab3 = &table[3][0]; 648 649 sp = (void *)src; 650 dl = dst; 651 dp = (mlib_d64 *) dl; 652 653 vis_alignaddr((void *)0, 7); 654 655 s0 = *sp++; 656 657 if (xsize >= 2) { 658 659 s1 = sp[0]; 660 s2 = sp[1]; 661 sp += 2; 662 663#pragma pipeloop(0) 664 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 665 t7 = VIS_LD_U8_I(tab0, s2); 666 t6 = VIS_LD_U8_I(tab3, s1); 667 t5 = VIS_LD_U8_I(tab2, s1); 668 t4 = VIS_LD_U8_I(tab1, s1); 669 t3 = VIS_LD_U8_I(tab0, s1); 670 t2 = VIS_LD_U8_I(tab3, s0); 671 t1 = VIS_LD_U8_I(tab2, s0); 672 t0 = VIS_LD_U8_I(tab1, s0); 673 acc = vis_faligndata(t7, acc); 674 acc = vis_faligndata(t6, acc); 675 acc = vis_faligndata(t5, acc); 676 acc = vis_faligndata(t4, acc); 677 acc = vis_faligndata(t3, acc); 678 acc = vis_faligndata(t2, acc); 679 acc = vis_faligndata(t1, acc); 680 acc = vis_faligndata(t0, acc); 681 s0 = s2; 682 s1 = sp[0]; 683 s2 = sp[1]; 684 *dp++ = acc; 685 } 686 687 t7 = VIS_LD_U8_I(tab0, s2); 688 t6 = VIS_LD_U8_I(tab3, s1); 689 t5 = VIS_LD_U8_I(tab2, s1); 690 t4 = VIS_LD_U8_I(tab1, s1); 691 t3 = VIS_LD_U8_I(tab0, s1); 692 t2 = VIS_LD_U8_I(tab3, s0); 693 t1 = VIS_LD_U8_I(tab2, s0); 694 t0 = VIS_LD_U8_I(tab1, s0); 695 acc = vis_faligndata(t7, acc); 696 acc = vis_faligndata(t6, acc); 697 acc = vis_faligndata(t5, acc); 698 acc = vis_faligndata(t4, acc); 699 acc = vis_faligndata(t3, acc); 700 acc = vis_faligndata(t2, acc); 701 acc = vis_faligndata(t1, acc); 702 acc = vis_faligndata(t0, acc); 703 s0 = s2; 704 *dp++ = acc; 705 } 706 707 dl = (mlib_u8 *) dp; 708 709 if ((xsize & 1) != 0) { 710 s1 = sp[0]; 711 t7 = VIS_LD_U8_I(tab0, s1); 712 t6 = VIS_LD_U8_I(tab3, s0); 713 t5 = VIS_LD_U8_I(tab2, s0); 714 t4 = VIS_LD_U8_I(tab1, s0); 715 acc = vis_faligndata(t7, acc); 716 acc = vis_faligndata(t6, acc); 717 acc = vis_faligndata(t5, acc); 718 acc = vis_faligndata(t4, acc); 719 *(mlib_f32 *) dl = vis_read_hi(acc); 720 dl += 4; 721 s0 = s1; 722 } 723 724 dl[0] = tab1[s0]; 725 dl[1] = tab2[s0]; 726 dl[2] = tab3[s0]; 727} 728 729/***************************************************************/ 730void mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(const mlib_u16 *src, 731 mlib_u8 *dst, 732 mlib_s32 xsize, 733 const mlib_u8 **table) 734{ 735 mlib_u16 *sp; /* pointer to source data */ 736 mlib_s32 s0, s1, s2; /* source data */ 737 mlib_u8 *dl; /* pointer to start of destination */ 738 mlib_d64 *dp; /* aligned pointer to destination */ 739 mlib_d64 t0, t1, t2; /* destination data */ 740 mlib_d64 t3, t4, t5; /* destination data */ 741 mlib_d64 t6, t7, acc; /* destination data */ 742 mlib_s32 i; /* loop variable */ 743 const mlib_u8 *tab0 = &table[0][0]; 744 const mlib_u8 *tab1 = &table[1][0]; 745 const mlib_u8 *tab2 = &table[2][0]; 746 const mlib_u8 *tab3 = &table[3][0]; 747 748 sp = (void *)src; 749 dl = dst; 750 dp = (mlib_d64 *) dl; 751 752 vis_alignaddr((void *)0, 7); 753 754 s0 = *sp++; 755 756 if (xsize >= 2) { 757 758 s1 = sp[0]; 759 s2 = sp[1]; 760 sp += 2; 761 762#pragma pipeloop(0) 763 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 764 t7 = VIS_LD_U8_I(tab1, s2); 765 t6 = VIS_LD_U8_I(tab0, s2); 766 t5 = VIS_LD_U8_I(tab3, s1); 767 t4 = VIS_LD_U8_I(tab2, s1); 768 t3 = VIS_LD_U8_I(tab1, s1); 769 t2 = VIS_LD_U8_I(tab0, s1); 770 t1 = VIS_LD_U8_I(tab3, s0); 771 t0 = VIS_LD_U8_I(tab2, s0); 772 acc = vis_faligndata(t7, acc); 773 acc = vis_faligndata(t6, acc); 774 acc = vis_faligndata(t5, acc); 775 acc = vis_faligndata(t4, acc); 776 acc = vis_faligndata(t3, acc); 777 acc = vis_faligndata(t2, acc); 778 acc = vis_faligndata(t1, acc); 779 acc = vis_faligndata(t0, acc); 780 s0 = s2; 781 s1 = sp[0]; 782 s2 = sp[1]; 783 *dp++ = acc; 784 } 785 786 t7 = VIS_LD_U8_I(tab1, s2); 787 t6 = VIS_LD_U8_I(tab0, s2); 788 t5 = VIS_LD_U8_I(tab3, s1); 789 t4 = VIS_LD_U8_I(tab2, s1); 790 t3 = VIS_LD_U8_I(tab1, s1); 791 t2 = VIS_LD_U8_I(tab0, s1); 792 t1 = VIS_LD_U8_I(tab3, s0); 793 t0 = VIS_LD_U8_I(tab2, s0); 794 acc = vis_faligndata(t7, acc); 795 acc = vis_faligndata(t6, acc); 796 acc = vis_faligndata(t5, acc); 797 acc = vis_faligndata(t4, acc); 798 acc = vis_faligndata(t3, acc); 799 acc = vis_faligndata(t2, acc); 800 acc = vis_faligndata(t1, acc); 801 acc = vis_faligndata(t0, acc); 802 s0 = s2; 803 *dp++ = acc; 804 } 805 806 dl = (mlib_u8 *) dp; 807 808 if ((xsize & 1) != 0) { 809 s1 = sp[0]; 810 t7 = VIS_LD_U8_I(tab1, s1); 811 t6 = VIS_LD_U8_I(tab0, s1); 812 t5 = VIS_LD_U8_I(tab3, s0); 813 t4 = VIS_LD_U8_I(tab2, s0); 814 acc = vis_faligndata(t7, acc); 815 acc = vis_faligndata(t6, acc); 816 acc = vis_faligndata(t5, acc); 817 acc = vis_faligndata(t4, acc); 818 *(mlib_f32 *) dl = vis_read_hi(acc); 819 dl += 4; 820 s0 = s1; 821 } 822 823 dl[0] = tab2[s0]; 824 dl[1] = tab3[s0]; 825} 826 827/***************************************************************/ 828void mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(const mlib_u16 *src, 829 mlib_u8 *dst, 830 mlib_s32 xsize, 831 const mlib_u8 **table) 832{ 833 mlib_u16 *sp; /* pointer to source data */ 834 mlib_s32 s0, s1, s2; /* source data */ 835 mlib_u8 *dl; /* pointer to start of destination */ 836 mlib_d64 *dp; /* aligned pointer to destination */ 837 mlib_d64 t0, t1, t2; /* destination data */ 838 mlib_d64 t3, t4, t5; /* destination data */ 839 mlib_d64 t6, t7, acc; /* destination data */ 840 mlib_s32 i; /* loop variable */ 841 const mlib_u8 *tab0 = &table[0][0]; 842 const mlib_u8 *tab1 = &table[1][0]; 843 const mlib_u8 *tab2 = &table[2][0]; 844 const mlib_u8 *tab3 = &table[3][0]; 845 846 sp = (void *)src; 847 dl = dst; 848 dp = (mlib_d64 *) dl; 849 850 vis_alignaddr((void *)0, 7); 851 852 s0 = *sp++; 853 854 if (xsize >= 2) { 855 856 s1 = sp[0]; 857 s2 = sp[1]; 858 sp += 2; 859 860#pragma pipeloop(0) 861 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 862 t7 = VIS_LD_U8_I(tab2, s2); 863 t6 = VIS_LD_U8_I(tab1, s2); 864 t5 = VIS_LD_U8_I(tab0, s2); 865 t4 = VIS_LD_U8_I(tab3, s1); 866 t3 = VIS_LD_U8_I(tab2, s1); 867 t2 = VIS_LD_U8_I(tab1, s1); 868 t1 = VIS_LD_U8_I(tab0, s1); 869 t0 = VIS_LD_U8_I(tab3, s0); 870 acc = vis_faligndata(t7, acc); 871 acc = vis_faligndata(t6, acc); 872 acc = vis_faligndata(t5, acc); 873 acc = vis_faligndata(t4, acc); 874 acc = vis_faligndata(t3, acc); 875 acc = vis_faligndata(t2, acc); 876 acc = vis_faligndata(t1, acc); 877 acc = vis_faligndata(t0, acc); 878 s0 = s2; 879 s1 = sp[0]; 880 s2 = sp[1]; 881 *dp++ = acc; 882 } 883 884 t7 = VIS_LD_U8_I(tab2, s2); 885 t6 = VIS_LD_U8_I(tab1, s2); 886 t5 = VIS_LD_U8_I(tab0, s2); 887 t4 = VIS_LD_U8_I(tab3, s1); 888 t3 = VIS_LD_U8_I(tab2, s1); 889 t2 = VIS_LD_U8_I(tab1, s1); 890 t1 = VIS_LD_U8_I(tab0, s1); 891 t0 = VIS_LD_U8_I(tab3, s0); 892 acc = vis_faligndata(t7, acc); 893 acc = vis_faligndata(t6, acc); 894 acc = vis_faligndata(t5, acc); 895 acc = vis_faligndata(t4, acc); 896 acc = vis_faligndata(t3, acc); 897 acc = vis_faligndata(t2, acc); 898 acc = vis_faligndata(t1, acc); 899 acc = vis_faligndata(t0, acc); 900 s0 = s2; 901 *dp++ = acc; 902 } 903 904 dl = (mlib_u8 *) dp; 905 906 if ((xsize & 1) != 0) { 907 s1 = sp[0]; 908 t7 = VIS_LD_U8_I(tab2, s1); 909 t6 = VIS_LD_U8_I(tab1, s1); 910 t5 = VIS_LD_U8_I(tab0, s1); 911 t4 = VIS_LD_U8_I(tab3, s0); 912 acc = vis_faligndata(t7, acc); 913 acc = vis_faligndata(t6, acc); 914 acc = vis_faligndata(t5, acc); 915 acc = vis_faligndata(t4, acc); 916 *(mlib_f32 *) dl = vis_read_hi(acc); 917 dl += 4; 918 s0 = s1; 919 } 920 921 dl[0] = tab3[s0]; 922} 923 924/***************************************************************/ 925void mlib_v_ImageLookUpSI_U16_U8_4(const mlib_u16 *src, 926 mlib_s32 slb, 927 mlib_u8 *dst, 928 mlib_s32 dlb, 929 mlib_s32 xsize, 930 mlib_s32 ysize, 931 const mlib_u8 **table) 932{ 933 mlib_u16 *sl; 934 mlib_u8 *dl; 935 mlib_s32 j; 936 const mlib_u8 *tab0 = &table[0][0]; 937 const mlib_u8 *tab1 = &table[1][0]; 938 const mlib_u8 *tab2 = &table[2][0]; 939 const mlib_u8 *tab3 = &table[3][0]; 940 941 sl = (void *)src; 942 dl = dst; 943 944 /* row loop */ 945 for (j = 0; j < ysize; j++) { 946 mlib_u16 *sp = sl; 947 mlib_u8 *dp = dl; 948 mlib_s32 off, s0, size = xsize; 949 950 off = (8 - ((mlib_addr) dp & 7)) & 7; 951 952 if ((off >= 4) && (size > 0)) { 953 s0 = *sp++; 954 *dp++ = tab0[s0]; 955 *dp++ = tab1[s0]; 956 *dp++ = tab2[s0]; 957 *dp++ = tab3[s0]; 958 size--; 959 } 960 961 if (size > 0) { 962 off = (4 - ((mlib_addr) dp & 3)) & 3; 963 964 if (off == 0) { 965 mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(sp, dp, size, table); 966 } 967 else if (off == 1) { 968 s0 = *sp; 969 *dp++ = tab0[s0]; 970 size--; 971 mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(sp, dp, size, table); 972 } 973 else if (off == 2) { 974 s0 = *sp; 975 *dp++ = tab0[s0]; 976 *dp++ = tab1[s0]; 977 size--; 978 mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(sp, dp, size, table); 979 } 980 else if (off == 3) { 981 s0 = *sp; 982 *dp++ = tab0[s0]; 983 *dp++ = tab1[s0]; 984 *dp++ = tab2[s0]; 985 size--; 986 mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(sp, dp, size, table); 987 } 988 } 989 990 sl = (mlib_u16 *) ((mlib_u8 *) sl + slb); 991 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 992 } 993} 994 995/***************************************************************/ 996