/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



#include "vis_proto.h"
#include "mlib_image.h"
#include "mlib_v_ImageLookUpFunc.h"

/***************************************************************/
/*
 * Forward declarations of the single-row ("D1") kernels of this file.
 *
 * The "_124_" kernels take four tables and are the ones called by
 * mlib_v_ImageLookUp_U8_S16_1/_2/_4 below; the "_3_" kernels take three
 * tables.  For the "_124_" kernels the SrcOff<N> suffix is the value of
 * (address of src) & 3 for which the callers in this file select the
 * kernel; each SrcOff<N> kernel subtracts N from src before reading it
 * as 32-bit words.
 * NOTE(review): the caller of the "_3_" kernels is not visible in this
 * part of the file - presumably it dispatches the same way; confirm.
 */
static void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

/***************************************************************/
/*
 * Thin wrapper around vis_ld_u16_i() that casts the base pointer X to
 * void * (which also drops the const qualifier of the table pointers).
 * Y is passed through unchanged; every call site in this file passes
 * twice a source byte value (0..510), presumably a byte offset into a
 * table of 16-bit entries - confirm against vis_proto.h.
 */
#define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))

96/***************************************************************/ 97void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8 *src, 98 mlib_s16 *dst, 99 mlib_s32 xsize, 100 const mlib_s16 *table0, 101 const mlib_s16 *table1, 102 const mlib_s16 *table2, 103 const mlib_s16 *table3) 104{ 105 mlib_u32 *sa; /* aligned pointer to source data */ 106 mlib_u8 *sp; /* pointer to source data */ 107 mlib_u32 s0; /* source data */ 108 mlib_s16 *dl; /* pointer to start of destination */ 109 mlib_s16 *dend; /* pointer to end of destination */ 110 mlib_d64 *dp; /* aligned pointer to destination */ 111 mlib_d64 t0, t1, t2; /* destination data */ 112 mlib_d64 t3, acc0; /* destination data */ 113 mlib_s32 emask; /* edge mask */ 114 mlib_s32 i, num; /* loop variable */ 115 116 sa = (mlib_u32 *) src; 117 dl = dst; 118 dp = (mlib_d64 *) dl; 119 dend = dl + xsize - 1; 120 121 vis_alignaddr((void *)0, 6); 122 123 i = 0; 124 125 if (xsize >= 4) { 126 127 s0 = *sa++; 128 129#pragma pipeloop(0) 130 for (i = 0; i <= xsize - 8; i += 4) { 131 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE); 132 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 133 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 134 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 135 acc0 = vis_faligndata(t3, acc0); 136 acc0 = vis_faligndata(t2, acc0); 137 acc0 = vis_faligndata(t1, acc0); 138 acc0 = vis_faligndata(t0, acc0); 139 s0 = *sa++; 140 *dp++ = acc0; 141 } 142 143 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE); 144 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 145 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 146 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 147 acc0 = vis_faligndata(t3, acc0); 148 acc0 = vis_faligndata(t2, acc0); 149 acc0 = vis_faligndata(t1, acc0); 150 acc0 = vis_faligndata(t0, acc0); 151 *dp++ = acc0; 152 } 153 154 sp = (mlib_u8 *) sa; 155 156 if ((mlib_addr) dp <= (mlib_addr) dend) { 157 158 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 159 sp += num; 160 num++; 161 162 if (num == 1) { 163 s0 = (mlib_s32) * 
sp; 164 sp--; 165 166 t0 = VIS_LD_U16_I(table0, 2 * s0); 167 acc0 = vis_faligndata(t0, acc0); 168 } 169 else if (num == 2) { 170 s0 = (mlib_s32) * sp; 171 sp--; 172 173 t0 = VIS_LD_U16_I(table1, 2 * s0); 174 acc0 = vis_faligndata(t0, acc0); 175 176 s0 = (mlib_s32) * sp; 177 sp--; 178 179 t0 = VIS_LD_U16_I(table0, 2 * s0); 180 acc0 = vis_faligndata(t0, acc0); 181 } 182 else if (num == 3) { 183 s0 = (mlib_s32) * sp; 184 sp--; 185 186 t0 = VIS_LD_U16_I(table2, 2 * s0); 187 acc0 = vis_faligndata(t0, acc0); 188 189 s0 = (mlib_s32) * sp; 190 sp--; 191 192 t0 = VIS_LD_U16_I(table1, 2 * s0); 193 acc0 = vis_faligndata(t0, acc0); 194 195 s0 = (mlib_s32) * sp; 196 sp--; 197 198 t0 = VIS_LD_U16_I(table0, 2 * s0); 199 acc0 = vis_faligndata(t0, acc0); 200 } 201 202 emask = vis_edge16(dp, dend); 203 vis_pst_16(acc0, dp, emask); 204 } 205} 206 207/***************************************************************/ 208void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8 *src, 209 mlib_s16 *dst, 210 mlib_s32 xsize, 211 const mlib_s16 *table0, 212 const mlib_s16 *table1, 213 const mlib_s16 *table2, 214 const mlib_s16 *table3) 215{ 216 mlib_u32 *sa; /* aligned pointer to source data */ 217 mlib_u8 *sp; /* pointer to source data */ 218 mlib_u32 s0, s1; /* source data */ 219 mlib_s16 *dl; /* pointer to start of destination */ 220 mlib_s16 *dend; /* pointer to end of destination */ 221 mlib_d64 *dp; /* aligned pointer to destination */ 222 mlib_d64 t0, t1, t2; /* destination data */ 223 mlib_d64 t3, acc0; /* destination data */ 224 mlib_s32 emask; /* edge mask */ 225 mlib_s32 i, num; /* loop variable */ 226 227 sa = (mlib_u32 *) (src - 1); 228 dl = dst; 229 dp = (mlib_d64 *) dl; 230 dend = dl + xsize - 1; 231 232 vis_alignaddr((void *)0, 6); 233 234 s0 = *sa++; 235 236 if (xsize >= 4) { 237 238 s1 = *sa++; 239 240#pragma pipeloop(0) 241 for (i = 0; i <= xsize - 8; i += 4) { 242 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE); 243 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 244 t1 = 
VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 245 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE); 246 acc0 = vis_faligndata(t3, acc0); 247 acc0 = vis_faligndata(t2, acc0); 248 acc0 = vis_faligndata(t1, acc0); 249 acc0 = vis_faligndata(t0, acc0); 250 s0 = s1; 251 s1 = *sa++; 252 *dp++ = acc0; 253 } 254 255 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE); 256 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 257 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 258 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE); 259 acc0 = vis_faligndata(t3, acc0); 260 acc0 = vis_faligndata(t2, acc0); 261 acc0 = vis_faligndata(t1, acc0); 262 acc0 = vis_faligndata(t0, acc0); 263 s0 = s1; 264 *dp++ = acc0; 265 } 266 267 sp = (mlib_u8 *) sa; 268 sp -= 3; 269 270 if ((mlib_addr) dp <= (mlib_addr) dend) { 271 272 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 273 sp += num; 274 num++; 275 276 if (num == 1) { 277 s0 = (mlib_s32) * sp; 278 sp--; 279 280 t0 = VIS_LD_U16_I(table0, 2 * s0); 281 acc0 = vis_faligndata(t0, acc0); 282 } 283 else if (num == 2) { 284 s0 = (mlib_s32) * sp; 285 sp--; 286 287 t0 = VIS_LD_U16_I(table1, 2 * s0); 288 acc0 = vis_faligndata(t0, acc0); 289 290 s0 = (mlib_s32) * sp; 291 sp--; 292 293 t0 = VIS_LD_U16_I(table0, 2 * s0); 294 acc0 = vis_faligndata(t0, acc0); 295 } 296 else if (num == 3) { 297 s0 = (mlib_s32) * sp; 298 sp--; 299 300 t0 = VIS_LD_U16_I(table2, 2 * s0); 301 acc0 = vis_faligndata(t0, acc0); 302 303 s0 = (mlib_s32) * sp; 304 sp--; 305 306 t0 = VIS_LD_U16_I(table1, 2 * s0); 307 acc0 = vis_faligndata(t0, acc0); 308 309 s0 = (mlib_s32) * sp; 310 sp--; 311 312 t0 = VIS_LD_U16_I(table0, 2 * s0); 313 acc0 = vis_faligndata(t0, acc0); 314 } 315 316 emask = vis_edge16(dp, dend); 317 vis_pst_16(acc0, dp, emask); 318 } 319} 320 321/***************************************************************/ 322void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8 *src, 323 mlib_s16 *dst, 324 mlib_s32 xsize, 325 const mlib_s16 *table0, 326 const mlib_s16 *table1, 327 const mlib_s16 *table2, 
328 const mlib_s16 *table3) 329{ 330 mlib_u32 *sa; /* aligned pointer to source data */ 331 mlib_u8 *sp; /* pointer to source data */ 332 mlib_u32 s0, s1; /* source data */ 333 mlib_s16 *dl; /* pointer to start of destination */ 334 mlib_s16 *dend; /* pointer to end of destination */ 335 mlib_d64 *dp; /* aligned pointer to destination */ 336 mlib_d64 t0, t1, t2; /* destination data */ 337 mlib_d64 t3, acc0; /* destination data */ 338 mlib_s32 emask; /* edge mask */ 339 mlib_s32 i, num; /* loop variable */ 340 341 sa = (mlib_u32 *) (src - 2); 342 dl = dst; 343 dp = (mlib_d64 *) dl; 344 dend = dl + xsize - 1; 345 346 vis_alignaddr((void *)0, 6); 347 348 s0 = *sa++; 349 350 if (xsize >= 4) { 351 352 s1 = *sa++; 353 354#pragma pipeloop(0) 355 for (i = 0; i <= xsize - 8; i += 4) { 356 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE); 357 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 358 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 359 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 360 acc0 = vis_faligndata(t3, acc0); 361 acc0 = vis_faligndata(t2, acc0); 362 acc0 = vis_faligndata(t1, acc0); 363 acc0 = vis_faligndata(t0, acc0); 364 s0 = s1; 365 s1 = *sa++; 366 *dp++ = acc0; 367 } 368 369 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE); 370 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 371 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 372 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 373 acc0 = vis_faligndata(t3, acc0); 374 acc0 = vis_faligndata(t2, acc0); 375 acc0 = vis_faligndata(t1, acc0); 376 acc0 = vis_faligndata(t0, acc0); 377 s0 = s1; 378 *dp++ = acc0; 379 } 380 381 sp = (mlib_u8 *) sa; 382 sp -= 2; 383 384 if ((mlib_addr) dp <= (mlib_addr) dend) { 385 386 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 387 sp += num; 388 num++; 389 390 if (num == 1) { 391 s0 = (mlib_s32) * sp; 392 sp--; 393 394 t0 = VIS_LD_U16_I(table0, 2 * s0); 395 acc0 = vis_faligndata(t0, acc0); 396 } 397 else if (num == 2) { 398 s0 = (mlib_s32) * sp; 399 sp--; 400 401 t0 = VIS_LD_U16_I(table1, 2 * s0); 
402 acc0 = vis_faligndata(t0, acc0); 403 404 s0 = (mlib_s32) * sp; 405 sp--; 406 407 t0 = VIS_LD_U16_I(table0, 2 * s0); 408 acc0 = vis_faligndata(t0, acc0); 409 } 410 else if (num == 3) { 411 s0 = (mlib_s32) * sp; 412 sp--; 413 414 t0 = VIS_LD_U16_I(table2, 2 * s0); 415 acc0 = vis_faligndata(t0, acc0); 416 417 s0 = (mlib_s32) * sp; 418 sp--; 419 420 t0 = VIS_LD_U16_I(table1, 2 * s0); 421 acc0 = vis_faligndata(t0, acc0); 422 423 s0 = (mlib_s32) * sp; 424 sp--; 425 426 t0 = VIS_LD_U16_I(table0, 2 * s0); 427 acc0 = vis_faligndata(t0, acc0); 428 } 429 430 emask = vis_edge16(dp, dend); 431 vis_pst_16(acc0, dp, emask); 432 } 433} 434 435/***************************************************************/ 436void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8 *src, 437 mlib_s16 *dst, 438 mlib_s32 xsize, 439 const mlib_s16 *table0, 440 const mlib_s16 *table1, 441 const mlib_s16 *table2, 442 const mlib_s16 *table3) 443{ 444 mlib_u32 *sa; /* aligned pointer to source data */ 445 mlib_u8 *sp; /* pointer to source data */ 446 mlib_u32 s0, s1; /* source data */ 447 mlib_s16 *dl; /* pointer to start of destination */ 448 mlib_s16 *dend; /* pointer to end of destination */ 449 mlib_d64 *dp; /* aligned pointer to destination */ 450 mlib_d64 t0, t1, t2; /* destination data */ 451 mlib_d64 t3, acc0; /* destination data */ 452 mlib_s32 emask; /* edge mask */ 453 mlib_s32 i, num; /* loop variable */ 454 455 sa = (mlib_u32 *) (src - 3); 456 dl = dst; 457 dp = (mlib_d64 *) dl; 458 dend = dl + xsize - 1; 459 460 vis_alignaddr((void *)0, 6); 461 462 s0 = *sa++; 463 464 if (xsize >= 4) { 465 466 s1 = *sa++; 467 468#pragma pipeloop(0) 469 for (i = 0; i <= xsize - 8; i += 4) { 470 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE); 471 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 472 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 473 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 474 acc0 = vis_faligndata(t3, acc0); 475 acc0 = vis_faligndata(t2, acc0); 476 acc0 = vis_faligndata(t1, acc0); 
477 acc0 = vis_faligndata(t0, acc0); 478 s0 = s1; 479 s1 = *sa++; 480 *dp++ = acc0; 481 } 482 483 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE); 484 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 485 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 486 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 487 acc0 = vis_faligndata(t3, acc0); 488 acc0 = vis_faligndata(t2, acc0); 489 acc0 = vis_faligndata(t1, acc0); 490 acc0 = vis_faligndata(t0, acc0); 491 s0 = s1; 492 *dp++ = acc0; 493 } 494 495 sp = (mlib_u8 *) sa; 496 sp -= 1; 497 498 if ((mlib_addr) dp <= (mlib_addr) dend) { 499 500 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 501 sp += num; 502 num++; 503 504 if (num == 1) { 505 s0 = (mlib_s32) * sp; 506 sp--; 507 508 t0 = VIS_LD_U16_I(table0, 2 * s0); 509 acc0 = vis_faligndata(t0, acc0); 510 } 511 else if (num == 2) { 512 s0 = (mlib_s32) * sp; 513 sp--; 514 515 t0 = VIS_LD_U16_I(table1, 2 * s0); 516 acc0 = vis_faligndata(t0, acc0); 517 518 s0 = (mlib_s32) * sp; 519 sp--; 520 521 t0 = VIS_LD_U16_I(table0, 2 * s0); 522 acc0 = vis_faligndata(t0, acc0); 523 } 524 else if (num == 3) { 525 s0 = (mlib_s32) * sp; 526 sp--; 527 528 t0 = VIS_LD_U16_I(table2, 2 * s0); 529 acc0 = vis_faligndata(t0, acc0); 530 531 s0 = (mlib_s32) * sp; 532 sp--; 533 534 t0 = VIS_LD_U16_I(table1, 2 * s0); 535 acc0 = vis_faligndata(t0, acc0); 536 537 s0 = (mlib_s32) * sp; 538 sp--; 539 540 t0 = VIS_LD_U16_I(table0, 2 * s0); 541 acc0 = vis_faligndata(t0, acc0); 542 } 543 544 emask = vis_edge16(dp, dend); 545 vis_pst_16(acc0, dp, emask); 546 } 547} 548 549/***************************************************************/ 550void mlib_v_ImageLookUp_U8_S16_1(const mlib_u8 *src, 551 mlib_s32 slb, 552 mlib_s16 *dst, 553 mlib_s32 dlb, 554 mlib_s32 xsize, 555 mlib_s32 ysize, 556 const mlib_s16 **table) 557{ 558 mlib_u8 *sl; 559 mlib_s16 *dl; 560 const mlib_s16 *tab = table[0]; 561 mlib_s32 j, i; 562 563 sl = (void *)src; 564 dl = dst; 565 566 /* row loop */ 567 for (j = 0; j < ysize; j++) { 568 mlib_u8 *sp = sl; 
569 mlib_s16 *dp = dl; 570 mlib_s32 off, size = xsize; 571 572 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 573 574 off = (off < size) ? off : size; 575 576 for (i = 0; i < off; i++) { 577 *dp++ = tab[(*sp++)]; 578 size--; 579 } 580 581 if (size > 0) { 582 583 off = (mlib_addr) sp & 3; 584 585 if (off == 0) { 586 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab, tab, tab, 587 tab); 588 } 589 else if (off == 1) { 590 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab, tab, tab, 591 tab); 592 } 593 else if (off == 2) { 594 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab, tab, tab, 595 tab); 596 } 597 else { 598 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab, tab, tab, 599 tab); 600 } 601 } 602 603 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 604 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 605 } 606} 607 608/***************************************************************/ 609void mlib_v_ImageLookUp_U8_S16_2(const mlib_u8 *src, 610 mlib_s32 slb, 611 mlib_s16 *dst, 612 mlib_s32 dlb, 613 mlib_s32 xsize, 614 mlib_s32 ysize, 615 const mlib_s16 **table) 616{ 617 mlib_u8 *sl; 618 mlib_s16 *dl; 619 const mlib_s16 *tab; 620 mlib_s32 j, i; 621 622 sl = (void *)src; 623 dl = dst; 624 625 /* row loop */ 626 for (j = 0; j < ysize; j++) { 627 mlib_u8 *sp = sl; 628 mlib_s16 *dp = dl; 629 mlib_s32 off, size = xsize * 2; 630 const mlib_s16 *tab0 = table[0]; 631 const mlib_s16 *tab1 = table[1]; 632 633 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 634 635 off = (off < size) ? 
off : size; 636 637 for (i = 0; i < off - 1; i += 2) { 638 *dp++ = tab0[(*sp++)]; 639 *dp++ = tab1[(*sp++)]; 640 size -= 2; 641 } 642 643 if ((off & 1) != 0) { 644 *dp++ = tab0[(*sp++)]; 645 size--; 646 tab = tab0; 647 tab0 = tab1; 648 tab1 = tab; 649 } 650 651 if (size > 0) { 652 653 off = (mlib_addr) sp & 3; 654 655 if (off == 0) { 656 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0, 657 tab1); 658 } 659 else if (off == 1) { 660 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0, 661 tab1); 662 } 663 else if (off == 2) { 664 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0, 665 tab1); 666 } 667 else { 668 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0, 669 tab1); 670 } 671 } 672 673 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 674 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 675 } 676} 677 678/***************************************************************/ 679void mlib_v_ImageLookUp_U8_S16_4(const mlib_u8 *src, 680 mlib_s32 slb, 681 mlib_s16 *dst, 682 mlib_s32 dlb, 683 mlib_s32 xsize, 684 mlib_s32 ysize, 685 const mlib_s16 **table) 686{ 687 mlib_u8 *sl; 688 mlib_s16 *dl; 689 const mlib_s16 *tab; 690 mlib_s32 j; 691 692 sl = (void *)src; 693 dl = dst; 694 695 /* row loop */ 696 for (j = 0; j < ysize; j++) { 697 mlib_u8 *sp = sl; 698 mlib_s16 *dp = dl; 699 const mlib_s16 *tab0 = table[0]; 700 const mlib_s16 *tab1 = table[1]; 701 const mlib_s16 *tab2 = table[2]; 702 const mlib_s16 *tab3 = table[3]; 703 mlib_s32 off, size = xsize * 4; 704 705 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 706 707 off = (off < size) ? 
off : size; 708 709 if (off == 1) { 710 *dp++ = tab0[(*sp++)]; 711 tab = tab0; 712 tab0 = tab1; 713 tab1 = tab2; 714 tab2 = tab3; 715 tab3 = tab; 716 size--; 717 } 718 else if (off == 2) { 719 *dp++ = tab0[(*sp++)]; 720 *dp++ = tab1[(*sp++)]; 721 tab = tab0; 722 tab0 = tab2; 723 tab2 = tab; 724 tab = tab1; 725 tab1 = tab3; 726 tab3 = tab; 727 size -= 2; 728 } 729 else if (off == 3) { 730 *dp++ = tab0[(*sp++)]; 731 *dp++ = tab1[(*sp++)]; 732 *dp++ = tab2[(*sp++)]; 733 tab = tab3; 734 tab3 = tab2; 735 tab2 = tab1; 736 tab1 = tab0; 737 tab0 = tab; 738 size -= 3; 739 } 740 741 if (size > 0) { 742 743 off = (mlib_addr) sp & 3; 744 745 if (off == 0) { 746 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2, 747 tab3); 748 } 749 else if (off == 1) { 750 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2, 751 tab3); 752 } 753 else if (off == 2) { 754 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2, 755 tab3); 756 } 757 else { 758 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2, 759 tab3); 760 } 761 } 762 763 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 764 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 765 } 766} 767 768/***************************************************************/ 769void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8 *src, 770 mlib_s16 *dst, 771 mlib_s32 xsize, 772 const mlib_s16 *table0, 773 const mlib_s16 *table1, 774 const mlib_s16 *table2) 775{ 776 mlib_u32 *sa; /* aligned pointer to source data */ 777 mlib_u8 *sp; /* pointer to source data */ 778 mlib_u32 s0, s1, s2; /* source data */ 779 mlib_s16 *dl; /* pointer to start of destination */ 780 mlib_s16 *dend; /* pointer to end of destination */ 781 mlib_d64 *dp; /* aligned pointer to destination */ 782 mlib_d64 t0, t1, t2; /* destination data */ 783 mlib_d64 t3, t4, t5; /* destination data */ 784 mlib_d64 t6, t7, t8; /* destination data */ 785 mlib_d64 t9, t10, t11; /* destination data */ 786 mlib_d64 acc0, 
acc1, acc2; /* destination data */ 787 mlib_s32 emask; /* edge mask */ 788 mlib_s32 i, num; /* loop variable */ 789 const mlib_s16 *table; 790 791 sa = (mlib_u32 *) src; 792 dl = dst; 793 dp = (mlib_d64 *) dl; 794 dend = dl + xsize - 1; 795 796 vis_alignaddr((void *)0, 6); 797 798 i = 0; 799 800 if (xsize >= 12) { 801 802 s0 = sa[0]; 803 s1 = sa[1]; 804 s2 = sa[2]; 805 sa += 3; 806 807#pragma pipeloop(0) 808 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) { 809 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 810 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 811 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 812 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 813 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 814 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 815 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 816 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 817 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE); 818 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE); 819 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE); 820 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE); 821 acc0 = vis_faligndata(t3, acc0); 822 acc0 = vis_faligndata(t2, acc0); 823 acc0 = vis_faligndata(t1, acc0); 824 acc0 = vis_faligndata(t0, acc0); 825 acc1 = vis_faligndata(t7, acc1); 826 acc1 = vis_faligndata(t6, acc1); 827 acc1 = vis_faligndata(t5, acc1); 828 acc1 = vis_faligndata(t4, acc1); 829 acc2 = vis_faligndata(t11, acc2); 830 acc2 = vis_faligndata(t10, acc2); 831 acc2 = vis_faligndata(t9, acc2); 832 acc2 = vis_faligndata(t8, acc2); 833 s0 = sa[0]; 834 s1 = sa[1]; 835 s2 = sa[2]; 836 dp[0] = acc0; 837 dp[1] = acc1; 838 dp[2] = acc2; 839 } 840 841 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 842 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 843 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 844 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 845 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 846 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 847 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 848 t4 = 
VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 849 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE); 850 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE); 851 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE); 852 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE); 853 acc0 = vis_faligndata(t3, acc0); 854 acc0 = vis_faligndata(t2, acc0); 855 acc0 = vis_faligndata(t1, acc0); 856 acc0 = vis_faligndata(t0, acc0); 857 acc1 = vis_faligndata(t7, acc1); 858 acc1 = vis_faligndata(t6, acc1); 859 acc1 = vis_faligndata(t5, acc1); 860 acc1 = vis_faligndata(t4, acc1); 861 acc2 = vis_faligndata(t11, acc2); 862 acc2 = vis_faligndata(t10, acc2); 863 acc2 = vis_faligndata(t9, acc2); 864 acc2 = vis_faligndata(t8, acc2); 865 dp[0] = acc0; 866 dp[1] = acc1; 867 dp[2] = acc2; 868 dp += 3; 869 i += 12; 870 } 871 872 if (i <= xsize - 8) { 873 s0 = sa[0]; 874 s1 = sa[1]; 875 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 876 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 877 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 878 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 879 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 880 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 881 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 882 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 883 acc0 = vis_faligndata(t3, acc0); 884 acc0 = vis_faligndata(t2, acc0); 885 acc0 = vis_faligndata(t1, acc0); 886 acc0 = vis_faligndata(t0, acc0); 887 acc1 = vis_faligndata(t7, acc1); 888 acc1 = vis_faligndata(t6, acc1); 889 acc1 = vis_faligndata(t5, acc1); 890 acc1 = vis_faligndata(t4, acc1); 891 dp[0] = acc0; 892 dp[1] = acc1; 893 table = table0; 894 table0 = table2; 895 table2 = table1; 896 table1 = table; 897 sa += 2; 898 i += 8; 899 dp += 2; 900 } 901 902 if (i <= xsize - 4) { 903 s0 = sa[0]; 904 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 905 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE); 906 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE); 907 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE); 908 acc0 = vis_faligndata(t3, acc0); 909 acc0 = 
vis_faligndata(t2, acc0); 910 acc0 = vis_faligndata(t1, acc0); 911 acc0 = vis_faligndata(t0, acc0); 912 dp[0] = acc0; 913 table = table0; 914 table0 = table1; 915 table1 = table2; 916 table2 = table; 917 sa++; 918 i += 4; 919 dp++; 920 } 921 922 sp = (mlib_u8 *) sa; 923 924 if ((mlib_addr) dp <= (mlib_addr) dend) { 925 926 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 927 sp += num; 928 num++; 929 930 if (num == 1) { 931 s0 = (mlib_s32) * sp; 932 sp--; 933 934 t0 = VIS_LD_U16_I(table0, 2 * s0); 935 acc0 = vis_faligndata(t0, acc0); 936 } 937 else if (num == 2) { 938 s0 = (mlib_s32) * sp; 939 sp--; 940 941 t0 = VIS_LD_U16_I(table1, 2 * s0); 942 acc0 = vis_faligndata(t0, acc0); 943 944 s0 = (mlib_s32) * sp; 945 sp--; 946 947 t0 = VIS_LD_U16_I(table0, 2 * s0); 948 acc0 = vis_faligndata(t0, acc0); 949 } 950 else if (num == 3) { 951 s0 = (mlib_s32) * sp; 952 sp--; 953 954 t0 = VIS_LD_U16_I(table2, 2 * s0); 955 acc0 = vis_faligndata(t0, acc0); 956 957 s0 = (mlib_s32) * sp; 958 sp--; 959 960 t0 = VIS_LD_U16_I(table1, 2 * s0); 961 acc0 = vis_faligndata(t0, acc0); 962 963 s0 = (mlib_s32) * sp; 964 sp--; 965 966 t0 = VIS_LD_U16_I(table0, 2 * s0); 967 acc0 = vis_faligndata(t0, acc0); 968 } 969 970 emask = vis_edge16(dp, dend); 971 vis_pst_16(acc0, dp, emask); 972 } 973} 974 975/***************************************************************/ 976void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8 *src, 977 mlib_s16 *dst, 978 mlib_s32 xsize, 979 const mlib_s16 *table0, 980 const mlib_s16 *table1, 981 const mlib_s16 *table2) 982{ 983 mlib_u32 *sa; /* aligned pointer to source data */ 984 mlib_u8 *sp; /* pointer to source data */ 985 mlib_u32 s0, s1, s2, s3; /* source data */ 986 mlib_s16 *dl; /* pointer to start of destination */ 987 mlib_s16 *dend; /* pointer to end of destination */ 988 mlib_d64 *dp; /* aligned pointer to destination */ 989 mlib_d64 t0, t1, t2; /* destination data */ 990 mlib_d64 t3, t4, t5; /* destination data */ 991 mlib_d64 t6, t7, t8; /* destination data 
*/ 992 mlib_d64 t9, t10, t11; /* destination data */ 993 mlib_d64 acc0, acc1, acc2; /* destination data */ 994 mlib_s32 emask; /* edge mask */ 995 mlib_s32 i, num; /* loop variable */ 996 const mlib_s16 *table; 997 998 sa = (mlib_u32 *) (src - 1); 999 dl = dst; 1000 dp = (mlib_d64 *) dl; 1001 dend = dl + xsize - 1; 1002 1003 vis_alignaddr((void *)0, 6); 1004 1005 i = 0; 1006 1007 s0 = *sa++; 1008 1009 if (xsize >= 12) { 1010 1011 s1 = sa[0]; 1012 s2 = sa[1]; 1013 s3 = sa[2]; 1014 sa += 3; 1015 1016#pragma pipeloop(0) 1017 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) { 1018 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE); 1019 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 1020 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 1021 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE); 1022 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE); 1023 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE); 1024 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE); 1025 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE); 1026 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE); 1027 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE); 1028 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE); 1029 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE); 1030 acc0 = vis_faligndata(t3, acc0); 1031 acc0 = vis_faligndata(t2, acc0); 1032 acc0 = vis_faligndata(t1, acc0); 1033 acc0 = vis_faligndata(t0, acc0); 1034 acc1 = vis_faligndata(t7, acc1); 1035 acc1 = vis_faligndata(t6, acc1); 1036 acc1 = vis_faligndata(t5, acc1); 1037 acc1 = vis_faligndata(t4, acc1); 1038 acc2 = vis_faligndata(t11, acc2); 1039 acc2 = vis_faligndata(t10, acc2); 1040 acc2 = vis_faligndata(t9, acc2); 1041 acc2 = vis_faligndata(t8, acc2); 1042 s0 = s3; 1043 s1 = sa[0]; 1044 s2 = sa[1]; 1045 s3 = sa[2]; 1046 dp[0] = acc0; 1047 dp[1] = acc1; 1048 dp[2] = acc2; 1049 } 1050 1051 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE); 1052 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 1053 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 1054 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 
0x1FE); 1055 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE); 1056 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE); 1057 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE); 1058 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE); 1059 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE); 1060 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE); 1061 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE); 1062 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE); 1063 acc0 = vis_faligndata(t3, acc0); 1064 acc0 = vis_faligndata(t2, acc0); 1065 acc0 = vis_faligndata(t1, acc0); 1066 acc0 = vis_faligndata(t0, acc0); 1067 acc1 = vis_faligndata(t7, acc1); 1068 acc1 = vis_faligndata(t6, acc1); 1069 acc1 = vis_faligndata(t5, acc1); 1070 acc1 = vis_faligndata(t4, acc1); 1071 acc2 = vis_faligndata(t11, acc2); 1072 acc2 = vis_faligndata(t10, acc2); 1073 acc2 = vis_faligndata(t9, acc2); 1074 acc2 = vis_faligndata(t8, acc2); 1075 dp[0] = acc0; 1076 dp[1] = acc1; 1077 dp[2] = acc2; 1078 s0 = s3; 1079 dp += 3; 1080 i += 12; 1081 } 1082 1083 if (i <= xsize - 8) { 1084 s1 = sa[0]; 1085 s2 = sa[1]; 1086 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE); 1087 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 1088 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 1089 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE); 1090 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE); 1091 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE); 1092 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE); 1093 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE); 1094 acc0 = vis_faligndata(t3, acc0); 1095 acc0 = vis_faligndata(t2, acc0); 1096 acc0 = vis_faligndata(t1, acc0); 1097 acc0 = vis_faligndata(t0, acc0); 1098 acc1 = vis_faligndata(t7, acc1); 1099 acc1 = vis_faligndata(t6, acc1); 1100 acc1 = vis_faligndata(t5, acc1); 1101 acc1 = vis_faligndata(t4, acc1); 1102 dp[0] = acc0; 1103 dp[1] = acc1; 1104 table = table0; 1105 table0 = table2; 1106 table2 = table1; 1107 table1 = table; 1108 sa += 2; 1109 i += 8; 1110 dp += 2; 1111 s0 = s2; 1112 } 1113 1114 if (i <= xsize - 4) { 1115 s1 = 
sa[0]; 1116 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE); 1117 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE); 1118 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE); 1119 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE); 1120 acc0 = vis_faligndata(t3, acc0); 1121 acc0 = vis_faligndata(t2, acc0); 1122 acc0 = vis_faligndata(t1, acc0); 1123 acc0 = vis_faligndata(t0, acc0); 1124 dp[0] = acc0; 1125 table = table0; 1126 table0 = table1; 1127 table1 = table2; 1128 table2 = table; 1129 sa++; 1130 i += 4; 1131 dp++; 1132 s0 = s1; 1133 } 1134 1135 sp = (mlib_u8 *) sa; 1136 sp -= 3; 1137 1138 if ((mlib_addr) dp <= (mlib_addr) dend) { 1139 1140 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 1141 sp += num; 1142 num++; 1143 1144 if (num == 1) { 1145 s0 = (mlib_s32) * sp; 1146 sp--; 1147 1148 t0 = VIS_LD_U16_I(table0, 2 * s0); 1149 acc0 = vis_faligndata(t0, acc0); 1150 } 1151 else if (num == 2) { 1152 s0 = (mlib_s32) * sp; 1153 sp--; 1154 1155 t0 = VIS_LD_U16_I(table1, 2 * s0); 1156 acc0 = vis_faligndata(t0, acc0); 1157 1158 s0 = (mlib_s32) * sp; 1159 sp--; 1160 1161 t0 = VIS_LD_U16_I(table0, 2 * s0); 1162 acc0 = vis_faligndata(t0, acc0); 1163 } 1164 else if (num == 3) { 1165 s0 = (mlib_s32) * sp; 1166 sp--; 1167 1168 t0 = VIS_LD_U16_I(table2, 2 * s0); 1169 acc0 = vis_faligndata(t0, acc0); 1170 1171 s0 = (mlib_s32) * sp; 1172 sp--; 1173 1174 t0 = VIS_LD_U16_I(table1, 2 * s0); 1175 acc0 = vis_faligndata(t0, acc0); 1176 1177 s0 = (mlib_s32) * sp; 1178 sp--; 1179 1180 t0 = VIS_LD_U16_I(table0, 2 * s0); 1181 acc0 = vis_faligndata(t0, acc0); 1182 } 1183 1184 emask = vis_edge16(dp, dend); 1185 vis_pst_16(acc0, dp, emask); 1186 } 1187} 1188 1189/***************************************************************/ 1190void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8 *src, 1191 mlib_s16 *dst, 1192 mlib_s32 xsize, 1193 const mlib_s16 *table0, 1194 const mlib_s16 *table1, 1195 const mlib_s16 *table2) 1196{ 1197 mlib_u32 *sa; /* aligned pointer to source data */ 1198 mlib_u8 *sp; /* pointer to 
source data */ 1199 mlib_u32 s0, s1, s2, s3; /* source data */ 1200 mlib_s16 *dl; /* pointer to start of destination */ 1201 mlib_s16 *dend; /* pointer to end of destination */ 1202 mlib_d64 *dp; /* aligned pointer to destination */ 1203 mlib_d64 t0, t1, t2; /* destination data */ 1204 mlib_d64 t3, t4, t5; /* destination data */ 1205 mlib_d64 t6, t7, t8; /* destination data */ 1206 mlib_d64 t9, t10, t11; /* destination data */ 1207 mlib_d64 acc0, acc1, acc2; /* destination data */ 1208 mlib_s32 emask; /* edge mask */ 1209 mlib_s32 i, num; /* loop variable */ 1210 const mlib_s16 *table; 1211 1212 sa = (mlib_u32 *) (src - 2); 1213 dl = dst; 1214 dp = (mlib_d64 *) dl; 1215 dend = dl + xsize - 1; 1216 1217 vis_alignaddr((void *)0, 6); 1218 1219 i = 0; 1220 1221 s0 = *sa++; 1222 1223 if (xsize >= 12) { 1224 1225 s1 = sa[0]; 1226 s2 = sa[1]; 1227 s3 = sa[2]; 1228 sa += 3; 1229 1230#pragma pipeloop(0) 1231 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) { 1232 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE); 1233 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 1234 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 1235 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 1236 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE); 1237 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE); 1238 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE); 1239 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE); 1240 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE); 1241 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE); 1242 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE); 1243 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE); 1244 acc0 = vis_faligndata(t3, acc0); 1245 acc0 = vis_faligndata(t2, acc0); 1246 acc0 = vis_faligndata(t1, acc0); 1247 acc0 = vis_faligndata(t0, acc0); 1248 acc1 = vis_faligndata(t7, acc1); 1249 acc1 = vis_faligndata(t6, acc1); 1250 acc1 = vis_faligndata(t5, acc1); 1251 acc1 = vis_faligndata(t4, acc1); 1252 acc2 = vis_faligndata(t11, acc2); 1253 acc2 = vis_faligndata(t10, acc2); 1254 acc2 = 
vis_faligndata(t9, acc2); 1255 acc2 = vis_faligndata(t8, acc2); 1256 s0 = s3; 1257 s1 = sa[0]; 1258 s2 = sa[1]; 1259 s3 = sa[2]; 1260 dp[0] = acc0; 1261 dp[1] = acc1; 1262 dp[2] = acc2; 1263 } 1264 1265 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE); 1266 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 1267 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 1268 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 1269 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE); 1270 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE); 1271 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE); 1272 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE); 1273 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE); 1274 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE); 1275 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE); 1276 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE); 1277 acc0 = vis_faligndata(t3, acc0); 1278 acc0 = vis_faligndata(t2, acc0); 1279 acc0 = vis_faligndata(t1, acc0); 1280 acc0 = vis_faligndata(t0, acc0); 1281 acc1 = vis_faligndata(t7, acc1); 1282 acc1 = vis_faligndata(t6, acc1); 1283 acc1 = vis_faligndata(t5, acc1); 1284 acc1 = vis_faligndata(t4, acc1); 1285 acc2 = vis_faligndata(t11, acc2); 1286 acc2 = vis_faligndata(t10, acc2); 1287 acc2 = vis_faligndata(t9, acc2); 1288 acc2 = vis_faligndata(t8, acc2); 1289 dp[0] = acc0; 1290 dp[1] = acc1; 1291 dp[2] = acc2; 1292 s0 = s3; 1293 dp += 3; 1294 i += 12; 1295 } 1296 1297 if (i <= xsize - 8) { 1298 s1 = sa[0]; 1299 s2 = sa[1]; 1300 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE); 1301 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 1302 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 1303 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 1304 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE); 1305 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE); 1306 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE); 1307 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE); 1308 acc0 = vis_faligndata(t3, acc0); 1309 acc0 = vis_faligndata(t2, acc0); 1310 acc0 = vis_faligndata(t1, acc0); 1311 acc0 = 
vis_faligndata(t0, acc0); 1312 acc1 = vis_faligndata(t7, acc1); 1313 acc1 = vis_faligndata(t6, acc1); 1314 acc1 = vis_faligndata(t5, acc1); 1315 acc1 = vis_faligndata(t4, acc1); 1316 dp[0] = acc0; 1317 dp[1] = acc1; 1318 table = table0; 1319 table0 = table2; 1320 table2 = table1; 1321 table1 = table; 1322 sa += 2; 1323 i += 8; 1324 dp += 2; 1325 s0 = s2; 1326 } 1327 1328 if (i <= xsize - 4) { 1329 s1 = sa[0]; 1330 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE); 1331 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE); 1332 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE); 1333 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE); 1334 acc0 = vis_faligndata(t3, acc0); 1335 acc0 = vis_faligndata(t2, acc0); 1336 acc0 = vis_faligndata(t1, acc0); 1337 acc0 = vis_faligndata(t0, acc0); 1338 dp[0] = acc0; 1339 table = table0; 1340 table0 = table1; 1341 table1 = table2; 1342 table2 = table; 1343 sa++; 1344 i += 4; 1345 dp++; 1346 s0 = s1; 1347 } 1348 1349 sp = (mlib_u8 *) sa; 1350 sp -= 2; 1351 1352 if ((mlib_addr) dp <= (mlib_addr) dend) { 1353 1354 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 1355 sp += num; 1356 num++; 1357 1358 if (num == 1) { 1359 s0 = (mlib_s32) * sp; 1360 sp--; 1361 1362 t0 = VIS_LD_U16_I(table0, 2 * s0); 1363 acc0 = vis_faligndata(t0, acc0); 1364 } 1365 else if (num == 2) { 1366 s0 = (mlib_s32) * sp; 1367 sp--; 1368 1369 t0 = VIS_LD_U16_I(table1, 2 * s0); 1370 acc0 = vis_faligndata(t0, acc0); 1371 1372 s0 = (mlib_s32) * sp; 1373 sp--; 1374 1375 t0 = VIS_LD_U16_I(table0, 2 * s0); 1376 acc0 = vis_faligndata(t0, acc0); 1377 } 1378 else if (num == 3) { 1379 s0 = (mlib_s32) * sp; 1380 sp--; 1381 1382 t0 = VIS_LD_U16_I(table2, 2 * s0); 1383 acc0 = vis_faligndata(t0, acc0); 1384 1385 s0 = (mlib_s32) * sp; 1386 sp--; 1387 1388 t0 = VIS_LD_U16_I(table1, 2 * s0); 1389 acc0 = vis_faligndata(t0, acc0); 1390 1391 s0 = (mlib_s32) * sp; 1392 sp--; 1393 1394 t0 = VIS_LD_U16_I(table0, 2 * s0); 1395 acc0 = vis_faligndata(t0, acc0); 1396 } 1397 1398 emask = vis_edge16(dp, dend); 1399 
vis_pst_16(acc0, dp, emask); 1400 } 1401} 1402 1403/***************************************************************/ 1404void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8 *src, 1405 mlib_s16 *dst, 1406 mlib_s32 xsize, 1407 const mlib_s16 *table0, 1408 const mlib_s16 *table1, 1409 const mlib_s16 *table2) 1410{ 1411 mlib_u32 *sa; /* aligned pointer to source data */ 1412 mlib_u8 *sp; /* pointer to source data */ 1413 mlib_u32 s0, s1, s2, s3; /* source data */ 1414 mlib_s16 *dl; /* pointer to start of destination */ 1415 mlib_s16 *dend; /* pointer to end of destination */ 1416 mlib_d64 *dp; /* aligned pointer to destination */ 1417 mlib_d64 t0, t1, t2; /* destination data */ 1418 mlib_d64 t3, t4, t5; /* destination data */ 1419 mlib_d64 t6, t7, t8; /* destination data */ 1420 mlib_d64 t9, t10, t11; /* destination data */ 1421 mlib_d64 acc0, acc1, acc2; /* destination data */ 1422 mlib_s32 emask; /* edge mask */ 1423 mlib_s32 i, num; /* loop variable */ 1424 const mlib_s16 *table; 1425 1426 sa = (mlib_u32 *) (src - 3); 1427 dl = dst; 1428 dp = (mlib_d64 *) dl; 1429 dend = dl + xsize - 1; 1430 1431 vis_alignaddr((void *)0, 6); 1432 1433 i = 0; 1434 1435 s0 = *sa++; 1436 1437 if (xsize >= 12) { 1438 1439 s1 = sa[0]; 1440 s2 = sa[1]; 1441 s3 = sa[2]; 1442 sa += 3; 1443 1444#pragma pipeloop(0) 1445 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) { 1446 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 1447 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 1448 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 1449 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 1450 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE); 1451 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE); 1452 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE); 1453 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 1454 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE); 1455 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE); 1456 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE); 1457 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE); 
1458 acc0 = vis_faligndata(t3, acc0); 1459 acc0 = vis_faligndata(t2, acc0); 1460 acc0 = vis_faligndata(t1, acc0); 1461 acc0 = vis_faligndata(t0, acc0); 1462 acc1 = vis_faligndata(t7, acc1); 1463 acc1 = vis_faligndata(t6, acc1); 1464 acc1 = vis_faligndata(t5, acc1); 1465 acc1 = vis_faligndata(t4, acc1); 1466 acc2 = vis_faligndata(t11, acc2); 1467 acc2 = vis_faligndata(t10, acc2); 1468 acc2 = vis_faligndata(t9, acc2); 1469 acc2 = vis_faligndata(t8, acc2); 1470 s0 = s3; 1471 s1 = sa[0]; 1472 s2 = sa[1]; 1473 s3 = sa[2]; 1474 dp[0] = acc0; 1475 dp[1] = acc1; 1476 dp[2] = acc2; 1477 } 1478 1479 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 1480 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 1481 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 1482 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 1483 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE); 1484 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE); 1485 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE); 1486 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 1487 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE); 1488 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE); 1489 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE); 1490 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE); 1491 acc0 = vis_faligndata(t3, acc0); 1492 acc0 = vis_faligndata(t2, acc0); 1493 acc0 = vis_faligndata(t1, acc0); 1494 acc0 = vis_faligndata(t0, acc0); 1495 acc1 = vis_faligndata(t7, acc1); 1496 acc1 = vis_faligndata(t6, acc1); 1497 acc1 = vis_faligndata(t5, acc1); 1498 acc1 = vis_faligndata(t4, acc1); 1499 acc2 = vis_faligndata(t11, acc2); 1500 acc2 = vis_faligndata(t10, acc2); 1501 acc2 = vis_faligndata(t9, acc2); 1502 acc2 = vis_faligndata(t8, acc2); 1503 dp[0] = acc0; 1504 dp[1] = acc1; 1505 dp[2] = acc2; 1506 s0 = s3; 1507 dp += 3; 1508 i += 12; 1509 } 1510 1511 if (i <= xsize - 8) { 1512 s1 = sa[0]; 1513 s2 = sa[1]; 1514 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 1515 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 1516 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 
1517 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 1518 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE); 1519 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE); 1520 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE); 1521 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE); 1522 acc0 = vis_faligndata(t3, acc0); 1523 acc0 = vis_faligndata(t2, acc0); 1524 acc0 = vis_faligndata(t1, acc0); 1525 acc0 = vis_faligndata(t0, acc0); 1526 acc1 = vis_faligndata(t7, acc1); 1527 acc1 = vis_faligndata(t6, acc1); 1528 acc1 = vis_faligndata(t5, acc1); 1529 acc1 = vis_faligndata(t4, acc1); 1530 dp[0] = acc0; 1531 dp[1] = acc1; 1532 table = table0; 1533 table0 = table2; 1534 table2 = table1; 1535 table1 = table; 1536 sa += 2; 1537 i += 8; 1538 dp += 2; 1539 s0 = s2; 1540 } 1541 1542 if (i <= xsize - 4) { 1543 s1 = sa[0]; 1544 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE); 1545 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE); 1546 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE); 1547 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE); 1548 acc0 = vis_faligndata(t3, acc0); 1549 acc0 = vis_faligndata(t2, acc0); 1550 acc0 = vis_faligndata(t1, acc0); 1551 acc0 = vis_faligndata(t0, acc0); 1552 dp[0] = acc0; 1553 table = table0; 1554 table0 = table1; 1555 table1 = table2; 1556 table2 = table; 1557 sa++; 1558 i += 4; 1559 dp++; 1560 s0 = s1; 1561 } 1562 1563 sp = (mlib_u8 *) sa; 1564 sp -= 1; 1565 1566 if ((mlib_addr) dp <= (mlib_addr) dend) { 1567 1568 num = (mlib_s16 *) dend - (mlib_s16 *) dp; 1569 sp += num; 1570 num++; 1571 1572 if (num == 1) { 1573 s0 = (mlib_s32) * sp; 1574 sp--; 1575 1576 t0 = VIS_LD_U16_I(table0, 2 * s0); 1577 acc0 = vis_faligndata(t0, acc0); 1578 } 1579 else if (num == 2) { 1580 s0 = (mlib_s32) * sp; 1581 sp--; 1582 1583 t0 = VIS_LD_U16_I(table1, 2 * s0); 1584 acc0 = vis_faligndata(t0, acc0); 1585 1586 s0 = (mlib_s32) * sp; 1587 sp--; 1588 1589 t0 = VIS_LD_U16_I(table0, 2 * s0); 1590 acc0 = vis_faligndata(t0, acc0); 1591 } 1592 else if (num == 3) { 1593 s0 = (mlib_s32) * sp; 1594 sp--; 1595 
1596 t0 = VIS_LD_U16_I(table2, 2 * s0); 1597 acc0 = vis_faligndata(t0, acc0); 1598 1599 s0 = (mlib_s32) * sp; 1600 sp--; 1601 1602 t0 = VIS_LD_U16_I(table1, 2 * s0); 1603 acc0 = vis_faligndata(t0, acc0); 1604 1605 s0 = (mlib_s32) * sp; 1606 sp--; 1607 1608 t0 = VIS_LD_U16_I(table0, 2 * s0); 1609 acc0 = vis_faligndata(t0, acc0); 1610 } 1611 1612 emask = vis_edge16(dp, dend); 1613 vis_pst_16(acc0, dp, emask); 1614 } 1615} 1616 1617/***************************************************************/ 1618void mlib_v_ImageLookUp_U8_S16_3(const mlib_u8 *src, 1619 mlib_s32 slb, 1620 mlib_s16 *dst, 1621 mlib_s32 dlb, 1622 mlib_s32 xsize, 1623 mlib_s32 ysize, 1624 const mlib_s16 **table) 1625{ 1626 mlib_u8 *sl; 1627 mlib_s16 *dl; 1628 const mlib_s16 *tab; 1629 mlib_s32 j, i; 1630 1631 sl = (void *)src; 1632 dl = dst; 1633 1634 /* row loop */ 1635 for (j = 0; j < ysize; j++) { 1636 mlib_u8 *sp = sl; 1637 mlib_s16 *dp = dl; 1638 const mlib_s16 *tab0 = table[0]; 1639 const mlib_s16 *tab1 = table[1]; 1640 const mlib_s16 *tab2 = table[2]; 1641 mlib_s32 off, size = xsize * 3; 1642 1643 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 1644 1645 off = (off < size) ? 
off : size; 1646 1647 for (i = 0; i < off - 2; i += 3) { 1648 *dp++ = tab0[(*sp++)]; 1649 *dp++ = tab1[(*sp++)]; 1650 *dp++ = tab2[(*sp++)]; 1651 size -= 3; 1652 } 1653 1654 off -= i; 1655 1656 if (off == 1) { 1657 *dp++ = tab0[(*sp++)]; 1658 tab = tab0; 1659 tab0 = tab1; 1660 tab1 = tab2; 1661 tab2 = tab; 1662 size--; 1663 } 1664 else if (off == 2) { 1665 *dp++ = tab0[(*sp++)]; 1666 *dp++ = tab1[(*sp++)]; 1667 tab = tab2; 1668 tab2 = tab1; 1669 tab1 = tab0; 1670 tab0 = tab; 1671 size -= 2; 1672 } 1673 1674 if (size > 0) { 1675 1676 off = (mlib_addr) sp & 3; 1677 1678 if (off == 0) { 1679 mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2); 1680 } 1681 else if (off == 1) { 1682 mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2); 1683 } 1684 else if (off == 2) { 1685 mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2); 1686 } 1687 else { 1688 mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2); 1689 } 1690 } 1691 1692 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1693 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1694 } 1695} 1696 1697/***************************************************************/ 1698