1/* 2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28#include "vis_proto.h" 29#include "mlib_image.h" 30#include "mlib_v_ImageLookUpFunc.h" 31 32/***************************************************************/ 33static void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src, 34 mlib_u8 *dst, 35 mlib_s32 xsize, 36 const mlib_u8 *table0, 37 const mlib_u8 *table1, 38 const mlib_u8 *table2, 39 const mlib_u8 *table3); 40 41static void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src, 42 mlib_u8 *dst, 43 mlib_s32 xsize, 44 const mlib_u8 *table0, 45 const mlib_u8 *table1, 46 const mlib_u8 *table2, 47 const mlib_u8 *table3); 48 49static void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src, 50 mlib_u8 *dst, 51 mlib_s32 xsize, 52 const mlib_u8 *table0, 53 const mlib_u8 *table1, 54 const mlib_u8 *table2, 55 const mlib_u8 *table3); 56 57static void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src, 58 mlib_u8 *dst, 59 mlib_s32 xsize, 60 const mlib_u8 *table0, 61 const mlib_u8 *table1, 62 const mlib_u8 *table2, 63 const mlib_u8 *table3); 64 65static void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, 66 mlib_u8 *dst, 67 mlib_s32 xsize, 68 const mlib_u8 *table0, 69 const mlib_u8 *table1, 70 const mlib_u8 *table2); 71 72static void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, 73 mlib_u8 *dst, 74 mlib_s32 xsize, 75 const mlib_u8 *table0, 76 const mlib_u8 *table1, 77 const mlib_u8 *table2); 78 79static void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, 80 mlib_u8 *dst, 81 mlib_s32 xsize, 82 const mlib_u8 *table0, 83 const mlib_u8 *table1, 84 const mlib_u8 *table2); 85 86static void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, 87 mlib_u8 *dst, 88 mlib_s32 xsize, 89 const mlib_u8 *table0, 90 const mlib_u8 *table1, 91 const mlib_u8 *table2); 92 93/***************************************************************/ 94#define VIS_LD_U8_I(X, Y) vis_ld_u8_i((void *)(X), (Y)) 95 96/***************************************************************/ 97void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src, 98 mlib_u8 *dst, 99 mlib_s32 xsize, 100 const mlib_u8 *table0, 101 const mlib_u8 *table1, 102 const mlib_u8 *table2, 103 const mlib_u8 *table3) 104{ 105 mlib_u32 *sa; /* aligned pointer to source data */ 106 mlib_u8 *sp; /* pointer to source data */ 107 mlib_u32 s0, s1; /* source data */ 108 mlib_u8 *dl; /* pointer to start of destination */ 109 mlib_u8 *dend; /* pointer to end of destination */ 110 mlib_d64 *dp; /* aligned pointer to destination */ 111 mlib_d64 t0, t1, t2; /* destination data */ 112 mlib_d64 t3, t4, t5; /* destination data */ 113 mlib_d64 t6, t7, acc; /* destination data */ 114 mlib_s32 emask; /* edge mask */ 115 mlib_s32 i, num; /* loop variable */ 116 117 sa = (mlib_u32 *) src; 118 dl = dst; 119 dp = (mlib_d64 *) dl; 120 dend = dl + xsize - 1; 121 122 vis_alignaddr((void *)0, 7); 123 124 if (xsize >= 8) { 125 126 s0 = sa[0]; 127 s1 = sa[1]; 128 sa += 2; 129 130#pragma pipeloop(0) 131 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 132 t7 = VIS_LD_U8_I(table3, s1 & 0xFF); 133 t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF); 134 t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF); 135 t4 = VIS_LD_U8_I(table0, s1 >> 24); 136 t3 = VIS_LD_U8_I(table3, s0 & 0xFF); 137 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF); 138 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF); 139 t0 = VIS_LD_U8_I(table0, s0 >> 24); 140 acc = vis_faligndata(t7, acc); 141 acc = vis_faligndata(t6, acc); 142 acc = vis_faligndata(t5, acc); 143 acc = vis_faligndata(t4, acc); 144 acc = vis_faligndata(t3, acc); 145 acc = vis_faligndata(t2, acc); 146 acc = vis_faligndata(t1, acc); 147 acc = vis_faligndata(t0, acc); 148 s0 = sa[0]; 149 s1 = sa[1]; 150 *dp++ = acc; 151 } 152 153 t7 = VIS_LD_U8_I(table3, s1 & 0xFF); 154 t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF); 155 t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF); 156 t4 = VIS_LD_U8_I(table0, s1 >> 24); 157 t3 = VIS_LD_U8_I(table3, s0 & 0xFF); 158 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF); 159 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF); 160 t0 = VIS_LD_U8_I(table0, s0 >> 24); 161 acc = vis_faligndata(t7, acc); 162 acc = vis_faligndata(t6, acc); 163 acc = vis_faligndata(t5, acc); 164 acc = vis_faligndata(t4, acc); 165 acc = vis_faligndata(t3, acc); 166 acc = vis_faligndata(t2, acc); 167 acc = vis_faligndata(t1, acc); 168 acc = vis_faligndata(t0, acc); 169 *dp++ = acc; 170 } 171 172 sp = (mlib_u8 *) sa; 173 174 if ((mlib_addr) dp <= (mlib_addr) dend) { 175 176 num = (mlib_addr) dend - (mlib_addr) dp; 177 sp += num; 178 num++; 179 180 if ((num & 3) == 1) { 181 s0 = (mlib_s32) * sp; 182 sp--; 183 184 t0 = VIS_LD_U8_I(table0, s0); 185 acc = vis_faligndata(t0, acc); 186 num--; 187 } 188 else if ((num & 3) == 2) { 189 s0 = (mlib_s32) * sp; 190 sp--; 191 192 t0 = VIS_LD_U8_I(table1, s0); 193 acc = vis_faligndata(t0, acc); 194 195 s0 = (mlib_s32) * sp; 196 sp--; 197 198 t0 = VIS_LD_U8_I(table0, s0); 199 acc = vis_faligndata(t0, acc); 200 num -= 2; 201 } 202 else if ((num & 3) == 3) { 203 s0 = (mlib_s32) * sp; 204 sp--; 205 206 t0 = VIS_LD_U8_I(table2, s0); 207 acc = vis_faligndata(t0, acc); 208 209 s0 = (mlib_s32) * sp; 210 sp--; 211 212 t0 = VIS_LD_U8_I(table1, s0); 213 acc = vis_faligndata(t0, acc); 214 215 s0 = (mlib_s32) * sp; 216 sp--; 217 218 t0 = VIS_LD_U8_I(table0, s0); 219 acc = vis_faligndata(t0, acc); 220 num -= 3; 221 } 222 223 if (num != 0) { 224 s0 = (mlib_s32) * sp; 225 sp--; 226 227 t0 = VIS_LD_U8_I(table3, s0); 228 acc = vis_faligndata(t0, acc); 229 230 s0 = (mlib_s32) * sp; 231 sp--; 232 233 t0 = VIS_LD_U8_I(table2, s0); 234 acc = vis_faligndata(t0, acc); 235 236 s0 = (mlib_s32) * sp; 237 sp--; 238 239 t0 = VIS_LD_U8_I(table1, s0); 240 acc = vis_faligndata(t0, acc); 241 242 s0 = (mlib_s32) * sp; 243 sp--; 244 245 t0 = VIS_LD_U8_I(table0, s0); 246 acc = vis_faligndata(t0, acc); 247 } 248 249 emask = vis_edge8(dp, dend); 250 vis_pst_8(acc, dp, emask); 251 } 252} 253 254/***************************************************************/ 255void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src, 256 mlib_u8 *dst, 257 mlib_s32 xsize, 258 const mlib_u8 *table0, 259 const mlib_u8 *table1, 260 const mlib_u8 *table2, 261 const mlib_u8 *table3) 262{ 263 mlib_u32 *sa; /* aligned pointer to source data */ 264 mlib_u8 *sp; /* pointer to source data */ 265 mlib_u32 s0, s1, s2; /* source data */ 266 mlib_u8 *dl; /* pointer to start of destination */ 267 mlib_u8 *dend; /* pointer to end of destination */ 268 mlib_d64 *dp; /* aligned pointer to destination */ 269 mlib_d64 t0, t1, t2; /* destination data */ 270 mlib_d64 t3, t4, t5; /* destination data */ 271 mlib_d64 t6, t7, acc; /* destination data */ 272 mlib_s32 emask; /* edge mask */ 273 mlib_s32 i, num; /* loop variable */ 274 275 sa = (mlib_u32 *) (src - 1); 276 dl = dst; 277 dp = (mlib_d64 *) dl; 278 dend = dl + xsize - 1; 279 280 vis_alignaddr((void *)0, 7); 281 282 s0 = *sa++; 283 284 if (xsize >= 8) { 285 286 s1 = sa[0]; 287 s2 = sa[1]; 288 sa += 2; 289 290#pragma pipeloop(0) 291 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 292 t7 = VIS_LD_U8_I(table3, s2 >> 24); 293 t6 = VIS_LD_U8_I(table2, s1 & 0xFF); 294 t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF); 295 t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF); 296 t3 = VIS_LD_U8_I(table3, s1 >> 24); 297 t2 = VIS_LD_U8_I(table2, s0 & 0xFF); 298 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF); 299 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF); 300 acc = vis_faligndata(t7, acc); 301 acc = vis_faligndata(t6, acc); 302 acc = vis_faligndata(t5, acc); 303 acc = vis_faligndata(t4, acc); 304 acc = vis_faligndata(t3, acc); 305 acc = vis_faligndata(t2, acc); 306 acc = vis_faligndata(t1, acc); 307 acc = vis_faligndata(t0, acc); 308 s0 = s2; 309 s1 = sa[0]; 310 s2 = sa[1]; 311 *dp++ = acc; 312 } 313 314 t7 = VIS_LD_U8_I(table3, s2 >> 24); 315 t6 = VIS_LD_U8_I(table2, s1 & 0xFF); 316 t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF); 317 t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF); 318 t3 = VIS_LD_U8_I(table3, s1 >> 24); 319 t2 = VIS_LD_U8_I(table2, s0 & 0xFF); 320 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF); 321 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF); 322 acc = vis_faligndata(t7, acc); 323 acc = vis_faligndata(t6, acc); 324 acc = vis_faligndata(t5, acc); 325 acc = vis_faligndata(t4, acc); 326 acc = vis_faligndata(t3, acc); 327 acc = vis_faligndata(t2, acc); 328 acc = vis_faligndata(t1, acc); 329 acc = vis_faligndata(t0, acc); 330 *dp++ = acc; 331 } 332 333 sp = (mlib_u8 *) sa; 334 sp -= 3; 335 336 if ((mlib_addr) dp <= (mlib_addr) dend) { 337 338 num = (mlib_addr) dend - (mlib_addr) dp; 339 sp += num; 340 num++; 341 342 if ((num & 3) == 1) { 343 s0 = (mlib_s32) * sp; 344 sp--; 345 346 t0 = VIS_LD_U8_I(table0, s0); 347 acc = vis_faligndata(t0, acc); 348 num--; 349 } 350 else if ((num & 3) == 2) { 351 s0 = (mlib_s32) * sp; 352 sp--; 353 354 t0 = VIS_LD_U8_I(table1, s0); 355 acc = vis_faligndata(t0, acc); 356 357 s0 = (mlib_s32) * sp; 358 sp--; 359 360 t0 = VIS_LD_U8_I(table0, s0); 361 acc = vis_faligndata(t0, acc); 362 num -= 2; 363 } 364 else if ((num & 3) == 3) { 365 s0 = (mlib_s32) * sp; 366 sp--; 367 368 t0 = VIS_LD_U8_I(table2, s0); 369 acc = vis_faligndata(t0, acc); 370 371 s0 = (mlib_s32) * sp; 372 sp--; 373 374 t0 = VIS_LD_U8_I(table1, s0); 375 acc = vis_faligndata(t0, acc); 376 377 s0 = (mlib_s32) * sp; 378 sp--; 379 380 t0 = VIS_LD_U8_I(table0, s0); 381 acc = vis_faligndata(t0, acc); 382 num -= 3; 383 } 384 385 if (num != 0) { 386 s0 = (mlib_s32) * sp; 387 sp--; 388 389 t0 = VIS_LD_U8_I(table3, s0); 390 acc = vis_faligndata(t0, acc); 391 392 s0 = (mlib_s32) * sp; 393 sp--; 394 395 t0 = VIS_LD_U8_I(table2, s0); 396 acc = vis_faligndata(t0, acc); 397 398 s0 = (mlib_s32) * sp; 399 sp--; 400 401 t0 = VIS_LD_U8_I(table1, s0); 402 acc = vis_faligndata(t0, acc); 403 404 s0 = (mlib_s32) * sp; 405 sp--; 406 407 t0 = VIS_LD_U8_I(table0, s0); 408 acc = vis_faligndata(t0, acc); 409 } 410 411 emask = vis_edge8(dp, dend); 412 vis_pst_8(acc, dp, emask); 413 } 414} 415 416/***************************************************************/ 417void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src, 418 mlib_u8 *dst, 419 mlib_s32 xsize, 420 const mlib_u8 *table0, 421 const mlib_u8 *table1, 422 const mlib_u8 *table2, 423 const mlib_u8 *table3) 424{ 425 mlib_u32 *sa; /* aligned pointer to source data */ 426 mlib_u8 *sp; /* pointer to source data */ 427 mlib_u32 s0, s1, s2; /* source data */ 428 mlib_u8 *dl; /* pointer to start of destination */ 429 mlib_u8 *dend; /* pointer to end of destination */ 430 mlib_d64 *dp; /* aligned pointer to destination */ 431 mlib_d64 t0, t1, t2; /* destination data */ 432 mlib_d64 t3, t4, t5; /* destination data */ 433 mlib_d64 t6, t7, acc; /* destination data */ 434 mlib_s32 emask; /* edge mask */ 435 mlib_s32 i, num; /* loop variable */ 436 437 sa = (mlib_u32 *) (src - 2); 438 dl = dst; 439 dp = (mlib_d64 *) dl; 440 dend = dl + xsize - 1; 441 442 vis_alignaddr((void *)0, 7); 443 444 s0 = *sa++; 445 446 if (xsize >= 8) { 447 448 s1 = sa[0]; 449 s2 = sa[1]; 450 sa += 2; 451 452#pragma pipeloop(0) 453 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 454 t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF); 455 t6 = VIS_LD_U8_I(table2, s2 >> 24); 456 t5 = VIS_LD_U8_I(table1, s1 & 0xFF); 457 t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 458 t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF); 459 t2 = VIS_LD_U8_I(table2, s1 >> 24); 460 t1 = VIS_LD_U8_I(table1, s0 & 0xFF); 461 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF); 462 acc = vis_faligndata(t7, acc); 463 acc = vis_faligndata(t6, acc); 464 acc = vis_faligndata(t5, acc); 465 acc = vis_faligndata(t4, acc); 466 acc = vis_faligndata(t3, acc); 467 acc = vis_faligndata(t2, acc); 468 acc = vis_faligndata(t1, acc); 469 acc = vis_faligndata(t0, acc); 470 s0 = s2; 471 s1 = sa[0]; 472 s2 = sa[1]; 473 *dp++ = acc; 474 } 475 476 t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF); 477 t6 = VIS_LD_U8_I(table2, s2 >> 24); 478 t5 = VIS_LD_U8_I(table1, s1 & 0xFF); 479 t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 480 t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF); 481 t2 = VIS_LD_U8_I(table2, s1 >> 24); 482 t1 = VIS_LD_U8_I(table1, s0 & 0xFF); 483 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF); 484 acc = vis_faligndata(t7, acc); 485 acc = vis_faligndata(t6, acc); 486 acc = vis_faligndata(t5, acc); 487 acc = vis_faligndata(t4, acc); 488 acc = vis_faligndata(t3, acc); 489 acc = vis_faligndata(t2, acc); 490 acc = vis_faligndata(t1, acc); 491 acc = vis_faligndata(t0, acc); 492 *dp++ = acc; 493 } 494 495 sp = (mlib_u8 *) sa; 496 sp -= 2; 497 498 if ((mlib_addr) dp <= (mlib_addr) dend) { 499 500 num = (mlib_addr) dend - (mlib_addr) dp; 501 sp += num; 502 num++; 503 504 if ((num & 3) == 1) { 505 s0 = (mlib_s32) * sp; 506 sp--; 507 508 t0 = VIS_LD_U8_I(table0, s0); 509 acc = vis_faligndata(t0, acc); 510 num--; 511 } 512 else if ((num & 3) == 2) { 513 s0 = (mlib_s32) * sp; 514 sp--; 515 516 t0 = VIS_LD_U8_I(table1, s0); 517 acc = vis_faligndata(t0, acc); 518 519 s0 = (mlib_s32) * sp; 520 sp--; 521 522 t0 = VIS_LD_U8_I(table0, s0); 523 acc = vis_faligndata(t0, acc); 524 num -= 2; 525 } 526 else if ((num & 3) == 3) { 527 s0 = (mlib_s32) * sp; 528 sp--; 529 530 t0 = VIS_LD_U8_I(table2, s0); 531 acc = vis_faligndata(t0, acc); 532 533 s0 = (mlib_s32) * sp; 534 sp--; 535 536 t0 = VIS_LD_U8_I(table1, s0); 537 acc = vis_faligndata(t0, acc); 538 539 s0 = (mlib_s32) * sp; 540 sp--; 541 542 t0 = VIS_LD_U8_I(table0, s0); 543 acc = vis_faligndata(t0, acc); 544 num -= 3; 545 } 546 547 if (num != 0) { 548 s0 = (mlib_s32) * sp; 549 sp--; 550 551 t0 = VIS_LD_U8_I(table3, s0); 552 acc = vis_faligndata(t0, acc); 553 554 s0 = (mlib_s32) * sp; 555 sp--; 556 557 t0 = VIS_LD_U8_I(table2, s0); 558 acc = vis_faligndata(t0, acc); 559 560 s0 = (mlib_s32) * sp; 561 sp--; 562 563 t0 = VIS_LD_U8_I(table1, s0); 564 acc = vis_faligndata(t0, acc); 565 566 s0 = (mlib_s32) * sp; 567 sp--; 568 569 t0 = VIS_LD_U8_I(table0, s0); 570 acc = vis_faligndata(t0, acc); 571 } 572 573 emask = vis_edge8(dp, dend); 574 vis_pst_8(acc, dp, emask); 575 } 576} 577 578/***************************************************************/ 579void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src, 580 mlib_u8 *dst, 581 mlib_s32 xsize, 582 const mlib_u8 *table0, 583 const mlib_u8 *table1, 584 const mlib_u8 *table2, 585 const mlib_u8 *table3) 586{ 587 mlib_u32 *sa; /* aligned pointer to source data */ 588 mlib_u8 *sp; /* pointer to source data */ 589 mlib_u32 s0, s1, s2; /* source data */ 590 mlib_u8 *dl; /* pointer to start of destination */ 591 mlib_u8 *dend; /* pointer to end of destination */ 592 mlib_d64 *dp; /* aligned pointer to destination */ 593 mlib_d64 t0, t1, t2; /* destination data */ 594 mlib_d64 t3, t4, t5; /* destination data */ 595 mlib_d64 t6, t7, acc; /* destination data */ 596 mlib_s32 emask; /* edge mask */ 597 mlib_s32 i, num; /* loop variable */ 598 599 sa = (mlib_u32 *) (src - 3); 600 dl = dst; 601 dp = (mlib_d64 *) dl; 602 dend = dl + xsize - 1; 603 604 vis_alignaddr((void *)0, 7); 605 606 s0 = *sa++; 607 608 if (xsize >= 8) { 609 610 s1 = sa[0]; 611 s2 = sa[1]; 612 sa += 2; 613 614#pragma pipeloop(0) 615 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 616 t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF); 617 t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF); 618 t5 = VIS_LD_U8_I(table1, s2 >> 24); 619 t4 = VIS_LD_U8_I(table0, s1 & 0xFF); 620 t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF); 621 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 622 t1 = VIS_LD_U8_I(table1, s1 >> 24); 623 t0 = VIS_LD_U8_I(table0, s0 & 0xFF); 624 acc = vis_faligndata(t7, acc); 625 acc = vis_faligndata(t6, acc); 626 acc = vis_faligndata(t5, acc); 627 acc = vis_faligndata(t4, acc); 628 acc = vis_faligndata(t3, acc); 629 acc = vis_faligndata(t2, acc); 630 acc = vis_faligndata(t1, acc); 631 acc = vis_faligndata(t0, acc); 632 s0 = s2; 633 s1 = sa[0]; 634 s2 = sa[1]; 635 *dp++ = acc; 636 } 637 638 t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF); 639 t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF); 640 t5 = VIS_LD_U8_I(table1, s2 >> 24); 641 t4 = VIS_LD_U8_I(table0, s1 & 0xFF); 642 t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF); 643 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 644 t1 = VIS_LD_U8_I(table1, s1 >> 24); 645 t0 = VIS_LD_U8_I(table0, s0 & 0xFF); 646 acc = vis_faligndata(t7, acc); 647 acc = vis_faligndata(t6, acc); 648 acc = vis_faligndata(t5, acc); 649 acc = vis_faligndata(t4, acc); 650 acc = vis_faligndata(t3, acc); 651 acc = vis_faligndata(t2, acc); 652 acc = vis_faligndata(t1, acc); 653 acc = vis_faligndata(t0, acc); 654 *dp++ = acc; 655 } 656 657 sp = (mlib_u8 *) sa; 658 sp--; 659 660 if ((mlib_addr) dp <= (mlib_addr) dend) { 661 662 num = (mlib_addr) dend - (mlib_addr) dp; 663 sp += num; 664 num++; 665 666 if ((num & 3) == 1) { 667 s0 = (mlib_s32) * sp; 668 sp--; 669 670 t0 = VIS_LD_U8_I(table0, s0); 671 acc = vis_faligndata(t0, acc); 672 num--; 673 } 674 else if ((num & 3) == 2) { 675 s0 = (mlib_s32) * sp; 676 sp--; 677 678 t0 = VIS_LD_U8_I(table1, s0); 679 acc = vis_faligndata(t0, acc); 680 681 s0 = (mlib_s32) * sp; 682 sp--; 683 684 t0 = VIS_LD_U8_I(table0, s0); 685 acc = vis_faligndata(t0, acc); 686 num -= 2; 687 } 688 else if ((num & 3) == 3) { 689 s0 = (mlib_s32) * sp; 690 sp--; 691 692 t0 = VIS_LD_U8_I(table2, s0); 693 acc = vis_faligndata(t0, acc); 694 695 s0 = (mlib_s32) * sp; 696 sp--; 697 698 t0 = VIS_LD_U8_I(table1, s0); 699 acc = vis_faligndata(t0, acc); 700 701 s0 = (mlib_s32) * sp; 702 sp--; 703 704 t0 = VIS_LD_U8_I(table0, s0); 705 acc = vis_faligndata(t0, acc); 706 num -= 3; 707 } 708 709 if (num != 0) { 710 s0 = (mlib_s32) * sp; 711 sp--; 712 713 t0 = VIS_LD_U8_I(table3, s0); 714 acc = vis_faligndata(t0, acc); 715 716 s0 = (mlib_s32) * sp; 717 sp--; 718 719 t0 = VIS_LD_U8_I(table2, s0); 720 acc = vis_faligndata(t0, acc); 721 722 s0 = (mlib_s32) * sp; 723 sp--; 724 725 t0 = VIS_LD_U8_I(table1, s0); 726 acc = vis_faligndata(t0, acc); 727 728 s0 = (mlib_s32) * sp; 729 sp--; 730 731 t0 = VIS_LD_U8_I(table0, s0); 732 acc = vis_faligndata(t0, acc); 733 } 734 735 emask = vis_edge8(dp, dend); 736 vis_pst_8(acc, dp, emask); 737 } 738} 739 740/***************************************************************/ 741void mlib_v_ImageLookUp_U8_U8_1(const mlib_u8 *src, 742 mlib_s32 slb, 743 mlib_u8 *dst, 744 mlib_s32 dlb, 745 mlib_s32 xsize, 746 mlib_s32 ysize, 747 const mlib_u8 **table) 748{ 749 mlib_u8 *sl; 750 mlib_u8 *dl; 751 const mlib_u8 *tab = table[0]; 752 mlib_s32 j, i; 753 754 sl = (void *)src; 755 dl = dst; 756 757 /* row loop */ 758 for (j = 0; j < ysize; j++) { 759 mlib_u8 *sp = sl; 760 mlib_u8 *dp = dl; 761 mlib_s32 off, size = xsize; 762 763 off = (8 - ((mlib_addr) dp & 7)) & 7; 764 765 off = (off < size) ? off : size; 766 767 for (i = 0; i < off; i++) { 768 *dp++ = tab[(*sp++)]; 769 size--; 770 } 771 772 if (size > 0) { 773 774 off = (mlib_addr) sp & 3; 775 776 if (off == 0) { 777 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab, tab, tab, 778 tab); 779 } 780 else if (off == 1) { 781 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab, tab, tab, 782 tab); 783 } 784 else if (off == 2) { 785 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab, tab, tab, 786 tab); 787 } 788 else { 789 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab, tab, tab, 790 tab); 791 } 792 } 793 794 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 795 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 796 } 797} 798 799/***************************************************************/ 800void mlib_v_ImageLookUp_U8_U8_2(const mlib_u8 *src, 801 mlib_s32 slb, 802 mlib_u8 *dst, 803 mlib_s32 dlb, 804 mlib_s32 xsize, 805 mlib_s32 ysize, 806 const mlib_u8 **table) 807{ 808 mlib_u8 *sl; 809 mlib_u8 *dl; 810 const mlib_u8 *tab; 811 mlib_s32 j, i; 812 813 sl = (void *)src; 814 dl = dst; 815 816 /* row loop */ 817 for (j = 0; j < ysize; j++) { 818 mlib_u8 *sp = sl; 819 mlib_u8 *dp = dl; 820 mlib_s32 off, size = xsize * 2; 821 const mlib_u8 *tab0 = table[0]; 822 const mlib_u8 *tab1 = table[1]; 823 824 off = (8 - ((mlib_addr) dp & 7)) & 7; 825 826 off = (off < size) ? off : size; 827 828 for (i = 0; i < off - 1; i += 2) { 829 *dp++ = tab0[(*sp++)]; 830 *dp++ = tab1[(*sp++)]; 831 size -= 2; 832 } 833 834 if ((off & 1) != 0) { 835 *dp++ = tab0[(*sp++)]; 836 size--; 837 tab = tab0; 838 tab0 = tab1; 839 tab1 = tab; 840 } 841 842 if (size > 0) { 843 844 off = (mlib_addr) sp & 3; 845 846 if (off == 0) { 847 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0, 848 tab1); 849 } 850 else if (off == 1) { 851 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0, 852 tab1); 853 } 854 else if (off == 2) { 855 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0, 856 tab1); 857 } 858 else { 859 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0, 860 tab1); 861 } 862 } 863 864 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 865 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 866 } 867} 868 869/***************************************************************/ 870void mlib_v_ImageLookUp_U8_U8_4(const mlib_u8 *src, 871 mlib_s32 slb, 872 mlib_u8 *dst, 873 mlib_s32 dlb, 874 mlib_s32 xsize, 875 mlib_s32 ysize, 876 const mlib_u8 **table) 877{ 878 mlib_u8 *sl; 879 mlib_u8 *dl; 880 const mlib_u8 *tab; 881 mlib_s32 j; 882 883 sl = (void *)src; 884 dl = dst; 885 886 /* row loop */ 887 for (j = 0; j < ysize; j++) { 888 mlib_u8 *sp = sl; 889 mlib_u8 *dp = dl; 890 const mlib_u8 *tab0 = table[0]; 891 const mlib_u8 *tab1 = table[1]; 892 const mlib_u8 *tab2 = table[2]; 893 const mlib_u8 *tab3 = table[3]; 894 mlib_s32 off, size = xsize * 4; 895 896 off = (8 - ((mlib_addr) dp & 7)) & 7; 897 898 off = (off < size) ? off : size; 899 900 if (off >= 4) { 901 *dp++ = tab0[(*sp++)]; 902 *dp++ = tab1[(*sp++)]; 903 *dp++ = tab2[(*sp++)]; 904 *dp++ = tab3[(*sp++)]; 905 size -= 4; 906 off -= 4; 907 } 908 909 if (off == 1) { 910 *dp++ = tab0[(*sp++)]; 911 tab = tab0; 912 tab0 = tab1; 913 tab1 = tab2; 914 tab2 = tab3; 915 tab3 = tab; 916 size--; 917 } 918 else if (off == 2) { 919 *dp++ = tab0[(*sp++)]; 920 *dp++ = tab1[(*sp++)]; 921 tab = tab0; 922 tab0 = tab2; 923 tab2 = tab; 924 tab = tab1; 925 tab1 = tab3; 926 tab3 = tab; 927 size -= 2; 928 } 929 else if (off == 3) { 930 *dp++ = tab0[(*sp++)]; 931 *dp++ = tab1[(*sp++)]; 932 *dp++ = tab2[(*sp++)]; 933 tab = tab3; 934 tab3 = tab2; 935 tab2 = tab1; 936 tab1 = tab0; 937 tab0 = tab; 938 size -= 3; 939 } 940 941 if (size > 0) { 942 943 off = (mlib_addr) sp & 3; 944 945 if (off == 0) { 946 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2, 947 tab3); 948 } 949 else if (off == 1) { 950 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2, 951 tab3); 952 } 953 else if (off == 2) { 954 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2, 955 tab3); 956 } 957 else { 958 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2, 959 tab3); 960 } 961 } 962 963 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 964 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 965 } 966} 967 968/***************************************************************/ 969void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, 970 mlib_u8 *dst, 971 mlib_s32 xsize, 972 const mlib_u8 *table0, 973 const mlib_u8 *table1, 974 const mlib_u8 *table2) 975{ 976 mlib_u32 *sa; /* aligned pointer to source data */ 977 mlib_u8 *sp; /* pointer to source data */ 978 mlib_u32 s0, s1; /* source data */ 979 mlib_u8 *dl; /* pointer to start of destination */ 980 mlib_u8 *dend; /* pointer to end of destination */ 981 mlib_d64 *dp; /* aligned pointer to destination */ 982 mlib_d64 t0, t1, t2; /* destination data */ 983 mlib_d64 t3, t4, t5; /* destination data */ 984 mlib_d64 t6, t7, acc; /* destination data */ 985 mlib_s32 emask; /* edge mask */ 986 mlib_s32 i, num; /* loop variable */ 987 const mlib_u8 *table; 988 989 sa = (mlib_u32 *) src; 990 dl = dst; 991 dp = (mlib_d64 *) dl; 992 dend = dl + xsize - 1; 993 994 vis_alignaddr((void *)0, 7); 995 996 if (xsize >= 8) { 997 998 s0 = sa[0]; 999 s1 = sa[1]; 1000 sa += 2; 1001 1002#pragma pipeloop(0) 1003 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 1004 t7 = VIS_LD_U8_I(table1, s1 & 0xFF); 1005 t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 1006 t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 1007 t4 = VIS_LD_U8_I(table1, s1 >> 24); 1008 t3 = VIS_LD_U8_I(table0, s0 & 0xFF); 1009 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF); 1010 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF); 1011 t0 = VIS_LD_U8_I(table0, s0 >> 24); 1012 acc = vis_faligndata(t7, acc); 1013 acc = vis_faligndata(t6, acc); 1014 acc = vis_faligndata(t5, acc); 1015 acc = vis_faligndata(t4, acc); 1016 acc = vis_faligndata(t3, acc); 1017 acc = vis_faligndata(t2, acc); 1018 acc = vis_faligndata(t1, acc); 1019 acc = vis_faligndata(t0, acc); 1020 table = table0; 1021 table0 = table2; 1022 table2 = table1; 1023 table1 = table; 1024 s0 = sa[0]; 1025 s1 = sa[1]; 1026 *dp++ = acc; 1027 } 1028 1029 t7 = VIS_LD_U8_I(table1, s1 & 0xFF); 1030 t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 1031 t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 1032 t4 = VIS_LD_U8_I(table1, s1 >> 24); 1033 t3 = VIS_LD_U8_I(table0, s0 & 0xFF); 1034 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF); 1035 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF); 1036 t0 = VIS_LD_U8_I(table0, s0 >> 24); 1037 acc = vis_faligndata(t7, acc); 1038 acc = vis_faligndata(t6, acc); 1039 acc = vis_faligndata(t5, acc); 1040 acc = vis_faligndata(t4, acc); 1041 acc = vis_faligndata(t3, acc); 1042 acc = vis_faligndata(t2, acc); 1043 acc = vis_faligndata(t1, acc); 1044 acc = vis_faligndata(t0, acc); 1045 table = table0; 1046 table0 = table2; 1047 table2 = table1; 1048 table1 = table; 1049 *dp++ = acc; 1050 } 1051 1052 sp = (mlib_u8 *) sa; 1053 1054 if ((mlib_addr) dp <= (mlib_addr) dend) { 1055 1056 num = (mlib_addr) dend - (mlib_addr) dp; 1057 sp += num; 1058 num++; 1059 i = num - 3 * (num / 3); 1060 1061 if (i == 2) { 1062 s0 = (mlib_s32) * sp; 1063 sp--; 1064 1065 t0 = VIS_LD_U8_I(table1, s0); 1066 acc = vis_faligndata(t0, acc); 1067 1068 s0 = (mlib_s32) * sp; 1069 sp--; 1070 1071 t0 = VIS_LD_U8_I(table0, s0); 1072 acc = vis_faligndata(t0, acc); 1073 num -= 2; 1074 } 1075 else if (i == 1) { 1076 s0 = (mlib_s32) * sp; 1077 sp--; 1078 1079 t0 = VIS_LD_U8_I(table0, s0); 1080 acc = vis_faligndata(t0, acc); 1081 num--; 1082 } 1083 1084#pragma pipeloop(0) 1085 for (i = 0; i < num; i += 3) { 1086 s0 = (mlib_s32) * sp; 1087 sp--; 1088 1089 t0 = VIS_LD_U8_I(table2, s0); 1090 acc = vis_faligndata(t0, acc); 1091 1092 s0 = (mlib_s32) * sp; 1093 sp--; 1094 1095 t0 = VIS_LD_U8_I(table1, s0); 1096 acc = vis_faligndata(t0, acc); 1097 1098 s0 = (mlib_s32) * sp; 1099 sp--; 1100 1101 t0 = VIS_LD_U8_I(table0, s0); 1102 acc = vis_faligndata(t0, acc); 1103 } 1104 1105 emask = vis_edge8(dp, dend); 1106 vis_pst_8(acc, dp, emask); 1107 } 1108} 1109 1110/***************************************************************/ 1111void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, 1112 mlib_u8 *dst, 1113 mlib_s32 xsize, 1114 const mlib_u8 *table0, 1115 const mlib_u8 *table1, 1116 const mlib_u8 *table2) 1117{ 1118 mlib_u32 *sa; /* aligned pointer to source data */ 1119 mlib_u8 *sp; /* pointer to source data */ 1120 mlib_u32 s0, s1, s2; /* source data */ 1121 mlib_u8 *dl; /* pointer to start of destination */ 1122 mlib_u8 *dend; /* pointer to end of destination */ 1123 mlib_d64 *dp; /* aligned pointer to destination */ 1124 mlib_d64 t0, t1, t2; /* destination data */ 1125 mlib_d64 t3, t4, t5; /* destination data */ 1126 mlib_d64 t6, t7, acc; /* destination data */ 1127 mlib_s32 emask; /* edge mask */ 1128 mlib_s32 i, num; /* loop variable */ 1129 const mlib_u8 *table; 1130 1131 sa = (mlib_u32 *) (src - 1); 1132 dl = dst; 1133 dp = (mlib_d64 *) dl; 1134 dend = dl + xsize - 1; 1135 1136 vis_alignaddr((void *)0, 7); 1137 1138 s0 = *sa++; 1139 1140 if (xsize >= 8) { 1141 1142 s1 = sa[0]; 1143 s2 = sa[1]; 1144 sa += 2; 1145 1146#pragma pipeloop(0) 1147 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 1148 t7 = VIS_LD_U8_I(table1, s2 >> 24); 1149 t6 = VIS_LD_U8_I(table0, s1 & 0xFF); 1150 t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF); 1151 t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF); 1152 t3 = VIS_LD_U8_I(table0, s1 >> 24); 1153 t2 = VIS_LD_U8_I(table2, s0 & 0xFF); 1154 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF); 1155 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF); 1156 acc = vis_faligndata(t7, acc); 1157 acc = vis_faligndata(t6, acc); 1158 acc = vis_faligndata(t5, acc); 1159 acc = vis_faligndata(t4, acc); 1160 acc = vis_faligndata(t3, acc); 1161 acc = vis_faligndata(t2, acc); 1162 acc = vis_faligndata(t1, acc); 1163 acc = vis_faligndata(t0, acc); 1164 table = table0; 1165 table0 = table2; 1166 table2 = table1; 1167 table1 = table; 1168 s0 = s2; 1169 s1 = sa[0]; 1170 s2 = sa[1]; 1171 *dp++ = acc; 1172 } 1173 1174 t7 = VIS_LD_U8_I(table1, s2 >> 24); 1175 t6 = VIS_LD_U8_I(table0, s1 & 0xFF); 1176 t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF); 1177 t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF); 1178 t3 = VIS_LD_U8_I(table0, s1 >> 24); 1179 t2 = VIS_LD_U8_I(table2, s0 & 0xFF); 1180 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF); 1181 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF); 1182 acc = vis_faligndata(t7, acc); 1183 acc = vis_faligndata(t6, acc); 1184 acc = vis_faligndata(t5, acc); 1185 acc = vis_faligndata(t4, acc); 1186 acc = vis_faligndata(t3, acc); 1187 acc = vis_faligndata(t2, acc); 1188 acc = vis_faligndata(t1, acc); 1189 acc = vis_faligndata(t0, acc); 1190 table = table0; 1191 table0 = table2; 1192 table2 = table1; 1193 table1 = table; 1194 *dp++ = acc; 1195 } 1196 1197 sp = (mlib_u8 *) sa; 1198 sp -= 3; 1199 1200 if ((mlib_addr) dp <= (mlib_addr) dend) { 1201 1202 num = (mlib_addr) dend - (mlib_addr) dp; 1203 sp += num; 1204 num++; 1205 i = num - 3 * (num / 3); 1206 1207 if (i == 2) { 1208 s0 = (mlib_s32) * sp; 1209 sp--; 1210 1211 t0 = VIS_LD_U8_I(table1, s0); 1212 acc = vis_faligndata(t0, acc); 1213 1214 s0 = (mlib_s32) * sp; 1215 sp--; 1216 1217 t0 = VIS_LD_U8_I(table0, s0); 1218 acc = vis_faligndata(t0, acc); 1219 num -= 2; 1220 } 1221 else if (i == 1) { 1222 s0 = (mlib_s32) * sp; 1223 sp--; 1224 1225 t0 = VIS_LD_U8_I(table0, s0); 1226 acc = vis_faligndata(t0, acc); 1227 num--; 1228 } 1229 1230#pragma pipeloop(0) 1231 for (i = 0; i < num; i += 3) { 1232 s0 = (mlib_s32) * sp; 1233 sp--; 1234 1235 t0 = VIS_LD_U8_I(table2, s0); 1236 acc = vis_faligndata(t0, acc); 1237 1238 s0 = (mlib_s32) * sp; 1239 sp--; 1240 1241 t0 = VIS_LD_U8_I(table1, s0); 1242 acc = vis_faligndata(t0, acc); 1243 1244 s0 = (mlib_s32) * sp; 1245 sp--; 1246 1247 t0 = VIS_LD_U8_I(table0, s0); 1248 acc = vis_faligndata(t0, acc); 1249 } 1250 1251 emask = vis_edge8(dp, dend); 1252 vis_pst_8(acc, dp, emask); 1253 } 1254} 1255 1256/***************************************************************/ 1257void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, 1258 mlib_u8 *dst, 1259 mlib_s32 xsize, 1260 const mlib_u8 *table0, 1261 const mlib_u8 *table1, 1262 const mlib_u8 *table2) 1263{ 1264 mlib_u32 *sa; /* aligned pointer to source data */ 1265 mlib_u8 *sp; /* pointer to source data */ 1266 mlib_u32 s0, s1, s2; /* source data */ 1267 mlib_u8 *dl; /* pointer to start of destination */ 1268 mlib_u8 *dend; /* pointer to end of destination */ 1269 mlib_d64 *dp; /* aligned pointer to destination */ 1270 mlib_d64 t0, t1, t2; /* destination data */ 1271 mlib_d64 t3, t4, t5; /* destination data */ 1272 mlib_d64 t6, t7, acc; /* destination data */ 1273 mlib_s32 emask; /* edge mask */ 1274 mlib_s32 i, num; /* loop variable */ 1275 const mlib_u8 *table; 1276 1277 sa = (mlib_u32 *) (src - 2); 1278 dl = dst; 1279 dp = (mlib_d64 *) dl; 1280 dend = dl + xsize - 1; 1281 1282 vis_alignaddr((void *)0, 7); 1283 1284 s0 = *sa++; 1285 1286 if (xsize >= 8) { 1287 1288 s1 = sa[0]; 1289 s2 = sa[1]; 1290 sa += 2; 1291 1292#pragma pipeloop(0) 1293 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 1294 t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF); 1295 t6 = VIS_LD_U8_I(table0, s2 >> 24); 1296 t5 = VIS_LD_U8_I(table2, s1 & 0xFF); 1297 t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF); 1298 t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF); 1299 t2 = VIS_LD_U8_I(table2, s1 >> 24); 1300 t1 = VIS_LD_U8_I(table1, s0 & 0xFF); 1301 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF); 1302 acc = vis_faligndata(t7, acc); 1303 acc = vis_faligndata(t6, acc); 1304 acc = vis_faligndata(t5, acc); 1305 acc = vis_faligndata(t4, acc); 1306 acc = vis_faligndata(t3, acc); 1307 acc = vis_faligndata(t2, acc); 1308 acc = vis_faligndata(t1, acc); 1309 acc = vis_faligndata(t0, acc); 1310 table = table0; 1311 table0 = table2; 1312 table2 = table1; 1313 table1 = table; 1314 s0 = s2; 1315 s1 = sa[0]; 1316 s2 = sa[1]; 1317 *dp++ = acc; 1318 } 1319 1320 t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF); 1321 t6 = VIS_LD_U8_I(table0, s2 >> 24); 1322 t5 = VIS_LD_U8_I(table2, s1 & 0xFF); 1323 t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF); 1324 t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF); 1325 t2 = VIS_LD_U8_I(table2, s1 >> 24); 1326 t1 = VIS_LD_U8_I(table1, s0 & 0xFF); 1327 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF); 1328 acc = vis_faligndata(t7, acc); 1329 acc = vis_faligndata(t6, acc); 1330 acc = vis_faligndata(t5, acc); 1331 acc = vis_faligndata(t4, acc); 1332 acc = vis_faligndata(t3, acc); 1333 acc = vis_faligndata(t2, acc); 1334 acc = vis_faligndata(t1, acc); 1335 acc = vis_faligndata(t0, acc); 1336 table = table0; 1337 table0 = table2; 1338 table2 = table1; 1339 table1 = table; 1340 *dp++ = acc; 1341 } 1342 1343 sp = (mlib_u8 *) sa; 1344 sp -= 2; 1345 1346 if ((mlib_addr) dp <= (mlib_addr) dend) { 1347 1348 num = (mlib_addr) dend - (mlib_addr) dp; 1349 sp += num; 1350 num++; 1351 i = num - 3 * (num / 3); 1352 1353 if (i == 2) { 1354 s0 = (mlib_s32) * sp; 1355 sp--; 1356 1357 t0 = VIS_LD_U8_I(table1, s0); 1358 acc = vis_faligndata(t0, acc); 1359 1360 s0 = (mlib_s32) * sp; 1361 sp--; 1362 1363 t0 = VIS_LD_U8_I(table0, s0); 1364 acc = vis_faligndata(t0, acc); 1365 num -= 2; 1366 } 1367 else if (i == 1) { 1368 s0 = (mlib_s32) * sp; 1369 sp--; 1370 1371 t0 = VIS_LD_U8_I(table0, s0); 1372 acc = vis_faligndata(t0, acc); 1373 num--; 1374 } 1375 1376#pragma pipeloop(0) 1377 for (i = 0; i < num; i += 3) { 1378 s0 = (mlib_s32) * sp; 1379 sp--; 1380 1381 t0 = VIS_LD_U8_I(table2, s0); 1382 acc = vis_faligndata(t0, acc); 1383 1384 s0 = (mlib_s32) * sp; 1385 sp--; 1386 1387 t0 = VIS_LD_U8_I(table1, s0); 1388 acc = vis_faligndata(t0, acc); 1389 1390 s0 = (mlib_s32) * sp; 1391 sp--; 1392 1393 t0 = VIS_LD_U8_I(table0, s0); 1394 acc = vis_faligndata(t0, acc); 1395 } 1396 1397 emask = vis_edge8(dp, dend); 1398 vis_pst_8(acc, dp, emask); 1399 } 1400} 1401 1402/***************************************************************/ 1403void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, 1404 mlib_u8 *dst, 1405 mlib_s32 xsize, 1406 const mlib_u8 *table0, 1407 const mlib_u8 *table1, 1408 const mlib_u8 *table2) 1409{ 1410 mlib_u32 *sa; /* aligned pointer to source data */ 1411 mlib_u8 *sp; /* pointer to source data */ 1412 mlib_u32 s0, s1, s2; /* source data */ 1413 mlib_u8 *dl; /* pointer to start of destination */ 1414 mlib_u8 *dend; /* pointer to end of destination */ 1415 mlib_d64 *dp; /* aligned pointer to destination */ 1416 mlib_d64 t0, t1, t2; /* destination data */ 1417 mlib_d64 t3, t4, t5; /* destination data */ 1418 mlib_d64 t6, t7, acc; /* destination data */ 1419 mlib_s32 emask; /* edge mask */ 1420 mlib_s32 i, num; /* loop variable */ 1421 const mlib_u8 *table; 1422 1423 sa = (mlib_u32 *) (src - 3); 1424 dl = dst; 1425 dp = (mlib_d64 *) dl; 1426 dend = dl + xsize - 1; 1427 1428 vis_alignaddr((void *)0, 7); 1429 1430 s0 = *sa++; 1431 1432 if (xsize >= 8) { 1433 1434 s1 = sa[0]; 1435 s2 = sa[1]; 1436 sa += 2; 1437 1438#pragma pipeloop(0) 1439 for (i = 0; i <= xsize - 16; i += 8, sa += 2) { 1440 t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF); 1441 t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF); 1442 t5 = VIS_LD_U8_I(table2, s2 >> 24); 1443 t4 = VIS_LD_U8_I(table1, s1 & 0xFF); 1444 t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 1445 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 1446 t1 = VIS_LD_U8_I(table1, s1 >> 24); 1447 t0 = VIS_LD_U8_I(table0, s0 & 0xFF); 1448 acc = vis_faligndata(t7, acc); 1449 acc = vis_faligndata(t6, acc); 1450 acc = vis_faligndata(t5, acc); 1451 acc = vis_faligndata(t4, acc); 1452 acc = vis_faligndata(t3, acc); 1453 acc = vis_faligndata(t2, acc); 1454 acc = vis_faligndata(t1, acc); 1455 acc = vis_faligndata(t0, acc); 1456 table = table0; 1457 table0 = table2; 1458 table2 = table1; 1459 table1 = table; 1460 s0 = s2; 1461 s1 = sa[0]; 1462 s2 = sa[1]; 1463 *dp++ = acc; 1464 } 1465 1466 t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF); 1467 t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF); 1468 t5 = VIS_LD_U8_I(table2, s2 >> 24); 1469 t4 = VIS_LD_U8_I(table1, s1 & 0xFF); 1470 t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF); 1471 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF); 1472 t1 = VIS_LD_U8_I(table1, s1 >> 24); 1473 t0 = VIS_LD_U8_I(table0, s0 & 0xFF); 1474 acc = vis_faligndata(t7, acc); 1475 acc = vis_faligndata(t6, acc); 1476 acc = vis_faligndata(t5, acc); 1477 acc = vis_faligndata(t4, acc); 1478 acc = vis_faligndata(t3, acc); 1479 acc = vis_faligndata(t2, acc); 1480 acc = vis_faligndata(t1, acc); 1481 acc = vis_faligndata(t0, acc); 1482 table = table0; 1483 table0 = table2; 1484 table2 = table1; 1485 table1 = table; 1486 *dp++ = acc; 1487 } 1488 1489 sp = (mlib_u8 *) sa; 1490 sp--; 1491 1492 if ((mlib_addr) dp <= (mlib_addr) dend) { 1493 1494 num = (mlib_addr) dend - (mlib_addr) dp; 1495 sp += num; 1496 num++; 1497 i = num - 3 * (num / 3); 1498 1499 if (i == 2) { 1500 s0 = (mlib_s32) * sp; 1501 sp--; 1502 1503 t0 = VIS_LD_U8_I(table1, s0); 1504 acc = vis_faligndata(t0, acc); 1505 1506 s0 = (mlib_s32) * sp; 1507 sp--; 1508 1509 t0 = VIS_LD_U8_I(table0, s0); 1510 acc = vis_faligndata(t0, acc); 1511 num -= 2; 1512 } 1513 else if (i == 1) { 1514 s0 = (mlib_s32) * sp; 1515 sp--; 1516 1517 t0 = VIS_LD_U8_I(table0, s0); 1518 acc = vis_faligndata(t0, acc); 1519 num--; 1520 } 1521 1522#pragma pipeloop(0) 1523 for (i = 0; i < num; i += 3) { 1524 s0 = (mlib_s32) * sp; 1525 sp--; 1526 1527 t0 = VIS_LD_U8_I(table2, s0); 1528 acc = vis_faligndata(t0, acc); 1529 1530 s0 = (mlib_s32) * sp; 1531 sp--; 1532 1533 t0 = VIS_LD_U8_I(table1, s0); 1534 acc = vis_faligndata(t0, acc); 1535 1536 s0 = (mlib_s32) * sp; 1537 sp--; 1538 1539 t0 = VIS_LD_U8_I(table0, s0); 1540 acc = vis_faligndata(t0, acc); 1541 } 1542 1543 emask = vis_edge8(dp, dend); 1544 vis_pst_8(acc, dp, emask); 1545 } 1546} 1547 1548/***************************************************************/ 1549void mlib_v_ImageLookUp_U8_U8_3(const mlib_u8 *src, 1550 mlib_s32 slb, 1551 mlib_u8 *dst, 1552 mlib_s32 dlb, 1553 mlib_s32 xsize, 1554 mlib_s32 ysize, 1555 const mlib_u8 **table) 1556{ 1557 mlib_u8 *sl; 1558 mlib_u8 *dl; 1559 const mlib_u8 *tab; 1560 mlib_s32 j, i; 1561 1562 sl = (void *)src; 1563 dl = dst; 1564 1565 /* row loop */ 1566 for (j = 0; j < ysize; j++) { 1567 mlib_u8 *sp = sl; 1568 mlib_u8 *dp = dl; 1569 const mlib_u8 *tab0 = table[0]; 1570 const mlib_u8 *tab1 = table[1]; 1571 const mlib_u8 *tab2 = table[2]; 1572 mlib_s32 off, size = xsize * 3; 1573 1574 off = (8 - ((mlib_addr) dp & 7)) & 7; 1575 1576 off = (off < size) ? off : size; 1577 1578 for (i = 0; i < off - 2; i += 3) { 1579 *dp++ = tab0[(*sp++)]; 1580 *dp++ = tab1[(*sp++)]; 1581 *dp++ = tab2[(*sp++)]; 1582 size -= 3; 1583 } 1584 1585 off -= i; 1586 1587 if (off == 1) { 1588 *dp++ = tab0[(*sp++)]; 1589 tab = tab0; 1590 tab0 = tab1; 1591 tab1 = tab2; 1592 tab2 = tab; 1593 size--; 1594 } 1595 else if (off == 2) { 1596 *dp++ = tab0[(*sp++)]; 1597 *dp++ = tab1[(*sp++)]; 1598 tab = tab2; 1599 tab2 = tab1; 1600 tab1 = tab0; 1601 tab0 = tab; 1602 size -= 2; 1603 } 1604 1605 if (size > 0) { 1606 1607 off = (mlib_addr) sp & 3; 1608 1609 if (off == 0) { 1610 mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2); 1611 } 1612 else if (off == 1) { 1613 mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2); 1614 } 1615 else if (off == 2) { 1616 mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2); 1617 } 1618 else { 1619 mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2); 1620 } 1621 } 1622 1623 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1624 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 1625 } 1626} 1627 1628/***************************************************************/ 1629