1/* 2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28#include "vis_proto.h" 29#include "mlib_image.h" 30#include "mlib_v_ImageLookUpFunc.h" 31 32/***************************************************************/ 33static void mlib_v_ImageLookUpSI_U8_S16_2_SrcOff0_D1(const mlib_u8 *src, 34 mlib_s16 *dst, 35 mlib_s32 xsize, 36 const mlib_f32 *table); 37 38static void mlib_v_ImageLookUpSI_U8_S16_2_DstNonAl_D1(const mlib_u8 *src, 39 mlib_s16 *dst, 40 mlib_s32 xsize, 41 const mlib_f32 *table); 42 43static void mlib_v_ImageLookUpSI_U8_S16_2_DstA8D1_SMALL(const mlib_u8 *src, 44 mlib_s16 *dst, 45 mlib_s32 xsize, 46 const mlib_s16 **table); 47 48static void mlib_v_ImageLookUpSI_U8_S16_2_D1_SMALL(const mlib_u8 *src, 49 mlib_s16 *dst, 50 mlib_s32 xsize, 51 const mlib_s16 **table); 52 53static void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff0_D1(const mlib_u8 *src, 54 mlib_s16 *dst, 55 mlib_s32 xsize, 56 const mlib_d64 *table); 57 58static void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff1_D1(const mlib_u8 *src, 59 mlib_s16 *dst, 60 mlib_s32 xsize, 61 const mlib_d64 *table); 62 63static void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff2_D1(const mlib_u8 *src, 64 mlib_s16 *dst, 65 mlib_s32 xsize, 66 const mlib_d64 *table); 67 68static void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff3_D1(const mlib_u8 *src, 69 mlib_s16 *dst, 70 mlib_s32 xsize, 71 const mlib_d64 *table); 72 73static void mlib_v_ImageLookUpSI_U8_S16_3_D1_SMALL(const mlib_u8 *src, 74 mlib_s16 *dst, 75 mlib_s32 xsize, 76 const mlib_s16 **table); 77 78static void mlib_v_ImageLookUpSI_U8_S16_4_DstA8D1_D1(const mlib_u8 *src, 79 mlib_s16 *dst, 80 mlib_s32 xsize, 81 const mlib_d64 *table); 82 83static void mlib_v_ImageLookUpSI_U8_S16_4_DstNonAl_D1(const mlib_u8 *src, 84 mlib_s16 *dst, 85 mlib_s32 xsize, 86 const mlib_d64 *table); 87 88static void mlib_v_ImageLookUpSI_U8_S16_4_DstOff0_D1_SMALL(const mlib_u8 *src, 89 mlib_s16 *dst, 90 mlib_s32 xsize, 91 const mlib_s16 **table); 92 93static void mlib_v_ImageLookUpSI_U8_S16_4_DstOff1_D1_SMALL(const mlib_u8 *src, 94 mlib_s16 *dst, 95 mlib_s32 xsize, 96 const mlib_s16 **table); 97 98static void mlib_v_ImageLookUpSI_U8_S16_4_DstOff2_D1_SMALL(const mlib_u8 *src, 99 mlib_s16 *dst, 100 mlib_s32 xsize, 101 const mlib_s16 **table); 102 103static void mlib_v_ImageLookUpSI_U8_S16_4_DstOff3_D1_SMALL(const mlib_u8 *src, 104 mlib_s16 *dst, 105 mlib_s32 xsize, 106 const mlib_s16 **table); 107 108/***************************************************************/ 109#define VIS_LD_U16_I(X, Y) vis_ld_u16_i((void *)(X), (Y)) 110 111/***************************************************************/ 112void mlib_v_ImageLookUpSI_U8_S16_2_SrcOff0_D1(const mlib_u8 *src, 113 mlib_s16 *dst, 114 mlib_s32 xsize, 115 const mlib_f32 *table) 116{ 117 mlib_u32 *sa; /* aligned pointer to source data */ 118 mlib_u8 *sp; /* pointer to source data */ 119 mlib_u32 s0; /* source data */ 120 mlib_f32 *dp; /* aligned pointer to destination */ 121 mlib_f32 acc0, acc1; /* destination data */ 122 mlib_f32 acc2, acc3; /* destination data */ 123 mlib_s32 i; /* loop variable */ 124 mlib_u32 s00, s01, s02, s03; 125 126 sa = (mlib_u32 *) src; 127 dp = (mlib_f32 *) dst; 128 129 i = 0; 130 131 if (xsize >= 4) { 132 133 s0 = *sa++; 134 s00 = (s0 >> 22) & 0x3FC; 135 s01 = (s0 >> 14) & 0x3FC; 136 137#pragma pipeloop(0) 138 for (i = 0; i <= xsize - 8; i += 4, dp += 4) { 139 s02 = (s0 >> 6) & 0x3FC; 140 s03 = (s0 << 2) & 0x3FC; 141 acc0 = *(mlib_f32 *) ((mlib_u8 *) table + s00); 142 acc1 = *(mlib_f32 *) ((mlib_u8 *) table + s01); 143 acc2 = *(mlib_f32 *) ((mlib_u8 *) table + s02); 144 acc3 = *(mlib_f32 *) ((mlib_u8 *) table + s03); 145 s0 = *sa++; 146 s00 = (s0 >> 22) & 0x3FC; 147 s01 = (s0 >> 14) & 0x3FC; 148 dp[0] = acc0; 149 dp[1] = acc1; 150 dp[2] = acc2; 151 dp[3] = acc3; 152 } 153 154 s02 = (s0 >> 6) & 0x3FC; 155 s03 = (s0 << 2) & 0x3FC; 156 acc0 = *(mlib_f32 *) ((mlib_u8 *) table + s00); 157 acc1 = *(mlib_f32 *) ((mlib_u8 *) table + s01); 158 acc2 = *(mlib_f32 *) ((mlib_u8 *) table + s02); 159 acc3 = *(mlib_f32 *) ((mlib_u8 *) table + s03); 160 dp[0] = acc0; 161 dp[1] = acc1; 162 dp[2] = acc2; 163 dp[3] = acc3; 164 dp += 4; 165 i += 4; 166 } 167 168 sp = (mlib_u8 *) sa; 169 170 if (i <= xsize - 2) { 171 *dp++ = table[sp[0]]; 172 *dp++ = table[sp[1]]; 173 i += 2; 174 sp += 2; 175 } 176 177 if (i < xsize) 178 *dp = table[sp[0]]; 179} 180 181/***************************************************************/ 182void mlib_v_ImageLookUpSI_U8_S16_2_DstNonAl_D1(const mlib_u8 *src, 183 mlib_s16 *dst, 184 mlib_s32 xsize, 185 const mlib_f32 *table) 186{ 187 mlib_u32 *sa; /* aligned pointer to source data */ 188 mlib_u8 *sp; /* pointer to source data */ 189 mlib_u32 s0; /* source data */ 190 mlib_s16 *dl; /* pointer to start of destination */ 191 mlib_d64 *dp; /* aligned pointer to destination */ 192 mlib_d64 acc0, acc1, acc2; /* destination data */ 193 mlib_s32 i; /* loop variable */ 194 mlib_s16 *dend; /* pointer to end of destination */ 195 mlib_s32 emask; /* edge mask */ 196 mlib_s32 off; 197 mlib_u32 s00, s01, s02, s03; 198 199 sa = (mlib_u32 *) src; 200 sp = (void *)src; 201 dl = dst; 202 dend = dl + (xsize << 1) - 1; 203 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); 204 off = (mlib_addr) dp - (mlib_addr) dl; 205 vis_alignaddr(dp, off); 206 207 emask = vis_edge16(dl, dend); 208 acc0 = vis_freg_pair(table[sp[0]], table[sp[1]]); 209 vis_pst_16(vis_faligndata(acc0, acc0), dp++, emask); 210 sp += 2; 211 212 xsize -= 2; 213 214 if (xsize >= 2) { 215 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 216 *dp++ = vis_faligndata(acc0, acc1); 217 acc0 = acc1; 218 sp += 2; 219 xsize -= 2; 220 } 221 222 sa++; 223 224 i = 0; 225 226 if (xsize >= 4) { 227 228 s0 = *sa++; 229 s00 = (s0 >> 22) & 0x3FC; 230 s01 = (s0 >> 14) & 0x3FC; 231 232#pragma pipeloop(0) 233 for (i = 0; i <= xsize - 8; i += 4, dp += 2) { 234 s02 = (s0 >> 6) & 0x3FC; 235 s03 = (s0 << 2) & 0x3FC; 236 acc1 = vis_freg_pair(*(mlib_f32 *) ((mlib_u8 *) table + s00), 237 *(mlib_f32 *) ((mlib_u8 *) table + s01)); 238 acc2 = vis_freg_pair(*(mlib_f32 *) ((mlib_u8 *) table + s02), 239 *(mlib_f32 *) ((mlib_u8 *) table + s03)); 240 s0 = *sa++; 241 s00 = (s0 >> 22) & 0x3FC; 242 s01 = (s0 >> 14) & 0x3FC; 243 dp[0] = vis_faligndata(acc0, acc1); 244 dp[1] = vis_faligndata(acc1, acc2); 245 acc0 = acc2; 246 } 247 248 s02 = (s0 >> 6) & 0x3FC; 249 s03 = (s0 << 2) & 0x3FC; 250 acc1 = vis_freg_pair(*(mlib_f32 *) ((mlib_u8 *) table + s00), 251 *(mlib_f32 *) ((mlib_u8 *) table + s01)); 252 acc2 = vis_freg_pair(*(mlib_f32 *) ((mlib_u8 *) table + s02), 253 *(mlib_f32 *) ((mlib_u8 *) table + s03)); 254 dp[0] = vis_faligndata(acc0, acc1); 255 dp[1] = vis_faligndata(acc1, acc2); 256 acc0 = acc2; 257 sp = (mlib_u8 *) sa; 258 dp += 2; 259 i += 4; 260 } 261 262 if (i <= xsize - 2) { 263 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 264 *dp++ = vis_faligndata(acc0, acc1); 265 acc0 = acc1; 266 i += 2; 267 sp += 2; 268 } 269 270 if ((mlib_addr) dp <= (mlib_addr) dend) { 271 emask = vis_edge16(dp, dend); 272 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 273 vis_pst_16(vis_faligndata(acc0, acc1), dp++, emask); 274 } 275 276 if ((mlib_addr) dp <= (mlib_addr) dend) { 277 emask = vis_edge16(dp, dend); 278 vis_pst_16(vis_faligndata(acc1, acc1), dp++, emask); 279 } 280} 281 282/***************************************************************/ 283void mlib_v_ImageLookUpSI_U8_S16_2_DstA8D1_SMALL(const mlib_u8 *src, 284 mlib_s16 *dst, 285 mlib_s32 xsize, 286 const mlib_s16 **table) 287{ 288 mlib_u8 *sp; /* pointer to source data */ 289 mlib_u32 s0, s1; /* source data */ 290 mlib_s16 *dl; /* pointer to start of destination */ 291 mlib_d64 *dp; /* aligned pointer to destination */ 292 mlib_d64 t0, t1, t2; /* destination data */ 293 mlib_d64 t3, acc; /* destination data */ 294 mlib_s32 i; /* loop variable */ 295 const mlib_s16 *tab0 = table[0]; 296 const mlib_s16 *tab1 = table[1]; 297 298 sp = (void *)src; 299 dl = dst; 300 dp = (mlib_d64 *) dl; 301 302 vis_alignaddr((void *)0, 6); 303 304 if (xsize >= 2) { 305 306 s0 = (sp[0] << 1); 307 s1 = (sp[1] << 1); 308 sp += 2; 309 310#pragma pipeloop(0) 311 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 312 t3 = VIS_LD_U16_I(tab1, s1); 313 t2 = VIS_LD_U16_I(tab0, s1); 314 t1 = VIS_LD_U16_I(tab1, s0); 315 t0 = VIS_LD_U16_I(tab0, s0); 316 acc = vis_faligndata(t3, acc); 317 acc = vis_faligndata(t2, acc); 318 acc = vis_faligndata(t1, acc); 319 acc = vis_faligndata(t0, acc); 320 s0 = (sp[0] << 1); 321 s1 = (sp[1] << 1); 322 *dp++ = acc; 323 } 324 325 t3 = VIS_LD_U16_I(tab1, s1); 326 t2 = VIS_LD_U16_I(tab0, s1); 327 t1 = VIS_LD_U16_I(tab1, s0); 328 t0 = VIS_LD_U16_I(tab0, s0); 329 acc = vis_faligndata(t3, acc); 330 acc = vis_faligndata(t2, acc); 331 acc = vis_faligndata(t1, acc); 332 acc = vis_faligndata(t0, acc); 333 *dp++ = acc; 334 } 335 336 if ((xsize & 1) != 0) { 337 s0 = (sp[0] << 1); 338 t1 = VIS_LD_U16_I(tab1, s0); 339 t0 = VIS_LD_U16_I(tab0, s0); 340 acc = vis_faligndata(t1, acc); 341 acc = vis_faligndata(t0, acc); 342 *(mlib_f32 *) dp = vis_read_hi(acc); 343 } 344} 345 346/***************************************************************/ 347void mlib_v_ImageLookUpSI_U8_S16_2_D1_SMALL(const mlib_u8 *src, 348 mlib_s16 *dst, 349 mlib_s32 xsize, 350 const mlib_s16 **table) 351{ 352 mlib_u8 *sp; /* pointer to source data */ 353 mlib_u32 s0, s1, s2; /* source data */ 354 mlib_s16 *dl; /* pointer to start of destination */ 355 mlib_d64 *dp; /* aligned pointer to destination */ 356 mlib_d64 t0, t1, t2; /* destination data */ 357 mlib_d64 t3, acc; /* destination data */ 358 mlib_s32 i; /* loop variable */ 359 const mlib_s16 *tab0 = table[0]; 360 const mlib_s16 *tab1 = table[1]; 361 362 sp = (void *)src; 363 dl = dst; 364 365 vis_alignaddr((void *)0, 6); 366 367 s0 = *sp++; 368 *dl++ = tab0[s0]; 369 dp = (mlib_d64 *) dl; 370 xsize--; 371 s0 <<= 1; 372 373 if (xsize >= 2) { 374 375 s1 = (sp[0] << 1); 376 s2 = (sp[1] << 1); 377 sp += 2; 378 379#pragma pipeloop(0) 380 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 381 t3 = VIS_LD_U16_I(tab0, s2); 382 t2 = VIS_LD_U16_I(tab1, s1); 383 t1 = VIS_LD_U16_I(tab0, s1); 384 t0 = VIS_LD_U16_I(tab1, s0); 385 acc = vis_faligndata(t3, acc); 386 acc = vis_faligndata(t2, acc); 387 acc = vis_faligndata(t1, acc); 388 acc = vis_faligndata(t0, acc); 389 s0 = s2; 390 s1 = (sp[0] << 1); 391 s2 = (sp[1] << 1); 392 *dp++ = acc; 393 } 394 395 t3 = VIS_LD_U16_I(tab0, s2); 396 t2 = VIS_LD_U16_I(tab1, s1); 397 t1 = VIS_LD_U16_I(tab0, s1); 398 t0 = VIS_LD_U16_I(tab1, s0); 399 acc = vis_faligndata(t3, acc); 400 acc = vis_faligndata(t2, acc); 401 acc = vis_faligndata(t1, acc); 402 acc = vis_faligndata(t0, acc); 403 s0 = s2; 404 *dp++ = acc; 405 } 406 407 dl = (mlib_s16 *) dp; 408 409 if ((xsize & 1) != 0) { 410 s1 = (sp[0] << 1); 411 t1 = VIS_LD_U16_I(tab0, s1); 412 t0 = VIS_LD_U16_I(tab1, s0); 413 acc = vis_faligndata(t1, acc); 414 acc = vis_faligndata(t0, acc); 415 *(mlib_f32 *) dp = vis_read_hi(acc); 416 s0 = s1; 417 dl += 2; 418 } 419 420 s0 >>= 1; 421 *dl = tab1[s0]; 422} 423 424/***************************************************************/ 425void mlib_v_ImageLookUpSI_U8_S16_2(const mlib_u8 *src, 426 mlib_s32 slb, 427 mlib_s16 *dst, 428 mlib_s32 dlb, 429 mlib_s32 xsize, 430 mlib_s32 ysize, 431 const mlib_s16 **table) 432{ 433 if ((xsize * ysize) < 550) { 434 mlib_u8 *sl; 435 mlib_s16 *dl; 436 mlib_s32 j; 437 const mlib_s16 *tab0 = table[0]; 438 const mlib_s16 *tab1 = table[1]; 439 440 sl = (void *)src; 441 dl = dst; 442 443 /* row loop */ 444 for (j = 0; j < ysize; j++) { 445 mlib_u8 *sp = sl; 446 mlib_s16 *dp = dl; 447 mlib_s32 off, s0, size = xsize; 448 449 off = ((8 - ((mlib_addr) dp & 7)) & 7); 450 451 if ((off >= 4) && (size > 0)) { 452 s0 = *sp++; 453 *dp++ = tab0[s0]; 454 *dp++ = tab1[s0]; 455 size--; 456 } 457 458 if (size > 0) { 459 460 if (((mlib_addr) dp & 7) == 0) { 461 mlib_v_ImageLookUpSI_U8_S16_2_DstA8D1_SMALL(sp, dp, size, table); 462 } 463 else { 464 mlib_v_ImageLookUpSI_U8_S16_2_D1_SMALL(sp, dp, size, table); 465 } 466 } 467 468 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 469 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 470 } 471 } 472 else { 473 mlib_u8 *sl; 474 mlib_s16 *dl; 475 mlib_u32 tab[256]; 476 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 477 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 478 mlib_s32 i, j; 479 mlib_u32 s0, s1, s2; 480 481 s0 = tab0[0]; 482 s1 = tab1[0]; 483 for (i = 1; i < 256; i++) { 484 s2 = (s0 << 16) + s1; 485 s0 = tab0[i]; 486 s1 = tab1[i]; 487 tab[i - 1] = s2; 488 } 489 490 s2 = (s0 << 16) + s1; 491 tab[255] = s2; 492 493 sl = (void *)src; 494 dl = dst; 495 496 /* row loop */ 497 for (j = 0; j < ysize; j++) { 498 mlib_u8 *sp = sl; 499 mlib_s16 *dp = dl; 500 mlib_s32 off, s0, size = xsize; 501 502 if (((mlib_addr) dp & 3) == 0) { 503 504 off = (4 - (mlib_addr) sp & 3) & 3; 505 506 off = (off < size) ? off : size; 507 508#pragma pipeloop(0) 509 for (i = 0; i < off; i++, sp++) { 510 *(mlib_u32 *) dp = tab[(*sp)]; 511 dp += 2; 512 } 513 514 size -= off; 515 516 if (size > 0) { 517 mlib_v_ImageLookUpSI_U8_S16_2_SrcOff0_D1(sp, dp, size, 518 (mlib_f32 *) tab); 519 } 520 } 521 else { 522 523 off = ((4 - ((mlib_addr) sp & 3)) & 3); 524 off = (off < size) ? off : size; 525 526 for (i = 0; i < off; i++) { 527 s0 = tab[(*sp)]; 528 *dp++ = (s0 >> 16); 529 *dp++ = (s0 & 0xFFFF); 530 size--; 531 sp++; 532 } 533 534 if (size > 0) { 535 mlib_v_ImageLookUpSI_U8_S16_2_DstNonAl_D1(sp, dp, size, 536 (mlib_f32 *) tab); 537 } 538 } 539 540 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 541 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 542 } 543 } 544} 545 546/***************************************************************/ 547void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff0_D1(const mlib_u8 *src, 548 mlib_s16 *dst, 549 mlib_s32 xsize, 550 const mlib_d64 *table) 551{ 552 mlib_u8 *sp; /* pointer to source data */ 553 mlib_u32 *sa; /* aligned pointer to source data */ 554 mlib_u32 s0; /* source data */ 555 mlib_s16 *dl; /* pointer to start of destination */ 556 mlib_d64 *dp; /* aligned pointer to destination */ 557 mlib_d64 t0, t1, t2, t3; /* destination data */ 558 mlib_d64 acc0, acc1, acc2; /* destination data */ 559 mlib_s32 i; /* loop variable */ 560 mlib_s16 *ptr; 561 562 dl = dst; 563 sp = (void *)src; 564 dp = (mlib_d64 *) dl; 565 sa = (mlib_u32 *) sp; 566 567 vis_alignaddr((void *)0, 6); 568 569 i = 0; 570 571 if (xsize >= 4) { 572 573 s0 = *sa++; 574 575#pragma pipeloop(0) 576 for (i = 0; i <= xsize - 8; i += 4, dp += 3) { 577 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 21) & 0x7F8)); 578 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 13) & 0x7F8)); 579 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 580 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 581 acc0 = vis_faligndata(t0, t0); 582 acc1 = vis_faligndata(acc0, acc0); 583 acc2 = vis_faligndata(acc0, t1); 584 acc0 = vis_faligndata(acc1, acc1); 585 acc1 = vis_faligndata(acc1, acc2); 586 acc2 = vis_faligndata(acc2, t2); 587 acc0 = vis_faligndata(acc0, acc1); 588 acc1 = vis_faligndata(acc1, acc2); 589 acc2 = vis_faligndata(acc2, t3); 590 s0 = *sa++; 591 dp[0] = acc0; 592 dp[1] = acc1; 593 dp[2] = acc2; 594 } 595 596 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 21) & 0x7F8)); 597 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 13) & 0x7F8)); 598 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 599 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 600 acc0 = vis_faligndata(t0, t0); 601 acc1 = vis_faligndata(acc0, acc0); 602 acc2 = vis_faligndata(acc0, t1); 603 acc0 = vis_faligndata(acc1, acc1); 604 acc1 = vis_faligndata(acc1, acc2); 605 acc2 = vis_faligndata(acc2, t2); 606 acc0 = vis_faligndata(acc0, acc1); 607 acc1 = vis_faligndata(acc1, acc2); 608 acc2 = vis_faligndata(acc2, t3); 609 dp[0] = acc0; 610 dp[1] = acc1; 611 dp[2] = acc2; 612 i += 4; 613 dp += 3; 614 } 615 616 dl = (mlib_s16 *) dp; 617 618#pragma pipeloop(0) 619 for (; i < xsize; i++) { 620 ptr = (mlib_s16 *) (table + src[i]); 621 dl[0] = ptr[0]; 622 dl[1] = ptr[1]; 623 dl[2] = ptr[2]; 624 dl += 3; 625 } 626} 627 628/***************************************************************/ 629void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff1_D1(const mlib_u8 *src, 630 mlib_s16 *dst, 631 mlib_s32 xsize, 632 const mlib_d64 *table) 633{ 634 mlib_u8 *sp; /* pointer to source data */ 635 mlib_u32 *sa; /* aligned pointer to source data */ 636 mlib_u32 s0, s1; /* source data */ 637 mlib_s16 *dl; /* pointer to start of destination */ 638 mlib_d64 *dp; /* aligned pointer to destination */ 639 mlib_d64 t0, t1, t2, t3; /* destination data */ 640 mlib_d64 acc0, acc1, acc2; /* destination data */ 641 mlib_s32 i; /* loop variable */ 642 mlib_s16 *ptr; 643 644 dl = dst; 645 sp = (void *)src; 646 dp = (mlib_d64 *) dl; 647 sa = (mlib_u32 *) (sp - 1); 648 649 i = 0; 650 s0 = *sa++; 651 652 vis_alignaddr((void *)0, 6); 653 654 if (xsize >= 4) { 655 656 s1 = *sa++; 657 658#pragma pipeloop(0) 659 for (i = 0; i <= xsize - 8; i += 4, dp += 3) { 660 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 13) & 0x7F8)); 661 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 662 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 663 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 664 acc0 = vis_faligndata(t0, t0); 665 acc1 = vis_faligndata(acc0, acc0); 666 acc2 = vis_faligndata(acc0, t1); 667 acc0 = vis_faligndata(acc1, acc1); 668 acc1 = vis_faligndata(acc1, acc2); 669 acc2 = vis_faligndata(acc2, t2); 670 acc0 = vis_faligndata(acc0, acc1); 671 acc1 = vis_faligndata(acc1, acc2); 672 acc2 = vis_faligndata(acc2, t3); 673 s0 = s1; 674 s1 = *sa++; 675 dp[0] = acc0; 676 dp[1] = acc1; 677 dp[2] = acc2; 678 } 679 680 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 13) & 0x7F8)); 681 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 682 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 683 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 684 acc0 = vis_faligndata(t0, t0); 685 acc1 = vis_faligndata(acc0, acc0); 686 acc2 = vis_faligndata(acc0, t1); 687 acc0 = vis_faligndata(acc1, acc1); 688 acc1 = vis_faligndata(acc1, acc2); 689 acc2 = vis_faligndata(acc2, t2); 690 acc0 = vis_faligndata(acc0, acc1); 691 acc1 = vis_faligndata(acc1, acc2); 692 acc2 = vis_faligndata(acc2, t3); 693 dp[0] = acc0; 694 dp[1] = acc1; 695 dp[2] = acc2; 696 i += 4; 697 dp += 3; 698 } 699 700 dl = (mlib_s16 *) dp; 701 702#pragma pipeloop(0) 703 for (; i < xsize; i++) { 704 ptr = (mlib_s16 *) (table + src[i]); 705 dl[0] = ptr[0]; 706 dl[1] = ptr[1]; 707 dl[2] = ptr[2]; 708 dl += 3; 709 } 710} 711 712/***************************************************************/ 713void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff2_D1(const mlib_u8 *src, 714 mlib_s16 *dst, 715 mlib_s32 xsize, 716 const mlib_d64 *table) 717{ 718 mlib_u8 *sp; /* pointer to source data */ 719 mlib_u32 *sa; /* aligned pointer to source data */ 720 mlib_u32 s0, s1; /* source data */ 721 mlib_s16 *dl; /* pointer to start of destination */ 722 mlib_d64 *dp; /* aligned pointer to destination */ 723 mlib_d64 t0, t1, t2, t3; /* destination data */ 724 mlib_d64 acc0, acc1, acc2; /* destination data */ 725 mlib_s32 i; /* loop variable */ 726 mlib_s16 *ptr; 727 728 dl = dst; 729 sp = (void *)src; 730 dp = (mlib_d64 *) dl; 731 sa = (mlib_u32 *) (sp - 2); 732 733 i = 0; 734 s0 = *sa++; 735 736 vis_alignaddr((void *)0, 6); 737 738 if (xsize >= 4) { 739 740 s1 = *sa++; 741 742#pragma pipeloop(0) 743 for (i = 0; i <= xsize - 8; i += 4, dp += 3) { 744 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 745 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 746 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 747 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 13) & 0x7F8)); 748 acc0 = vis_faligndata(t0, t0); 749 acc1 = vis_faligndata(acc0, acc0); 750 acc2 = vis_faligndata(acc0, t1); 751 acc0 = vis_faligndata(acc1, acc1); 752 acc1 = vis_faligndata(acc1, acc2); 753 acc2 = vis_faligndata(acc2, t2); 754 acc0 = vis_faligndata(acc0, acc1); 755 acc1 = vis_faligndata(acc1, acc2); 756 acc2 = vis_faligndata(acc2, t3); 757 s0 = s1; 758 s1 = *sa++; 759 dp[0] = acc0; 760 dp[1] = acc1; 761 dp[2] = acc2; 762 } 763 764 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 >> 5) & 0x7F8)); 765 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 766 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 767 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 13) & 0x7F8)); 768 acc0 = vis_faligndata(t0, t0); 769 acc1 = vis_faligndata(acc0, acc0); 770 acc2 = vis_faligndata(acc0, t1); 771 acc0 = vis_faligndata(acc1, acc1); 772 acc1 = vis_faligndata(acc1, acc2); 773 acc2 = vis_faligndata(acc2, t2); 774 acc0 = vis_faligndata(acc0, acc1); 775 acc1 = vis_faligndata(acc1, acc2); 776 acc2 = vis_faligndata(acc2, t3); 777 dp[0] = acc0; 778 dp[1] = acc1; 779 dp[2] = acc2; 780 i += 4; 781 dp += 3; 782 } 783 784 dl = (mlib_s16 *) dp; 785 786#pragma pipeloop(0) 787 for (; i < xsize; i++) { 788 ptr = (mlib_s16 *) (table + src[i]); 789 dl[0] = ptr[0]; 790 dl[1] = ptr[1]; 791 dl[2] = ptr[2]; 792 dl += 3; 793 } 794} 795 796/***************************************************************/ 797void mlib_v_ImageLookUpSI_U8_S16_3_SrcOff3_D1(const mlib_u8 *src, 798 mlib_s16 *dst, 799 mlib_s32 xsize, 800 const mlib_d64 *table) 801{ 802 mlib_u8 *sp; /* pointer to source data */ 803 mlib_u32 *sa; /* aligned pointer to source data */ 804 mlib_u32 s0, s1; /* source data */ 805 mlib_s16 *dl; /* pointer to start of destination */ 806 mlib_d64 *dp; /* aligned pointer to destination */ 807 mlib_d64 t0, t1, t2, t3; /* destination data */ 808 mlib_d64 acc0, acc1, acc2; /* destination data */ 809 mlib_s32 i; /* loop variable */ 810 mlib_s16 *ptr; 811 812 dl = dst; 813 sp = (void *)src; 814 dp = (mlib_d64 *) dl; 815 sa = (mlib_u32 *) (sp - 3); 816 817 i = 0; 818 s0 = *sa++; 819 820 vis_alignaddr((void *)0, 6); 821 822 if (xsize >= 4) { 823 824 s1 = *sa++; 825 826#pragma pipeloop(0) 827 for (i = 0; i <= xsize - 8; i += 4, dp += 3) { 828 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 829 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 830 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 13) & 0x7F8)); 831 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 5) & 0x7F8)); 832 acc0 = vis_faligndata(t0, t0); 833 acc1 = vis_faligndata(acc0, acc0); 834 acc2 = vis_faligndata(acc0, t1); 835 acc0 = vis_faligndata(acc1, acc1); 836 acc1 = vis_faligndata(acc1, acc2); 837 acc2 = vis_faligndata(acc2, t2); 838 acc0 = vis_faligndata(acc0, acc1); 839 acc1 = vis_faligndata(acc1, acc2); 840 acc2 = vis_faligndata(acc2, t3); 841 s0 = s1; 842 s1 = *sa++; 843 dp[0] = acc0; 844 dp[1] = acc1; 845 dp[2] = acc2; 846 } 847 848 t0 = *(mlib_d64 *) ((mlib_u8 *) table + ((s0 << 3) & 0x7F8)); 849 t1 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 21) & 0x7F8)); 850 t2 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 13) & 0x7F8)); 851 t3 = *(mlib_d64 *) ((mlib_u8 *) table + ((s1 >> 5) & 0x7F8)); 852 acc0 = vis_faligndata(t0, t0); 853 acc1 = vis_faligndata(acc0, acc0); 854 acc2 = vis_faligndata(acc0, t1); 855 acc0 = vis_faligndata(acc1, acc1); 856 acc1 = vis_faligndata(acc1, acc2); 857 acc2 = vis_faligndata(acc2, t2); 858 acc0 = vis_faligndata(acc0, acc1); 859 acc1 = vis_faligndata(acc1, acc2); 860 acc2 = vis_faligndata(acc2, t3); 861 dp[0] = acc0; 862 dp[1] = acc1; 863 dp[2] = acc2; 864 i += 4; 865 dp += 3; 866 } 867 868 dl = (mlib_s16 *) dp; 869 870#pragma pipeloop(0) 871 for (; i < xsize; i++) { 872 ptr = (mlib_s16 *) (table + src[i]); 873 dl[0] = ptr[0]; 874 dl[1] = ptr[1]; 875 dl[2] = ptr[2]; 876 dl += 3; 877 } 878} 879 880/***************************************************************/ 881void mlib_v_ImageLookUpSI_U8_S16_3_D1_SMALL(const mlib_u8 *src, 882 mlib_s16 *dst, 883 mlib_s32 xsize, 884 const mlib_s16 **table) 885{ 886 mlib_u8 *sp; /* pointer to source data */ 887 mlib_s16 *dl; /* pointer to start of destination */ 888 mlib_d64 *dp; /* aligned pointer to destination */ 889 mlib_d64 t0, t1, t2, t3; /* destination data */ 890 mlib_d64 acc0, acc1, acc2; /* destination data */ 891 mlib_s32 i; /* loop variable */ 892 const mlib_s16 *tab0 = table[0]; 893 const mlib_s16 *tab1 = table[1]; 894 const mlib_s16 *tab2 = table[2]; 895 mlib_u32 s00, s01, s02, s03; 896 897 sp = (void *)src; 898 dl = dst; 899 dp = (mlib_d64 *) dl; 900 901 vis_alignaddr((void *)0, 6); 902 903 i = 0; 904 905 if (xsize >= 4) { 906 907 s00 = (sp[0] << 1); 908 s01 = (sp[1] << 1); 909 s02 = (sp[2] << 1); 910 s03 = (sp[3] << 1); 911 sp += 4; 912 913#pragma pipeloop(0) 914 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 915 t3 = VIS_LD_U16_I(tab0, s01); 916 t2 = VIS_LD_U16_I(tab2, s00); 917 t1 = VIS_LD_U16_I(tab1, s00); 918 t0 = VIS_LD_U16_I(tab0, s00); 919 acc0 = vis_faligndata(t3, acc0); 920 acc0 = vis_faligndata(t2, acc0); 921 acc0 = vis_faligndata(t1, acc0); 922 acc0 = vis_faligndata(t0, acc0); 923 t3 = VIS_LD_U16_I(tab1, s02); 924 t2 = VIS_LD_U16_I(tab0, s02); 925 t1 = VIS_LD_U16_I(tab2, s01); 926 t0 = VIS_LD_U16_I(tab1, s01); 927 acc1 = vis_faligndata(t3, acc1); 928 acc1 = vis_faligndata(t2, acc1); 929 acc1 = vis_faligndata(t1, acc1); 930 acc1 = vis_faligndata(t0, acc1); 931 t3 = VIS_LD_U16_I(tab2, s03); 932 t2 = VIS_LD_U16_I(tab1, s03); 933 t1 = VIS_LD_U16_I(tab0, s03); 934 t0 = VIS_LD_U16_I(tab2, s02); 935 acc2 = vis_faligndata(t3, acc2); 936 acc2 = vis_faligndata(t2, acc2); 937 acc2 = vis_faligndata(t1, acc2); 938 acc2 = vis_faligndata(t0, acc2); 939 s00 = (sp[0] << 1); 940 s01 = (sp[1] << 1); 941 s02 = (sp[2] << 1); 942 s03 = (sp[3] << 1); 943 *dp++ = acc0; 944 *dp++ = acc1; 945 *dp++ = acc2; 946 } 947 948 t3 = VIS_LD_U16_I(tab0, s01); 949 t2 = VIS_LD_U16_I(tab2, s00); 950 t1 = VIS_LD_U16_I(tab1, s00); 951 t0 = VIS_LD_U16_I(tab0, s00); 952 acc0 = vis_faligndata(t3, acc0); 953 acc0 = vis_faligndata(t2, acc0); 954 acc0 = vis_faligndata(t1, acc0); 955 acc0 = vis_faligndata(t0, acc0); 956 t3 = VIS_LD_U16_I(tab1, s02); 957 t2 = VIS_LD_U16_I(tab0, s02); 958 t1 = VIS_LD_U16_I(tab2, s01); 959 t0 = VIS_LD_U16_I(tab1, s01); 960 acc1 = vis_faligndata(t3, acc1); 961 acc1 = vis_faligndata(t2, acc1); 962 acc1 = vis_faligndata(t1, acc1); 963 acc1 = vis_faligndata(t0, acc1); 964 t3 = VIS_LD_U16_I(tab2, s03); 965 t2 = VIS_LD_U16_I(tab1, s03); 966 t1 = VIS_LD_U16_I(tab0, s03); 967 t0 = VIS_LD_U16_I(tab2, s02); 968 acc2 = vis_faligndata(t3, acc2); 969 acc2 = vis_faligndata(t2, acc2); 970 acc2 = vis_faligndata(t1, acc2); 971 acc2 = vis_faligndata(t0, acc2); 972 *dp++ = acc0; 973 *dp++ = acc1; 974 *dp++ = acc2; 975 i += 4; 976 } 977 978 dl = (mlib_s16 *) dp; 979 980#pragma pipeloop(0) 981 for (; i < xsize; i++) { 982 s00 = sp[0]; 983 dl[0] = tab0[s00]; 984 dl[1] = tab1[s00]; 985 dl[2] = tab2[s00]; 986 dl += 3; 987 sp++; 988 } 989} 990 991/***************************************************************/ 992void mlib_v_ImageLookUpSI_U8_S16_3(const mlib_u8 *src, 993 mlib_s32 slb, 994 mlib_s16 *dst, 995 mlib_s32 dlb, 996 mlib_s32 xsize, 997 mlib_s32 ysize, 998 const mlib_s16 **table) 999{ 1000 if ((xsize * ysize) < 550) { 1001 mlib_u8 *sl; 1002 mlib_s16 *dl; 1003 mlib_s32 i, j; 1004 const mlib_s16 *tab0 = table[0]; 1005 const mlib_s16 *tab1 = table[1]; 1006 const mlib_s16 *tab2 = table[2]; 1007 1008 sl = (void *)src; 1009 dl = dst; 1010 1011 /* row loop */ 1012 for (j = 0; j < ysize; j++) { 1013 mlib_u8 *sp = sl; 1014 mlib_s16 *dp = dl; 1015 mlib_s32 off, s0, size = xsize; 1016 1017 off = ((mlib_addr) dp & 7) >> 1; 1018 off = (off < size) ? off : size; 1019 1020 for (i = 0; i < off; i++) { 1021 s0 = *sp++; 1022 *dp++ = tab0[s0]; 1023 *dp++ = tab1[s0]; 1024 *dp++ = tab2[s0]; 1025 size--; 1026 } 1027 1028 if (size > 0) { 1029 mlib_v_ImageLookUpSI_U8_S16_3_D1_SMALL(sp, dp, size, table); 1030 } 1031 1032 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1033 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1034 } 1035 } 1036 else { 1037 mlib_u8 *sl; 1038 mlib_s16 *dl; 1039 mlib_u32 tab[512]; 1040 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 1041 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 1042 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 1043 mlib_s32 i, j; 1044 mlib_u32 s0, s1, s2, s3; 1045 1046 s0 = tab0[0]; 1047 s1 = tab1[0]; 1048 s2 = tab2[0]; 1049 for (i = 1; i < 256; i++) { 1050 s3 = (s0 << 16) + s1; 1051 s0 = tab0[i]; 1052 s1 = tab1[i]; 1053 tab[2 * i - 2] = s3; 1054 tab[2 * i - 1] = (s2 << 16); 1055 s2 = tab2[i]; 1056 } 1057 1058 s3 = (s0 << 16) + s1; 1059 tab[510] = s3; 1060 tab[511] = (s2 << 16); 1061 1062 sl = (void *)src; 1063 dl = dst; 1064 1065 /* row loop */ 1066 for (j = 0; j < ysize; j++) { 1067 mlib_u8 *sp = sl; 1068 mlib_s16 *dp = dl; 1069 mlib_s32 off, size = xsize; 1070 mlib_s16 *ptr; 1071 1072 off = ((mlib_addr) dp & 7) >> 1; 1073 off = (off < size) ? off : size; 1074 1075#pragma pipeloop(0) 1076 for (i = 0; i < off; i++) { 1077 ptr = (mlib_s16 *) (tab + 2 * sp[i]); 1078 dp[0] = ptr[0]; 1079 dp[1] = ptr[1]; 1080 dp[2] = ptr[2]; 1081 dp += 3; 1082 } 1083 1084 size -= off; 1085 sp += off; 1086 1087 if (size > 0) { 1088 off = (mlib_addr) sp & 3; 1089 1090 if (off == 0) { 1091 mlib_v_ImageLookUpSI_U8_S16_3_SrcOff0_D1(sp, dp, size, 1092 (mlib_d64 *) tab); 1093 } 1094 else if (off == 1) { 1095 mlib_v_ImageLookUpSI_U8_S16_3_SrcOff1_D1(sp, dp, size, 1096 (mlib_d64 *) tab); 1097 } 1098 else if (off == 2) { 1099 mlib_v_ImageLookUpSI_U8_S16_3_SrcOff2_D1(sp, dp, size, 1100 (mlib_d64 *) tab); 1101 } 1102 else if (off == 3) { 1103 mlib_v_ImageLookUpSI_U8_S16_3_SrcOff3_D1(sp, dp, size, 1104 (mlib_d64 *) tab); 1105 } 1106 } 1107 1108 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1109 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1110 } 1111 } 1112} 1113 1114/***************************************************************/ 1115void mlib_v_ImageLookUpSI_U8_S16_4_DstA8D1_D1(const mlib_u8 *src, 1116 mlib_s16 *dst, 1117 mlib_s32 xsize, 1118 const mlib_d64 *table) 1119{ 1120 mlib_u32 *sa; /* aligned pointer to source data */ 1121 mlib_u8 *sp; /* pointer to source data */ 1122 mlib_u32 s0; /* source data */ 1123 mlib_d64 *dp; /* aligned pointer to destination */ 1124 mlib_d64 acc0, acc1; /* destination data */ 1125 mlib_d64 acc2, acc3; /* destination data */ 1126 mlib_s32 i; /* loop variable */ 1127 mlib_u32 s00, s01, s02, s03; 1128 1129 sa = (mlib_u32 *) src; 1130 dp = (mlib_d64 *) dst; 1131 1132 i = 0; 1133 1134 if (xsize >= 4) { 1135 1136 s0 = *sa++; 1137 s00 = (s0 >> 21) & 0x7F8; 1138 s01 = (s0 >> 13) & 0x7F8; 1139 1140#pragma pipeloop(0) 1141 for (i = 0; i <= xsize - 8; i += 4, dp += 4) { 1142 s02 = (s0 >> 5) & 0x7F8; 1143 s03 = (s0 << 3) & 0x7F8; 1144 acc0 = *(mlib_d64 *) ((mlib_u8 *) table + s00); 1145 acc1 = *(mlib_d64 *) ((mlib_u8 *) table + s01); 1146 acc2 = *(mlib_d64 *) ((mlib_u8 *) table + s02); 1147 acc3 = *(mlib_d64 *) ((mlib_u8 *) table + s03); 1148 s0 = *sa++; 1149 s00 = (s0 >> 21) & 0x7F8; 1150 s01 = (s0 >> 13) & 0x7F8; 1151 dp[0] = acc0; 1152 dp[1] = acc1; 1153 dp[2] = acc2; 1154 dp[3] = acc3; 1155 } 1156 1157 s02 = (s0 >> 5) & 0x7F8; 1158 s03 = (s0 << 3) & 0x7F8; 1159 acc0 = *(mlib_d64 *) ((mlib_u8 *) table + s00); 1160 acc1 = *(mlib_d64 *) ((mlib_u8 *) table + s01); 1161 acc2 = *(mlib_d64 *) ((mlib_u8 *) table + s02); 1162 acc3 = *(mlib_d64 *) ((mlib_u8 *) table + s03); 1163 dp[0] = acc0; 1164 dp[1] = acc1; 1165 dp[2] = acc2; 1166 dp[3] = acc3; 1167 dp += 4; 1168 i += 4; 1169 } 1170 1171 sp = (mlib_u8 *) sa; 1172 1173 if (i <= xsize - 2) { 1174 *dp++ = table[sp[0]]; 1175 *dp++ = table[sp[1]]; 1176 i += 2; 1177 sp += 2; 1178 } 1179 1180 if (i < xsize) 1181 *dp++ = table[sp[0]]; 1182} 1183 1184/***************************************************************/ 1185void mlib_v_ImageLookUpSI_U8_S16_4_DstNonAl_D1(const mlib_u8 *src, 1186 mlib_s16 *dst, 1187 mlib_s32 xsize, 1188 const mlib_d64 *table) 1189{ 1190 mlib_u32 *sa; /* aligned pointer to source data */ 1191 mlib_u8 *sp; /* pointer to source data */ 1192 mlib_u32 s0; /* source data */ 1193 mlib_s16 *dl; /* pointer to start of destination */ 1194 mlib_d64 *dp; /* aligned pointer to destination */ 1195 mlib_d64 acc0, acc1; /* destination data */ 1196 mlib_d64 acc2, acc3, acc4; /* destination data */ 1197 mlib_s32 i; /* loop variable */ 1198 mlib_s16 *dend; /* pointer to end of destination */ 1199 mlib_s32 emask; /* edge mask */ 1200 mlib_s32 off; 1201 mlib_u32 s00, s01, s02, s03; 1202 1203 sp = (void *)src; 1204 dl = dst; 1205 dend = dl + (xsize << 2) - 1; 1206 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); 1207 off = (mlib_addr) dp - (mlib_addr) dl; 1208 vis_alignaddr(dp, off); 1209 1210 emask = vis_edge16(dl, dend); 1211 acc0 = table[sp[0]]; 1212 vis_pst_16(vis_faligndata(acc0, acc0), dp++, emask); 1213 sp++; 1214 1215 sa = (mlib_u32 *) sp; 1216 1217 xsize--; 1218 1219 i = 0; 1220 1221 if (xsize >= 4) { 1222 1223 s0 = *sa++; 1224 s00 = (s0 >> 21) & 0x7F8; 1225 s01 = (s0 >> 13) & 0x7F8; 1226 1227#pragma pipeloop(0) 1228 for (i = 0; i <= xsize - 8; i += 4, dp += 4) { 1229 s02 = (s0 >> 5) & 0x7F8; 1230 s03 = (s0 << 3) & 0x7F8; 1231 acc1 = *(mlib_d64 *) ((mlib_u8 *) table + s00); 1232 acc2 = *(mlib_d64 *) ((mlib_u8 *) table + s01); 1233 acc3 = *(mlib_d64 *) ((mlib_u8 *) table + s02); 1234 acc4 = *(mlib_d64 *) ((mlib_u8 *) table + s03); 1235 s0 = *sa++; 1236 s00 = (s0 >> 21) & 0x7F8; 1237 s01 = (s0 >> 13) & 0x7F8; 1238 dp[0] = vis_faligndata(acc0, acc1); 1239 dp[1] = vis_faligndata(acc1, acc2); 1240 dp[2] = vis_faligndata(acc2, acc3); 1241 dp[3] = vis_faligndata(acc3, acc4); 1242 acc0 = acc4; 1243 } 1244 1245 s02 = (s0 >> 5) & 0x7F8; 1246 s03 = (s0 << 3) & 0x7F8; 1247 acc1 = *(mlib_d64 *) ((mlib_u8 *) table + s00); 1248 acc2 = *(mlib_d64 *) ((mlib_u8 *) table + s01); 1249 acc3 = *(mlib_d64 *) ((mlib_u8 *) table + s02); 1250 acc4 = *(mlib_d64 *) ((mlib_u8 *) table + s03); 1251 dp[0] = vis_faligndata(acc0, acc1); 1252 dp[1] = vis_faligndata(acc1, acc2); 1253 dp[2] = vis_faligndata(acc2, acc3); 1254 dp[3] = vis_faligndata(acc3, acc4); 1255 acc0 = acc4; 1256 dp += 4; 1257 i += 4; 1258 } 1259 1260 sp = (mlib_u8 *) sa; 1261 1262 if (i <= xsize - 2) { 1263 acc1 = table[sp[0]]; 1264 acc2 = table[sp[1]]; 1265 *dp++ = vis_faligndata(acc0, acc1); 1266 *dp++ = vis_faligndata(acc1, acc2); 1267 i += 2; 1268 sp += 2; 1269 acc0 = acc2; 1270 } 1271 1272 if (i < xsize) { 1273 acc1 = table[sp[0]]; 1274 *dp++ = vis_faligndata(acc0, acc1); 1275 acc0 = acc1; 1276 } 1277 1278 emask = vis_edge16(dp, dend); 1279 vis_pst_16(vis_faligndata(acc0, acc0), dp++, emask); 1280} 1281 1282/***************************************************************/ 1283void mlib_v_ImageLookUpSI_U8_S16_4_DstOff0_D1_SMALL(const mlib_u8 *src, 1284 mlib_s16 *dst, 1285 mlib_s32 xsize, 1286 const mlib_s16 **table) 1287{ 1288 mlib_u8 *sp; /* pointer to source data */ 1289 mlib_u32 s0; /* source data */ 1290 mlib_s16 *dl; /* pointer to start of destination */ 1291 mlib_d64 *dp; /* aligned pointer to destination */ 1292 mlib_d64 t0, t1, t2, t3; /* destination data */ 1293 mlib_d64 acc; /* destination data */ 1294 mlib_s32 i; /* loop variable */ 1295 const mlib_s16 *tab0 = table[0]; 1296 const mlib_s16 *tab1 = table[1]; 1297 const mlib_s16 *tab2 = table[2]; 1298 const mlib_s16 *tab3 = table[3]; 1299 1300 sp = (void *)src; 1301 dl = dst; 1302 dp = (mlib_d64 *) dl; 1303 1304 vis_alignaddr((void *)0, 6); 1305 1306 if (xsize >= 1) { 1307 1308 s0 = (*sp++) << 1; 1309 1310#pragma pipeloop(0) 1311 for (i = 0; i <= xsize - 2; i++) { 1312 t3 = VIS_LD_U16_I(tab3, s0); 1313 t2 = VIS_LD_U16_I(tab2, s0); 1314 t1 = VIS_LD_U16_I(tab1, s0); 1315 t0 = VIS_LD_U16_I(tab0, s0); 1316 acc = vis_faligndata(t3, acc); 1317 acc = vis_faligndata(t2, acc); 1318 acc = vis_faligndata(t1, acc); 1319 acc = vis_faligndata(t0, acc); 1320 s0 = (*sp++) << 1; 1321 *dp++ = acc; 1322 } 1323 1324 t3 = VIS_LD_U16_I(tab3, s0); 1325 t2 = VIS_LD_U16_I(tab2, s0); 1326 t1 = VIS_LD_U16_I(tab1, s0); 1327 t0 = VIS_LD_U16_I(tab0, s0); 1328 acc = vis_faligndata(t3, acc); 1329 acc = vis_faligndata(t2, acc); 1330 acc = vis_faligndata(t1, acc); 1331 acc = vis_faligndata(t0, acc); 1332 *dp++ = acc; 1333 } 1334} 1335 1336/***************************************************************/ 1337void mlib_v_ImageLookUpSI_U8_S16_4_DstOff1_D1_SMALL(const mlib_u8 *src, 1338 mlib_s16 *dst, 1339 mlib_s32 xsize, 1340 const mlib_s16 **table) 1341{ 1342 mlib_u8 *sp; /* pointer to source data */ 1343 mlib_u32 s0, s1; /* source data */ 1344 mlib_s16 *dl; /* pointer to start of destination */ 1345 mlib_d64 *dp; /* aligned pointer to destination */ 1346 mlib_d64 t0, t1, t2, t3; /* destination data */ 1347 mlib_d64 acc; /* destination data */ 1348 mlib_s32 i; /* loop variable */ 1349 const mlib_s16 *tab0 = table[0]; 1350 const mlib_s16 *tab1 = table[1]; 1351 const mlib_s16 *tab2 = table[2]; 1352 const mlib_s16 *tab3 = table[3]; 1353 1354 sp = (void *)src; 1355 dl = dst; 1356 dp = (mlib_d64 *) dl; 1357 1358 vis_alignaddr((void *)0, 6); 1359 1360 s0 = (*sp++) << 1; 1361 1362 if (xsize >= 1) { 1363 1364 s1 = (*sp++) << 1; 1365 1366#pragma pipeloop(0) 1367 for (i = 0; i <= xsize - 2; i++) { 1368 t3 = VIS_LD_U16_I(tab0, s1); 1369 t2 = VIS_LD_U16_I(tab3, s0); 1370 t1 = VIS_LD_U16_I(tab2, s0); 1371 t0 = VIS_LD_U16_I(tab1, s0); 1372 acc = vis_faligndata(t3, acc); 1373 acc = vis_faligndata(t2, acc); 1374 acc = vis_faligndata(t1, acc); 1375 acc = vis_faligndata(t0, acc); 1376 s0 = s1; 1377 s1 = (*sp++) << 1; 1378 *dp++ = acc; 1379 } 1380 1381 t3 = VIS_LD_U16_I(tab0, s1); 1382 t2 = VIS_LD_U16_I(tab3, s0); 1383 t1 = VIS_LD_U16_I(tab2, s0); 1384 t0 = VIS_LD_U16_I(tab1, s0); 1385 acc = vis_faligndata(t3, acc); 1386 acc = vis_faligndata(t2, acc); 1387 acc = vis_faligndata(t1, acc); 1388 acc = vis_faligndata(t0, acc); 1389 s0 = s1; 1390 *dp++ = acc; 1391 } 1392 1393 dl = (mlib_s16 *) dp; 1394 s0 >>= 1; 1395 1396 dl[0] = tab1[s0]; 1397 dl[1] = tab2[s0]; 1398 dl[2] = tab3[s0]; 1399} 1400 1401/***************************************************************/ 1402void mlib_v_ImageLookUpSI_U8_S16_4_DstOff2_D1_SMALL(const mlib_u8 *src, 1403 mlib_s16 *dst, 1404 mlib_s32 xsize, 1405 const mlib_s16 **table) 1406{ 1407 mlib_u8 *sp; /* pointer to source data */ 1408 mlib_u32 s0, s1; /* source data */ 1409 mlib_s16 *dl; /* pointer to start of destination */ 1410 mlib_d64 *dp; /* aligned pointer to destination */ 1411 mlib_d64 t0, t1, t2, t3; /* destination data */ 1412 mlib_d64 acc; /* destination data */ 1413 mlib_s32 i; /* loop variable */ 1414 const mlib_s16 *tab0 = table[0]; 1415 const mlib_s16 *tab1 = table[1]; 1416 const mlib_s16 *tab2 = table[2]; 1417 const mlib_s16 *tab3 = table[3]; 1418 1419 sp = (void *)src; 1420 dl = dst; 1421 dp = (mlib_d64 *) dl; 1422 1423 vis_alignaddr((void *)0, 6); 1424 1425 s0 = (*sp++) << 1; 1426 1427 if (xsize >= 1) { 1428 1429 s1 = (*sp++) << 1; 1430 1431#pragma pipeloop(0) 1432 for (i = 0; i <= xsize - 2; i++) { 1433 t3 = VIS_LD_U16_I(tab1, s1); 1434 t2 = VIS_LD_U16_I(tab0, s1); 1435 t1 = VIS_LD_U16_I(tab3, s0); 1436 t0 = VIS_LD_U16_I(tab2, s0); 1437 acc = vis_faligndata(t3, acc); 1438 acc = vis_faligndata(t2, acc); 1439 acc = vis_faligndata(t1, acc); 1440 acc = vis_faligndata(t0, acc); 1441 s0 = s1; 1442 s1 = (*sp++) << 1; 1443 *dp++ = acc; 1444 } 1445 1446 t3 = VIS_LD_U16_I(tab1, s1); 1447 t2 = VIS_LD_U16_I(tab0, s1); 1448 t1 = VIS_LD_U16_I(tab3, s0); 1449 t0 = VIS_LD_U16_I(tab2, s0); 1450 acc = vis_faligndata(t3, acc); 1451 acc = vis_faligndata(t2, acc); 1452 acc = vis_faligndata(t1, acc); 1453 acc = vis_faligndata(t0, acc); 1454 s0 = s1; 1455 *dp++ = acc; 1456 } 1457 1458 dl = (mlib_s16 *) dp; 1459 s0 >>= 1; 1460 1461 dl[0] = tab2[s0]; 1462 dl[1] = tab3[s0]; 1463} 1464 1465/***************************************************************/ 1466void mlib_v_ImageLookUpSI_U8_S16_4_DstOff3_D1_SMALL(const mlib_u8 *src, 1467 mlib_s16 *dst, 1468 mlib_s32 xsize, 1469 const mlib_s16 **table) 1470{ 1471 mlib_u8 *sp; /* pointer to source data */ 1472 mlib_u32 s0, s1; /* source data */ 1473 mlib_s16 *dl; /* pointer to start of destination */ 1474 mlib_d64 *dp; /* aligned pointer to destination */ 1475 mlib_d64 t0, t1, t2, t3; /* destination data */ 1476 mlib_d64 acc; /* destination data */ 1477 mlib_s32 i; /* loop variable */ 1478 const mlib_s16 *tab0 = table[0]; 1479 const mlib_s16 *tab1 = table[1]; 1480 const mlib_s16 *tab2 = table[2]; 1481 const mlib_s16 *tab3 = table[3]; 1482 1483 sp = (void *)src; 1484 dl = dst; 1485 dp = (mlib_d64 *) dl; 1486 1487 vis_alignaddr((void *)0, 6); 1488 1489 s0 = (*sp++) << 1; 1490 1491 if (xsize >= 1) { 1492 1493 s1 = (*sp++) << 1; 1494 1495#pragma pipeloop(0) 1496 for (i = 0; i <= xsize - 2; i++) { 1497 t3 = VIS_LD_U16_I(tab2, s1); 1498 t2 = VIS_LD_U16_I(tab1, s1); 1499 t1 = VIS_LD_U16_I(tab0, s1); 1500 t0 = VIS_LD_U16_I(tab3, s0); 1501 acc = vis_faligndata(t3, acc); 1502 acc = vis_faligndata(t2, acc); 1503 acc = vis_faligndata(t1, acc); 1504 acc = vis_faligndata(t0, acc); 1505 s0 = s1; 1506 s1 = (*sp++) << 1; 1507 *dp++ = acc; 1508 } 1509 1510 t3 = VIS_LD_U16_I(tab2, s1); 1511 t2 = VIS_LD_U16_I(tab1, s1); 1512 t1 = VIS_LD_U16_I(tab0, s1); 1513 t0 = VIS_LD_U16_I(tab3, s0); 1514 acc = vis_faligndata(t3, acc); 1515 acc = vis_faligndata(t2, acc); 1516 acc = vis_faligndata(t1, acc); 1517 acc = vis_faligndata(t0, acc); 1518 s0 = s1; 1519 *dp++ = acc; 1520 } 1521 1522 dl = (mlib_s16 *) dp; 1523 s0 >>= 1; 1524 1525 dl[0] = tab3[s0]; 1526} 1527 1528/***************************************************************/ 1529void mlib_v_ImageLookUpSI_U8_S16_4(const mlib_u8 *src, 1530 mlib_s32 slb, 1531 mlib_s16 *dst, 1532 mlib_s32 dlb, 1533 mlib_s32 xsize, 1534 mlib_s32 ysize, 1535 const mlib_s16 **table) 1536{ 1537 if ((xsize * ysize) < 550) { 1538 mlib_u8 *sl; 1539 mlib_s16 *dl; 1540 mlib_s32 j; 1541 const mlib_s16 *tab0 = table[0]; 1542 const mlib_s16 *tab1 = table[1]; 1543 const mlib_s16 *tab2 = table[2]; 1544 1545 sl = (void *)src; 1546 dl = dst; 1547 1548 /* row loop */ 1549 for (j = 0; j < ysize; j++) { 1550 mlib_u8 *sp = sl; 1551 mlib_s16 *dp = dl; 1552 mlib_s32 off, s0, size = xsize; 1553 1554 if (size > 0) { 1555 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1; 1556 1557 if (off == 0) { 1558 mlib_v_ImageLookUpSI_U8_S16_4_DstOff0_D1_SMALL(sp, dp, size, table); 1559 } 1560 else if (off == 1) { 1561 s0 = *sp; 1562 *dp++ = tab0[s0]; 1563 size--; 1564 mlib_v_ImageLookUpSI_U8_S16_4_DstOff1_D1_SMALL(sp, dp, size, table); 1565 } 1566 else if (off == 2) { 1567 s0 = *sp; 1568 *dp++ = tab0[s0]; 1569 *dp++ = tab1[s0]; 1570 size--; 1571 mlib_v_ImageLookUpSI_U8_S16_4_DstOff2_D1_SMALL(sp, dp, size, table); 1572 } 1573 else if (off == 3) { 1574 s0 = *sp; 1575 *dp++ = tab0[s0]; 1576 *dp++ = tab1[s0]; 1577 *dp++ = tab2[s0]; 1578 size--; 1579 mlib_v_ImageLookUpSI_U8_S16_4_DstOff3_D1_SMALL(sp, dp, size, table); 1580 } 1581 } 1582 1583 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1584 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1585 } 1586 } 1587 else { 1588 mlib_u8 *sl; 1589 mlib_s16 *dl; 1590 mlib_u32 tab[512]; 1591 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 1592 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 1593 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 1594 mlib_u16 *tab3 = (mlib_u16 *) table[3]; 1595 mlib_s32 i, j; 1596 mlib_u32 s0, s1, s2, s3, s4, s5; 1597 1598 s0 = tab0[0]; 1599 s1 = tab1[0]; 1600 s2 = tab2[0]; 1601 s3 = tab3[0]; 1602 for (i = 1; i < 256; i++) { 1603 s4 = (s0 << 16) + s1; 1604 s5 = (s2 << 16) + s3; 1605 s0 = tab0[i]; 1606 s1 = tab1[i]; 1607 s2 = tab2[i]; 1608 s3 = tab3[i]; 1609 tab[2 * i - 2] = s4; 1610 tab[2 * i - 1] = s5; 1611 } 1612 1613 s4 = (s0 << 16) + s1; 1614 s5 = (s2 << 16) + s3; 1615 tab[510] = s4; 1616 tab[511] = s5; 1617 1618 sl = (void *)src; 1619 dl = dst; 1620 1621 /* row loop */ 1622 for (j = 0; j < ysize; j++) { 1623 mlib_u8 *sp = sl; 1624 mlib_s16 *dp = dl; 1625 mlib_s32 off, s0, size = xsize; 1626 mlib_s16 *ptr; 1627 1628 if (((mlib_addr) dp & 7) == 0) { 1629 1630 off = ((4 - (mlib_addr) sp & 3) & 3); 1631 off = (off < size) ? off : size; 1632 1633#pragma pipeloop(0) 1634 for (i = 0; i < off; i++) { 1635 s0 = (*sp++); 1636 *(mlib_u32 *) dp = tab[2 * s0]; 1637 *(mlib_u32 *) (dp + 2) = tab[2 * s0 + 1]; 1638 dp += 4; 1639 } 1640 1641 size -= off; 1642 1643 if (size > 0) { 1644 mlib_v_ImageLookUpSI_U8_S16_4_DstA8D1_D1(sp, dp, size, 1645 (mlib_d64 *) tab); 1646 } 1647 } 1648 else { 1649 1650 off = (3 - ((mlib_addr) sp & 3)); 1651 off = (off < size) ? off : size; 1652 1653 for (i = 0; i < off; i++) { 1654 ptr = (mlib_s16 *) (tab + 2 * sp[i]); 1655 dp[0] = ptr[0]; 1656 dp[1] = ptr[1]; 1657 dp[2] = ptr[2]; 1658 dp[3] = ptr[3]; 1659 dp += 4; 1660 } 1661 1662 sp += off; 1663 size -= off; 1664 1665 if (size > 0) { 1666 mlib_v_ImageLookUpSI_U8_S16_4_DstNonAl_D1(sp, dp, size, 1667 (mlib_d64 *) tab); 1668 } 1669 } 1670 1671 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1672 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1673 } 1674 } 1675} 1676 1677/***************************************************************/ 1678