1/* 2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28#include "vis_proto.h" 29#include "mlib_image.h" 30#include "mlib_v_ImageLookUpFunc.h" 31 32/***************************************************************/ 33static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src, 34 mlib_u8 *dst, 35 mlib_s32 xsize, 36 const mlib_u16 *table); 37 38static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src, 39 mlib_u8 *dst, 40 mlib_s32 xsize, 41 const mlib_u16 *table); 42 43static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src, 44 mlib_u8 *dst, 45 mlib_s32 xsize, 46 const mlib_u16 *table); 47 48static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src, 49 mlib_u8 *dst, 50 mlib_s32 xsize, 51 const mlib_u16 *table); 52 53static void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src, 54 mlib_u8 *dst, 55 mlib_s32 xsize, 56 const mlib_u16 *table); 57 58static void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src, 59 mlib_u8 *dst, 60 mlib_s32 xsize, 61 const mlib_u8 **table); 62 63static void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src, 64 mlib_u8 *dst, 65 mlib_s32 xsize, 66 const mlib_u8 **table); 67 68static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, 69 mlib_u8 *dst, 70 mlib_s32 xsize, 71 const mlib_d64 *table); 72 73static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, 74 mlib_u8 *dst, 75 mlib_s32 xsize, 76 const mlib_d64 *table); 77 78static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, 79 mlib_u8 *dst, 80 mlib_s32 xsize, 81 const mlib_d64 *table); 82 83static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, 84 mlib_u8 *dst, 85 mlib_s32 xsize, 86 const mlib_d64 *table); 87 88static void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src, 89 mlib_u8 *dst, 90 mlib_s32 xsize, 91 const mlib_u8 **table); 92 93static void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src, 94 mlib_u8 *dst, 95 mlib_s32 xsize, 96 const mlib_f32 *table); 97 98static void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src, 99 mlib_u8 *dst, 100 mlib_s32 xsize, 101 const mlib_f32 *table); 102 103static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src, 104 mlib_u8 *dst, 105 mlib_s32 xsize, 106 const mlib_u8 **table); 107 108static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src, 109 mlib_u8 *dst, 110 mlib_s32 xsize, 111 const mlib_u8 **table); 112 113static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src, 114 mlib_u8 *dst, 115 mlib_s32 xsize, 116 const mlib_u8 **table); 117 118static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src, 119 mlib_u8 *dst, 120 mlib_s32 xsize, 121 const mlib_u8 **table); 122 123/***************************************************************/ 124#define VIS_LD_U8_I(X, Y) vis_ld_u8_i((void *)(X), (Y)) 125#define VIS_LD_U16_I(X, Y) vis_ld_u16_i((void *)(X), (Y)) 126 127/***************************************************************/ 128void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src, 129 mlib_u8 *dst, 130 mlib_s32 xsize, 131 const mlib_u16 *table) 132{ 133 mlib_u32 *sa; /* aligned pointer to source data */ 134 mlib_u8 *sp; /* pointer to source data */ 135 mlib_u32 s0; /* source data */ 136 mlib_u16 *dl; /* pointer to start of destination */ 137 mlib_u16 *dend; /* pointer to end of destination */ 138 mlib_d64 *dp; /* aligned pointer to destination */ 139 mlib_d64 t0, t1, t2; /* destination data */ 140 mlib_d64 t3, acc; /* destination data */ 141 mlib_s32 emask; /* edge mask */ 142 mlib_s32 i, num; /* loop variable */ 143 144 sa = (mlib_u32*)src; 145 dl = (mlib_u16*)dst; 146 dp = (mlib_d64 *) dl; 147 dend = dl + xsize - 1; 148 149 vis_alignaddr((void *) 0, 6); 150 151 if (xsize >= 4) { 152 153 s0 = sa[0]; 154 sa ++; 155 156#pragma pipeloop(0) 157 for(i = 0; i <= xsize - 8; i+=4, sa++) { 158 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 159 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 160 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 161 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 162 acc = vis_faligndata(t3, acc); 163 acc = vis_faligndata(t2, acc); 164 acc = vis_faligndata(t1, acc); 165 acc = vis_faligndata(t0, acc); 166 s0 = sa[0]; 167 *dp++ = acc; 168 } 169 170 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 171 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 172 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 173 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 174 acc = vis_faligndata(t3, acc); 175 acc = vis_faligndata(t2, acc); 176 acc = vis_faligndata(t1, acc); 177 acc = vis_faligndata(t0, acc); 178 *dp++ = acc; 179 } 180 181 sp = (mlib_u8*)sa; 182 183 if ((mlib_addr) dp <= (mlib_addr) dend) { 184 185 num = (mlib_u16*) dend - (mlib_u16*) dp; 186 sp += num; 187 num ++; 188#pragma pipeloop(0) 189 for (i = 0; i < num; i ++) { 190 s0 = (mlib_s32) *sp; 191 sp --; 192 193 t0 = VIS_LD_U16_I(table, 2*s0); 194 acc = vis_faligndata(t0, acc); 195 } 196 197 emask = vis_edge16(dp, dend); 198 vis_pst_16(acc, dp, emask); 199 } 200} 201 202/***************************************************************/ 203void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src, 204 mlib_u8 *dst, 205 mlib_s32 xsize, 206 const mlib_u16 *table) 207{ 208 mlib_u32 *sa; /* aligned pointer to source data */ 209 mlib_u8 *sp; /* pointer to source data */ 210 mlib_u32 s0, s1; /* source data */ 211 mlib_u16 *dl; /* pointer to start of destination */ 212 mlib_u16 *dend; /* pointer to end of destination */ 213 mlib_d64 *dp; /* aligned pointer to destination */ 214 mlib_d64 t0, t1, t2; /* destination data */ 215 mlib_d64 t3, acc; /* destination data */ 216 mlib_s32 emask; /* edge mask */ 217 mlib_s32 i, num; /* loop variable */ 218 219 sa = (mlib_u32*)(src-1); 220 dl = (mlib_u16*)dst; 221 dp = (mlib_d64 *) dl; 222 dend = dl + xsize - 1; 223 224 vis_alignaddr((void *) 0, 6); 225 226 s0 = *sa++; 227 228 if (xsize >= 4) { 229 230 s1 = sa[0]; 231 sa ++; 232 233#pragma pipeloop(0) 234 for(i = 0; i <= xsize - 8; i+=4, sa++) { 235 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 236 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 237 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 238 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 239 acc = vis_faligndata(t3, acc); 240 acc = vis_faligndata(t2, acc); 241 acc = vis_faligndata(t1, acc); 242 acc = vis_faligndata(t0, acc); 243 s0 = s1; 244 s1 = sa[0]; 245 *dp++ = acc; 246 } 247 248 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 249 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 250 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 251 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 252 acc = vis_faligndata(t3, acc); 253 acc = vis_faligndata(t2, acc); 254 acc = vis_faligndata(t1, acc); 255 acc = vis_faligndata(t0, acc); 256 *dp++ = acc; 257 } 258 259 sp = (mlib_u8*)sa; 260 sp -= 3; 261 262 if ((mlib_addr) dp <= (mlib_addr) dend) { 263 264 num = (mlib_u16*) dend - (mlib_u16*) dp; 265 sp += num; 266 num ++; 267#pragma pipeloop(0) 268 for (i = 0; i < num; i ++) { 269 s0 = (mlib_s32) *sp; 270 sp --; 271 272 t0 = VIS_LD_U16_I(table, 2*s0); 273 acc = vis_faligndata(t0, acc); 274 } 275 276 emask = vis_edge16(dp, dend); 277 vis_pst_16(acc, dp, emask); 278 } 279} 280 281/***************************************************************/ 282void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src, 283 mlib_u8 *dst, 284 mlib_s32 xsize, 285 const mlib_u16 *table) 286{ 287 mlib_u32 *sa; /* pointer to source data */ 288 mlib_u8 *sp; /* pointer to source data */ 289 mlib_u32 s0, s1; /* source data */ 290 mlib_u16 *dl; /* pointer to start of destination */ 291 mlib_u16 *dend; /* pointer to end of destination */ 292 mlib_d64 *dp; /* aligned pointer to destination */ 293 mlib_d64 t0, t1, t2; /* destination data */ 294 mlib_d64 t3, acc; /* destination data */ 295 mlib_s32 emask; /* edge mask */ 296 mlib_s32 i, num; /* loop variable */ 297 298 sa = (mlib_u32*)(src-2); 299 dl = (mlib_u16*)dst; 300 dp = (mlib_d64 *) dl; 301 dend = dl + xsize - 1; 302 303 vis_alignaddr((void *) 0, 6); 304 305 s0 = *sa++; 306 307 if (xsize >= 4) { 308 309 s1 = sa[0]; 310 sa ++; 311 312#pragma pipeloop(0) 313 for(i = 0; i <= xsize - 8; i+=4, sa++) { 314 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 315 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 316 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 317 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 318 acc = vis_faligndata(t3, acc); 319 acc = vis_faligndata(t2, acc); 320 acc = vis_faligndata(t1, acc); 321 acc = vis_faligndata(t0, acc); 322 s0 = s1; 323 s1 = sa[0]; 324 *dp++ = acc; 325 } 326 327 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 328 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 329 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 330 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 331 acc = vis_faligndata(t3, acc); 332 acc = vis_faligndata(t2, acc); 333 acc = vis_faligndata(t1, acc); 334 acc = vis_faligndata(t0, acc); 335 *dp++ = acc; 336 } 337 338 sp = (mlib_u8*)sa; 339 sp -= 2; 340 341 if ((mlib_addr) dp <= (mlib_addr) dend) { 342 343 num = (mlib_u16*) dend - (mlib_u16*) dp; 344 sp += num; 345 num ++; 346#pragma pipeloop(0) 347 for (i = 0; i < num; i ++) { 348 s0 = (mlib_s32) *sp; 349 sp --; 350 351 t0 = VIS_LD_U16_I(table, 2*s0); 352 acc = vis_faligndata(t0, acc); 353 } 354 355 emask = vis_edge16(dp, dend); 356 vis_pst_16(acc, dp, emask); 357 } 358} 359 360/***************************************************************/ 361void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src, 362 mlib_u8 *dst, 363 mlib_s32 xsize, 364 const mlib_u16 *table) 365{ 366 mlib_u32 *sa; /* aligned pointer to source data */ 367 mlib_u8 *sp; /* pointer to source data */ 368 mlib_u32 s0, s1; /* source data */ 369 mlib_u16 *dl; /* pointer to start of destination */ 370 mlib_u16 *dend; /* pointer to end of destination */ 371 mlib_d64 *dp; /* aligned pointer to destination */ 372 mlib_d64 t0, t1, t2; /* destination data */ 373 mlib_d64 t3, acc; /* destination data */ 374 mlib_s32 emask; /* edge mask */ 375 mlib_s32 i, num; /* loop variable */ 376 377 sa = (mlib_u32*)(src-3); 378 dl = (mlib_u16*)dst; 379 dp = (mlib_d64 *) dl; 380 dend = dl + xsize - 1; 381 382 vis_alignaddr((void *) 0, 6); 383 384 s0 = *sa++; 385 386 if (xsize >= 4) { 387 388 s1 = sa[0]; 389 sa ++; 390 391#pragma pipeloop(0) 392 for(i = 0; i <= xsize - 8; i+=4, sa++) { 393 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); 394 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 395 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 396 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 397 acc = vis_faligndata(t3, acc); 398 acc = vis_faligndata(t2, acc); 399 acc = vis_faligndata(t1, acc); 400 acc = vis_faligndata(t0, acc); 401 s0 = s1; 402 s1 = sa[0]; 403 *dp++ = acc; 404 } 405 406 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); 407 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 408 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 409 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 410 acc = vis_faligndata(t3, acc); 411 acc = vis_faligndata(t2, acc); 412 acc = vis_faligndata(t1, acc); 413 acc = vis_faligndata(t0, acc); 414 *dp++ = acc; 415 } 416 417 sp = (mlib_u8*)sa; 418 sp -= 1; 419 420 if ((mlib_addr) dp <= (mlib_addr) dend) { 421 422 num = (mlib_u16*) dend - (mlib_u16*) dp; 423 sp += num; 424 num ++; 425#pragma pipeloop(0) 426 for (i = 0; i < num; i ++) { 427 s0 = (mlib_s32) *sp; 428 sp --; 429 430 t0 = VIS_LD_U16_I(table, 2*s0); 431 acc = vis_faligndata(t0, acc); 432 } 433 434 emask = vis_edge16(dp, dend); 435 vis_pst_16(acc, dp, emask); 436 } 437} 438 439/***************************************************************/ 440void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src, 441 mlib_u8 *dst, 442 mlib_s32 xsize, 443 const mlib_u16 *table) 444{ 445 mlib_u32 *sa; /* aligned pointer to source data */ 446 mlib_u8 *sp; /* pointer to source data */ 447 mlib_u32 s0, s1, s2, s3; /* source data */ 448 mlib_u8 *dl; /* pointer to start of destination */ 449 mlib_u8 *dend; /* pointer to end of destination */ 450 mlib_d64 *dp; /* aligned pointer to destination */ 451 mlib_d64 t0, t1, t2; /* destination data */ 452 mlib_d64 t3, t4, t5; /* destination data */ 453 mlib_d64 t6, t7, acc0; /* destination data */ 454 mlib_d64 acc1, acc2; /* destination data */ 455 mlib_d64 acc3, acc4; /* destination data */ 456 mlib_s32 emask; /* edge mask */ 457 mlib_s32 i, num; /* loop variable */ 458 mlib_s32 off; /* offset */ 459 460 sa = (mlib_u32*)src; 461 dl = dst; 462 sp = (void *)src; 463 dend = dl + 2*xsize - 1; 464 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); 465 off = (mlib_addr) dp - (mlib_addr) dl; 466 467 emask = vis_edge8(dl, dend); 468 num = (xsize < 4) ? xsize : 4; 469 470 sp += (num-1); 471 472 vis_alignaddr(dp, 6); 473 474 for (i = 0; i < num; i ++) { 475 s0 = (mlib_s32) *sp; 476 sp --; 477 478 t0 = VIS_LD_U16_I(table, 2*s0); 479 acc0 = vis_faligndata(t0, acc0); 480 } 481 482 vis_alignaddr(dp, off); 483 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask); 484 485 sa++; 486 487 xsize -= 4; 488 489 i = 0; 490 491 if (xsize >= 16) { 492 493 s0 = sa[0]; 494 s1 = sa[1]; 495 s2 = sa[2]; 496 s3 = sa[3]; 497 sa += 4; 498 499#pragma pipeloop(0) 500 for(i = 0; i <= xsize - 32; i+=16, sa+=4) { 501 vis_alignaddr(dp, 6); 502 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 503 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 504 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 505 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 506 acc1 = vis_faligndata(t3, acc1); 507 acc1 = vis_faligndata(t2, acc1); 508 acc1 = vis_faligndata(t1, acc1); 509 acc1 = vis_faligndata(t0, acc1); 510 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); 511 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); 512 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 513 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 514 acc2 = vis_faligndata(t7, acc2); 515 acc2 = vis_faligndata(t6, acc2); 516 acc2 = vis_faligndata(t5, acc2); 517 acc2 = vis_faligndata(t4, acc2); 518 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE); 519 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE); 520 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE); 521 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE); 522 acc3 = vis_faligndata(t3, acc3); 523 acc3 = vis_faligndata(t2, acc3); 524 acc3 = vis_faligndata(t1, acc3); 525 acc3 = vis_faligndata(t0, acc3); 526 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE); 527 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE); 528 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE); 529 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE); 530 acc4 = vis_faligndata(t7, acc4); 531 acc4 = vis_faligndata(t6, acc4); 532 acc4 = vis_faligndata(t5, acc4); 533 acc4 = vis_faligndata(t4, acc4); 534 vis_alignaddr(dp, off); 535 s0 = sa[0]; 536 s1 = sa[1]; 537 s2 = sa[2]; 538 s3 = sa[3]; 539 *dp++ = vis_faligndata(acc0, acc1); 540 *dp++ = vis_faligndata(acc1, acc2); 541 *dp++ = vis_faligndata(acc2, acc3); 542 *dp++ = vis_faligndata(acc3, acc4); 543 acc0 = acc4; 544 } 545 546 vis_alignaddr(dp, 6); 547 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 548 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 549 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 550 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 551 acc1 = vis_faligndata(t3, acc1); 552 acc1 = vis_faligndata(t2, acc1); 553 acc1 = vis_faligndata(t1, acc1); 554 acc1 = vis_faligndata(t0, acc1); 555 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); 556 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); 557 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 558 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 559 acc2 = vis_faligndata(t7, acc2); 560 acc2 = vis_faligndata(t6, acc2); 561 acc2 = vis_faligndata(t5, acc2); 562 acc2 = vis_faligndata(t4, acc2); 563 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE); 564 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE); 565 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE); 566 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE); 567 acc3 = vis_faligndata(t3, acc3); 568 acc3 = vis_faligndata(t2, acc3); 569 acc3 = vis_faligndata(t1, acc3); 570 acc3 = vis_faligndata(t0, acc3); 571 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE); 572 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE); 573 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE); 574 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE); 575 acc4 = vis_faligndata(t7, acc4); 576 acc4 = vis_faligndata(t6, acc4); 577 acc4 = vis_faligndata(t5, acc4); 578 acc4 = vis_faligndata(t4, acc4); 579 vis_alignaddr(dp, off); 580 *dp++ = vis_faligndata(acc0, acc1); 581 *dp++ = vis_faligndata(acc1, acc2); 582 *dp++ = vis_faligndata(acc2, acc3); 583 *dp++ = vis_faligndata(acc3, acc4); 584 acc0 = acc4; i+=16; 585 } 586 587 if (i <= xsize - 8) { 588 s0 = sa[0]; 589 s1 = sa[1]; 590 vis_alignaddr(dp, 6); 591 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 592 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 593 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 594 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 595 acc1 = vis_faligndata(t3, acc1); 596 acc1 = vis_faligndata(t2, acc1); 597 acc1 = vis_faligndata(t1, acc1); 598 acc1 = vis_faligndata(t0, acc1); 599 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); 600 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); 601 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); 602 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); 603 acc2 = vis_faligndata(t7, acc2); 604 acc2 = vis_faligndata(t6, acc2); 605 acc2 = vis_faligndata(t5, acc2); 606 acc2 = vis_faligndata(t4, acc2); 607 vis_alignaddr(dp, off); 608 *dp++ = vis_faligndata(acc0, acc1); 609 *dp++ = vis_faligndata(acc1, acc2); 610 acc0 = acc2; i += 8; sa += 2; 611 } 612 613 if (i <= xsize - 4) { 614 s0 = *sa++; 615 vis_alignaddr(dp, 6); 616 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); 617 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); 618 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); 619 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); 620 acc1 = vis_faligndata(t3, acc1); 621 acc1 = vis_faligndata(t2, acc1); 622 acc1 = vis_faligndata(t1, acc1); 623 acc1 = vis_faligndata(t0, acc1); 624 vis_alignaddr(dp, off); 625 *dp++ = vis_faligndata(acc0, acc1); 626 acc0 = acc1; 627 } 628 629 sp = (mlib_u8*)sa; 630 631 if ((mlib_addr) dp <= (mlib_addr) dend) { 632 633 num = (((mlib_u8*) dend - (mlib_u8*) dp) + off + 1) >> 1; 634 sp += (num - 1); 635 vis_alignaddr(dp, 6); 636#pragma pipeloop(0) 637 for (i = 0; i < num; i ++) { 638 s0 = (mlib_s32) *sp; 639 sp --; 640 641 t0 = VIS_LD_U16_I(table, 2*s0); 642 acc1 = vis_faligndata(t0, acc1); 643 } 644 645 vis_alignaddr(dp, off); 646 emask = vis_edge8(dp, dend); 647 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask); 648 } 649 650 if ((mlib_addr) dp <= (mlib_addr) dend) { 651 emask = vis_edge8(dp, dend); 652 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask); 653 } 654} 655 656/***************************************************************/ 657void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src, 658 mlib_u8 *dst, 659 mlib_s32 xsize, 660 const mlib_u8 **table) 661{ 662 mlib_u8 *sp; /* pointer to source data */ 663 mlib_u32 s0, s1, s2, s3; /* source data */ 664 mlib_u16 *dl; /* pointer to start of destination */ 665 mlib_u16 *dend; /* pointer to end of destination */ 666 mlib_d64 *dp; /* aligned pointer to destination */ 667 mlib_d64 t0, t1, t2; /* destination data */ 668 mlib_d64 t3, t4, t5; /* destination data */ 669 mlib_d64 t6, t7, acc; /* destination data */ 670 mlib_s32 emask; /* edge mask */ 671 mlib_s32 i, num; /* loop variable */ 672 const mlib_u8 *tab0 = table[0]; 673 const mlib_u8 *tab1 = table[1]; 674 675 sp = (void *)src; 676 dl = (mlib_u16*)dst; 677 dp = (mlib_d64 *) dl; 678 dend = dl + xsize - 1; 679 680 vis_alignaddr((void *) 0, 7); 681 682 if (xsize >= 4) { 683 684 s0 = sp[0]; 685 s1 = sp[1]; 686 s2 = sp[2]; 687 s3 = sp[3]; 688 sp += 4; 689 690#pragma pipeloop(0) 691 for(i = 0; i <= xsize - 8; i+=4, sp+=4) { 692 t7 = VIS_LD_U8_I(tab1, s3); 693 t6 = VIS_LD_U8_I(tab0, s3); 694 t5 = VIS_LD_U8_I(tab1, s2); 695 t4 = VIS_LD_U8_I(tab0, s2); 696 t3 = VIS_LD_U8_I(tab1, s1); 697 t2 = VIS_LD_U8_I(tab0, s1); 698 t1 = VIS_LD_U8_I(tab1, s0); 699 t0 = VIS_LD_U8_I(tab0, s0); 700 acc = vis_faligndata(t7, acc); 701 acc = vis_faligndata(t6, acc); 702 acc = vis_faligndata(t5, acc); 703 acc = vis_faligndata(t4, acc); 704 acc = vis_faligndata(t3, acc); 705 acc = vis_faligndata(t2, acc); 706 acc = vis_faligndata(t1, acc); 707 acc = vis_faligndata(t0, acc); 708 s0 = sp[0]; 709 s1 = sp[1]; 710 s2 = sp[2]; 711 s3 = sp[3]; 712 *dp++ = acc; 713 } 714 715 t7 = VIS_LD_U8_I(tab1, s3); 716 t6 = VIS_LD_U8_I(tab0, s3); 717 t5 = VIS_LD_U8_I(tab1, s2); 718 t4 = VIS_LD_U8_I(tab0, s2); 719 t3 = VIS_LD_U8_I(tab1, s1); 720 t2 = VIS_LD_U8_I(tab0, s1); 721 t1 = VIS_LD_U8_I(tab1, s0); 722 t0 = VIS_LD_U8_I(tab0, s0); 723 acc = vis_faligndata(t7, acc); 724 acc = vis_faligndata(t6, acc); 725 acc = vis_faligndata(t5, acc); 726 acc = vis_faligndata(t4, acc); 727 acc = vis_faligndata(t3, acc); 728 acc = vis_faligndata(t2, acc); 729 acc = vis_faligndata(t1, acc); 730 acc = vis_faligndata(t0, acc); 731 *dp++ = acc; 732 } 733 734 if ((mlib_addr) dp <= (mlib_addr) dend) { 735 736 num = (mlib_u16*) dend - (mlib_u16*) dp; 737 sp += num; 738 num ++; 739#pragma pipeloop(0) 740 for (i = 0; i < num; i ++) { 741 s0 = (mlib_s32) *sp; 742 sp --; 743 744 t0 = VIS_LD_U8_I(tab1, s0); 745 acc = vis_faligndata(t0, acc); 746 747 t0 = VIS_LD_U8_I(tab0, s0); 748 acc = vis_faligndata(t0, acc); 749 } 750 751 emask = vis_edge16(dp, dend); 752 vis_pst_16(acc, dp, emask); 753 } 754} 755 756/***************************************************************/ 757void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src, 758 mlib_u8 *dst, 759 mlib_s32 xsize, 760 const mlib_u8 **table) 761{ 762 mlib_u8 *sp; /* pointer to source data */ 763 mlib_u32 s0, s1, s2, s3, s4; /* source data */ 764 mlib_u8 *dl; /* pointer to start of destination */ 765 mlib_u8 *dend; /* pointer to end of destination */ 766 mlib_d64 *dp; /* aligned pointer to destination */ 767 mlib_d64 t0, t1, t2; /* destination data */ 768 mlib_d64 t3, t4, t5; /* destination data */ 769 mlib_d64 t6, t7, acc; /* destination data */ 770 mlib_s32 emask; /* edge mask */ 771 mlib_s32 i, num; /* loop variable */ 772 const mlib_u8 *tab0 = table[0]; 773 const mlib_u8 *tab1 = table[1]; 774 775 sp = (void *)src; 776 dl = dst; 777 778 dend = dl + 2 * xsize - 1; 779 780 vis_alignaddr((void *) 0, 7); 781 782 s0 = *sp++; 783 *dl++ = tab0[s0]; 784 dp = (mlib_d64 *) dl; 785 xsize--; 786 787 if (xsize >= 4) { 788 789 s1 = sp[0]; 790 s2 = sp[1]; 791 s3 = sp[2]; 792 s4 = sp[3]; 793 sp += 4; 794 795#pragma pipeloop(0) 796 for(i = 0; i <= xsize - 8; i+=4, sp+=4) { 797 t7 = VIS_LD_U8_I(tab0, s4); 798 t6 = VIS_LD_U8_I(tab1, s3); 799 t5 = VIS_LD_U8_I(tab0, s3); 800 t4 = VIS_LD_U8_I(tab1, s2); 801 t3 = VIS_LD_U8_I(tab0, s2); 802 t2 = VIS_LD_U8_I(tab1, s1); 803 t1 = VIS_LD_U8_I(tab0, s1); 804 t0 = VIS_LD_U8_I(tab1, s0); 805 acc = vis_faligndata(t7, acc); 806 acc = vis_faligndata(t6, acc); 807 acc = vis_faligndata(t5, acc); 808 acc = vis_faligndata(t4, acc); 809 acc = vis_faligndata(t3, acc); 810 acc = vis_faligndata(t2, acc); 811 acc = vis_faligndata(t1, acc); 812 acc = vis_faligndata(t0, acc); 813 s0 = s4; 814 s1 = sp[0]; 815 s2 = sp[1]; 816 s3 = sp[2]; 817 s4 = sp[3]; 818 *dp++ = acc; 819 } 820 821 t7 = VIS_LD_U8_I(tab0, s4); 822 t6 = VIS_LD_U8_I(tab1, s3); 823 t5 = VIS_LD_U8_I(tab0, s3); 824 t4 = VIS_LD_U8_I(tab1, s2); 825 t3 = VIS_LD_U8_I(tab0, s2); 826 t2 = VIS_LD_U8_I(tab1, s1); 827 t1 = VIS_LD_U8_I(tab0, s1); 828 t0 = VIS_LD_U8_I(tab1, s0); 829 acc = vis_faligndata(t7, acc); 830 acc = vis_faligndata(t6, acc); 831 acc = vis_faligndata(t5, acc); 832 acc = vis_faligndata(t4, acc); 833 acc = vis_faligndata(t3, acc); 834 acc = vis_faligndata(t2, acc); 835 acc = vis_faligndata(t1, acc); 836 acc = vis_faligndata(t0, acc); 837 s0 = s4; 838 *dp++ = acc; 839 } 840 841 num = ((mlib_u8*) dend - (mlib_u8*) dp) >> 1; 842 sp += num; 843 num ++; 844 845#pragma pipeloop(0) 846 for (i = 0; i < num; i ++) { 847 s1 = (mlib_s32) *sp; 848 sp --; 849 850 t0 = VIS_LD_U8_I(tab1, s1); 851 acc = vis_faligndata(t0, acc); 852 853 t0 = VIS_LD_U8_I(tab0, s1); 854 acc = vis_faligndata(t0, acc); 855 } 856 857 t0 = VIS_LD_U8_I(tab1, s0); 858 acc = vis_faligndata(t0, acc); 859 emask = vis_edge8(dp, dend); 860 vis_pst_8(acc, dp, emask); 861} 862 863/***************************************************************/ 864void mlib_v_ImageLookUpSI_U8_U8_2(const mlib_u8 *src, 865 mlib_s32 slb, 866 mlib_u8 *dst, 867 mlib_s32 dlb, 868 mlib_s32 xsize, 869 mlib_s32 ysize, 870 const mlib_u8 **table) 871{ 872 if ((xsize * ysize) < 650) { 873 mlib_u8 *sl; 874 mlib_u8 *dl; 875 mlib_s32 i, j; 876 877 sl = (void *)src; 878 dl = dst; 879 880 /* row loop */ 881 for (j = 0; j < ysize; j ++) { 882 mlib_u8 *sp = sl; 883 mlib_u8 *dp = dl; 884 mlib_s32 off, s0, size = xsize; 885 886 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1; 887 off = (off < size) ? off : size; 888 889 for (i = 0; i < off; i++) { 890 s0 = *sp++; 891 *dp++ = table[0][s0]; 892 *dp++ = table[1][s0]; 893 size--; 894 } 895 896 if (size > 0) { 897 898 if (((mlib_addr)dp & 1) == 0) { 899 mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(sp, dp, size, table); 900 } else { 901 mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(sp, dp, size, table); 902 } 903 } 904 905 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 906 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 907 } 908 909 } else { 910 mlib_u8 *sl; 911 mlib_u8 *dl; 912 mlib_u16 tab[256]; 913 const mlib_u8 *tab0 = table[0]; 914 const mlib_u8 *tab1 = table[1]; 915 mlib_s32 i, j, s0, s1, s2; 916 917 s0 = tab0[0]; 918 s1 = tab1[0]; 919 for (i = 1; i < 256; i++) { 920 s2 = (s0 << 8) + s1; 921 s0 = tab0[i]; 922 s1 = tab1[i]; 923 tab[i-1] = (mlib_u16)s2; 924 } 925 926 s2 = (s0 << 8) + s1; 927 tab[255] = (mlib_u16)s2; 928 929 sl = (void *)src; 930 dl = dst; 931 932 /* row loop */ 933 for (j = 0; j < ysize; j ++) { 934 mlib_u8 *sp = sl; 935 mlib_u8 *dp = dl; 936 mlib_s32 off, s0, size = xsize; 937 938 if (((mlib_addr)dp & 1) == 0) { 939 940 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1; 941 off = (off < size) ? off : size; 942 943 for (i = 0; i < off; i++) { 944 *(mlib_u16*)dp = tab[(*sp)]; 945 dp += 2; 946 size--; sp++; 947 } 948 949 if (size > 0) { 950 951 off = (mlib_addr)sp & 3; 952 953 if (off == 0) { 954 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(sp, dp, size, tab); 955 } else if (off == 1) { 956 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(sp, dp, size, tab); 957 } else if (off == 2) { 958 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(sp, dp, size, tab); 959 } else { 960 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(sp, dp, size, tab); 961 } 962 } 963 964 } else { 965 966 off = ((4 - ((mlib_addr)sp & 3)) & 3); 967 off = (off < size) ? off : size; 968 969 for (i = 0; i < off; i++) { 970 s0 = tab[(*sp)]; 971 *dp++ = (s0 >> 8); 972 *dp++ = (s0 & 0xFF); 973 size--; sp++; 974 } 975 976 if (size > 0) { 977 mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(sp, dp, size, tab); 978 } 979 } 980 981 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 982 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 983 } 984 } 985} 986 987/***************************************************************/ 988void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, 989 mlib_u8 *dst, 990 mlib_s32 xsize, 991 const mlib_d64 *table) 992{ 993 mlib_u8 *sp; /* pointer to source data */ 994 mlib_u32 *sa; /* aligned pointer to source data */ 995 mlib_u32 s0; /* source data */ 996 mlib_u8 *dl; /* pointer to start of destination */ 997 mlib_f32 *dp; /* aligned pointer to destination */ 998 mlib_d64 t0, t1, t2, t3; /* destination data */ 999 mlib_d64 acc0, acc1; /* destination data */ 1000 mlib_s32 i; /* loop variable */ 1001 mlib_u8 *ptr; 1002 1003 dl = dst; 1004 dp = (mlib_f32 *) dl; 1005 sp = (void *)src; 1006 sa = (mlib_u32*)sp; 1007 1008 vis_alignaddr((void *) 0, 3); 1009 1010 i = 0; 1011 1012 if (xsize >= 4) { 1013 1014 s0 = *sa++; 1015 1016#pragma pipeloop(0) 1017 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { 1018 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 )); 1019 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); 1020 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1021 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1022 acc0 = vis_faligndata(t0, t0); 1023 acc0 = vis_faligndata(acc0, t1); 1024 acc1 = vis_faligndata(acc0, acc0); 1025 acc0 = vis_faligndata(acc0, t2); 1026 acc1 = vis_faligndata(acc1, acc0); 1027 acc0 = vis_faligndata(acc0, t3); 1028 s0 = *sa++; 1029 dp[0] = vis_read_lo(acc1); 1030 dp[1] = vis_read_hi(acc0); 1031 dp[2] = vis_read_lo(acc0); 1032 } 1033 1034 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 )); 1035 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); 1036 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1037 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1038 acc0 = vis_faligndata(t0, t0); 1039 acc0 = vis_faligndata(acc0, t1); 1040 acc1 = vis_faligndata(acc0, acc0); 1041 acc0 = vis_faligndata(acc0, t2); 1042 acc1 = vis_faligndata(acc1, acc0); 1043 acc0 = vis_faligndata(acc0, t3); 1044 dp[0] = vis_read_lo(acc1); 1045 dp[1] = vis_read_hi(acc0); 1046 dp[2] = vis_read_lo(acc0); 1047 dp += 3; 1048 i += 4; 1049 } 1050 1051 dl = (mlib_u8*)dp; 1052 1053#pragma pipeloop(0) 1054 for (; i < xsize; i++) { 1055 ptr = (mlib_u8*)(table + src[i]); 1056 dl[0] = ptr[0]; 1057 dl[1] = ptr[1]; 1058 dl[2] = ptr[2]; 1059 dl += 3; 1060 } 1061} 1062 1063/***************************************************************/ 1064void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, 1065 mlib_u8 *dst, 1066 mlib_s32 xsize, 1067 const mlib_d64 *table) 1068{ 1069 mlib_u8 *sp; /* pointer to source data */ 1070 mlib_u32 *sa; /* aligned pointer to source data */ 1071 mlib_u32 s0, s1; /* source data */ 1072 mlib_u8 *dl; /* pointer to start of destination */ 1073 mlib_f32 *dp; /* aligned pointer to destination */ 1074 mlib_d64 t0, t1, t2, t3; /* destination data */ 1075 mlib_d64 acc0, acc1; /* destination data */ 1076 mlib_s32 i; /* loop variable */ 1077 mlib_u8 *ptr; 1078 1079 dl = dst; 1080 dp = (mlib_f32 *) dl; 1081 sp = (void *)src; 1082 sa = (mlib_u32*)(sp - 1); 1083 1084 vis_alignaddr((void *) 0, 3); 1085 1086 i = 0; 1087 s0 = *sa++; 1088 1089 if (xsize >= 4) { 1090 1091 s1 = *sa++; 1092 1093#pragma pipeloop(0) 1094 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { 1095 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); 1096 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1097 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1098 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1099 acc0 = vis_faligndata(t0, t0); 1100 acc0 = vis_faligndata(acc0, t1); 1101 acc1 = vis_faligndata(acc0, acc0); 1102 acc0 = vis_faligndata(acc0, t2); 1103 acc1 = vis_faligndata(acc1, acc0); 1104 acc0 = vis_faligndata(acc0, t3); 1105 s0 = s1; 1106 s1 = *sa++; 1107 dp[0] = vis_read_lo(acc1); 1108 dp[1] = vis_read_hi(acc0); 1109 dp[2] = vis_read_lo(acc0); 1110 } 1111 1112 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); 1113 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1114 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1115 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1116 acc0 = vis_faligndata(t0, t0); 1117 acc0 = vis_faligndata(acc0, t1); 1118 acc1 = vis_faligndata(acc0, acc0); 1119 acc0 = vis_faligndata(acc0, t2); 1120 acc1 = vis_faligndata(acc1, acc0); 1121 acc0 = vis_faligndata(acc0, t3); 1122 dp[0] = vis_read_lo(acc1); 1123 dp[1] = vis_read_hi(acc0); 1124 dp[2] = vis_read_lo(acc0); 1125 dp += 3; 1126 i += 4; 1127 } 1128 1129 dl = (mlib_u8*)dp; 1130 1131#pragma pipeloop(0) 1132 for (; i < xsize; i++) { 1133 ptr = (mlib_u8*)(table + src[i]); 1134 dl[0] = ptr[0]; 1135 dl[1] = ptr[1]; 1136 dl[2] = ptr[2]; 1137 dl += 3; 1138 } 1139} 1140 1141/***************************************************************/ 1142void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, 1143 mlib_u8 *dst, 1144 mlib_s32 xsize, 1145 const mlib_d64 *table) 1146{ 1147 mlib_u8 *sp; /* pointer to source data */ 1148 mlib_u32 *sa; /* aligned pointer to source data */ 1149 mlib_u32 s0, s1; /* source data */ 1150 mlib_u8 *dl; /* pointer to start of destination */ 1151 mlib_f32 *dp; /* aligned pointer to destination */ 1152 mlib_d64 t0, t1, t2, t3; /* destination data */ 1153 mlib_d64 acc0, acc1; /* destination data */ 1154 mlib_s32 i; /* loop variable */ 1155 mlib_u8 *ptr; 1156 1157 dl = dst; 1158 dp = (mlib_f32 *) dl; 1159 sp = (void *)src; 1160 sa = (mlib_u32*)(sp - 2); 1161 1162 vis_alignaddr((void *) 0, 3); 1163 1164 i = 0; 1165 s0 = *sa++; 1166 1167 if (xsize >= 4) { 1168 1169 s1 = *sa++; 1170 1171#pragma pipeloop(0) 1172 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { 1173 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1174 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1175 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1176 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); 1177 acc0 = vis_faligndata(t0, t0); 1178 acc0 = vis_faligndata(acc0, t1); 1179 acc1 = vis_faligndata(acc0, acc0); 1180 acc0 = vis_faligndata(acc0, t2); 1181 acc1 = vis_faligndata(acc1, acc0); 1182 acc0 = vis_faligndata(acc0, t3); 1183 s0 = s1; 1184 s1 = *sa++; 1185 dp[0] = vis_read_lo(acc1); 1186 dp[1] = vis_read_hi(acc0); 1187 dp[2] = vis_read_lo(acc0); 1188 } 1189 1190 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); 1191 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1192 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1193 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); 1194 acc0 = vis_faligndata(t0, t0); 1195 acc0 = vis_faligndata(acc0, t1); 1196 acc1 = vis_faligndata(acc0, acc0); 1197 acc0 = vis_faligndata(acc0, t2); 1198 acc1 = vis_faligndata(acc1, acc0); 1199 acc0 = vis_faligndata(acc0, t3); 1200 dp[0] = vis_read_lo(acc1); 1201 dp[1] = vis_read_hi(acc0); 1202 dp[2] = vis_read_lo(acc0); 1203 dp += 3; 1204 i += 4; 1205 } 1206 1207 dl = (mlib_u8*)dp; 1208 1209#pragma pipeloop(0) 1210 for (; i < xsize; i++) { 1211 ptr = (mlib_u8*)(table + src[i]); 1212 dl[0] = ptr[0]; 1213 dl[1] = ptr[1]; 1214 dl[2] = ptr[2]; 1215 dl += 3; 1216 } 1217} 1218 1219/***************************************************************/ 1220void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, 1221 mlib_u8 *dst, 1222 mlib_s32 xsize, 1223 const mlib_d64 *table) 1224{ 1225 mlib_u8 *sp; /* pointer to source data */ 1226 mlib_u32 *sa; /* aligned pointer to source data */ 1227 mlib_u32 s0, s1; /* source data */ 1228 mlib_u8 *dl; /* pointer to start of destination */ 1229 mlib_f32 *dp; /* aligned pointer to destination */ 1230 mlib_d64 t0, t1, t2, t3; /* destination data */ 1231 mlib_d64 acc0, acc1; /* destination data */ 1232 mlib_s32 i; /* loop variable */ 1233 mlib_u8 *ptr; 1234 1235 dl = dst; 1236 dp = (mlib_f32 *) dl; 1237 sp = (void *)src; 1238 sa = (mlib_u32*)(sp - 3); 1239 1240 vis_alignaddr((void *) 0, 3); 1241 1242 i = 0; 1243 s0 = *sa++; 1244 1245 if (xsize >= 4) { 1246 1247 s1 = *sa++; 1248 1249#pragma pipeloop(0) 1250 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { 1251 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1252 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1253 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); 1254 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 )); 1255 acc0 = vis_faligndata(t0, t0); 1256 acc0 = vis_faligndata(acc0, t1); 1257 acc1 = vis_faligndata(acc0, acc0); 1258 acc0 = vis_faligndata(acc0, t2); 1259 acc1 = vis_faligndata(acc1, acc0); 1260 acc0 = vis_faligndata(acc0, t3); 1261 s0 = s1; 1262 s1 = *sa++; 1263 dp[0] = vis_read_lo(acc1); 1264 dp[1] = vis_read_hi(acc0); 1265 dp[2] = vis_read_lo(acc0); 1266 } 1267 1268 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); 1269 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); 1270 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); 1271 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 )); 1272 acc0 = vis_faligndata(t0, t0); 1273 acc0 = vis_faligndata(acc0, t1); 1274 acc1 = vis_faligndata(acc0, acc0); 1275 acc0 = vis_faligndata(acc0, t2); 1276 acc1 = vis_faligndata(acc1, acc0); 1277 acc0 = vis_faligndata(acc0, t3); 1278 dp[0] = vis_read_lo(acc1); 1279 dp[1] = vis_read_hi(acc0); 1280 dp[2] = vis_read_lo(acc0); 1281 dp += 3; 1282 i += 4; 1283 } 1284 1285 dl = (mlib_u8*)dp; 1286 1287#pragma pipeloop(0) 1288 for (; i < xsize; i++) { 1289 ptr = (mlib_u8*)(table + src[i]); 1290 dl[0] = ptr[0]; 1291 dl[1] = ptr[1]; 1292 dl[2] = ptr[2]; 1293 dl += 3; 1294 } 1295} 1296 1297/***************************************************************/ 1298void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src, 1299 mlib_u8 *dst, 1300 mlib_s32 xsize, 1301 const mlib_u8 **table) 1302{ 1303 mlib_u8 *sp; /* pointer to source data */ 1304 mlib_u8 *dl; /* pointer to start of destination */ 1305 mlib_d64 *dp; /* aligned pointer to destination */ 1306 mlib_d64 t0, t1, t2; /* destination data */ 1307 mlib_d64 t3, t4, t5; /* destination data */ 1308 mlib_d64 t6, t7; /* destination data */ 1309 mlib_d64 acc0, acc1, acc2; /* destination data */ 1310 mlib_s32 i; /* loop variable */ 1311 const mlib_u8 *tab0 = table[0]; 1312 const mlib_u8 *tab1 = table[1]; 1313 const mlib_u8 *tab2 = table[2]; 1314 mlib_u32 s00, s01, s02, s03; 1315 mlib_u32 s10, s11, s12, s13; 1316 1317 sp = (void *)src; 1318 dl = dst; 1319 dp = (mlib_d64 *) dl; 1320 1321 vis_alignaddr((void *) 0, 7); 1322 1323 i = 0; 1324 1325 if (xsize >= 8) { 1326 1327 s00 = sp[0]; 1328 s01 = sp[1]; 1329 s02 = sp[2]; 1330 s03 = sp[3]; 1331 s10 = sp[4]; 1332 s11 = sp[5]; 1333 s12 = sp[6]; 1334 s13 = sp[7]; 1335 sp += 8; 1336 1337#pragma pipeloop(0) 1338 for(i = 0; i <= xsize - 16; i+=8, sp+=8) { 1339 t7 = VIS_LD_U8_I(tab1, s02); 1340 t6 = VIS_LD_U8_I(tab0, s02); 1341 t5 = VIS_LD_U8_I(tab2, s01); 1342 t4 = VIS_LD_U8_I(tab1, s01); 1343 t3 = VIS_LD_U8_I(tab0, s01); 1344 t2 = VIS_LD_U8_I(tab2, s00); 1345 t1 = VIS_LD_U8_I(tab1, s00); 1346 t0 = VIS_LD_U8_I(tab0, s00); 1347 acc0 = vis_faligndata(t7, acc0); 1348 acc0 = vis_faligndata(t6, acc0); 1349 acc0 = vis_faligndata(t5, acc0); 1350 acc0 = vis_faligndata(t4, acc0); 1351 acc0 = vis_faligndata(t3, acc0); 1352 acc0 = vis_faligndata(t2, acc0); 1353 acc0 = vis_faligndata(t1, acc0); 1354 acc0 = vis_faligndata(t0, acc0); 1355 t7 = VIS_LD_U8_I(tab0, s11); 1356 t6 = VIS_LD_U8_I(tab2, s10); 1357 t5 = VIS_LD_U8_I(tab1, s10); 1358 t4 = VIS_LD_U8_I(tab0, s10); 1359 t3 = VIS_LD_U8_I(tab2, s03); 1360 t2 = VIS_LD_U8_I(tab1, s03); 1361 t1 = VIS_LD_U8_I(tab0, s03); 1362 t0 = VIS_LD_U8_I(tab2, s02); 1363 acc1 = vis_faligndata(t7, acc1); 1364 acc1 = vis_faligndata(t6, acc1); 1365 acc1 = vis_faligndata(t5, acc1); 1366 acc1 = vis_faligndata(t4, acc1); 1367 acc1 = vis_faligndata(t3, acc1); 1368 acc1 = vis_faligndata(t2, acc1); 1369 acc1 = vis_faligndata(t1, acc1); 1370 acc1 = vis_faligndata(t0, acc1); 1371 t7 = VIS_LD_U8_I(tab2, s13); 1372 t6 = VIS_LD_U8_I(tab1, s13); 1373 t5 = VIS_LD_U8_I(tab0, s13); 1374 t4 = VIS_LD_U8_I(tab2, s12); 1375 t3 = VIS_LD_U8_I(tab1, s12); 1376 t2 = VIS_LD_U8_I(tab0, s12); 1377 t1 = VIS_LD_U8_I(tab2, s11); 1378 t0 = VIS_LD_U8_I(tab1, s11); 1379 acc2 = vis_faligndata(t7, acc2); 1380 acc2 = vis_faligndata(t6, acc2); 1381 acc2 = vis_faligndata(t5, acc2); 1382 acc2 = vis_faligndata(t4, acc2); 1383 acc2 = vis_faligndata(t3, acc2); 1384 acc2 = vis_faligndata(t2, acc2); 1385 acc2 = vis_faligndata(t1, acc2); 1386 acc2 = vis_faligndata(t0, acc2); 1387 s00 = sp[0]; 1388 s01 = sp[1]; 1389 s02 = sp[2]; 1390 s03 = sp[3]; 1391 s10 = sp[4]; 1392 s11 = sp[5]; 1393 s12 = sp[6]; 1394 s13 = sp[7]; 1395 *dp++ = acc0; 1396 *dp++ = acc1; 1397 *dp++ = acc2; 1398 } 1399 1400 t7 = VIS_LD_U8_I(tab1, s02); 1401 t6 = VIS_LD_U8_I(tab0, s02); 1402 t5 = VIS_LD_U8_I(tab2, s01); 1403 t4 = VIS_LD_U8_I(tab1, s01); 1404 t3 = VIS_LD_U8_I(tab0, s01); 1405 t2 = VIS_LD_U8_I(tab2, s00); 1406 t1 = VIS_LD_U8_I(tab1, s00); 1407 t0 = VIS_LD_U8_I(tab0, s00); 1408 acc0 = vis_faligndata(t7, acc0); 1409 acc0 = vis_faligndata(t6, acc0); 1410 acc0 = vis_faligndata(t5, acc0); 1411 acc0 = vis_faligndata(t4, acc0); 1412 acc0 = vis_faligndata(t3, acc0); 1413 acc0 = vis_faligndata(t2, acc0); 1414 acc0 = vis_faligndata(t1, acc0); 1415 acc0 = vis_faligndata(t0, acc0); 1416 t7 = VIS_LD_U8_I(tab0, s11); 1417 t6 = VIS_LD_U8_I(tab2, s10); 1418 t5 = VIS_LD_U8_I(tab1, s10); 1419 t4 = VIS_LD_U8_I(tab0, s10); 1420 t3 = VIS_LD_U8_I(tab2, s03); 1421 t2 = VIS_LD_U8_I(tab1, s03); 1422 t1 = VIS_LD_U8_I(tab0, s03); 1423 t0 = VIS_LD_U8_I(tab2, s02); 1424 acc1 = vis_faligndata(t7, acc1); 1425 acc1 = vis_faligndata(t6, acc1); 1426 acc1 = vis_faligndata(t5, acc1); 1427 acc1 = vis_faligndata(t4, acc1); 1428 acc1 = vis_faligndata(t3, acc1); 1429 acc1 = vis_faligndata(t2, acc1); 1430 acc1 = vis_faligndata(t1, acc1); 1431 acc1 = vis_faligndata(t0, acc1); 1432 t7 = VIS_LD_U8_I(tab2, s13); 1433 t6 = VIS_LD_U8_I(tab1, s13); 1434 t5 = VIS_LD_U8_I(tab0, s13); 1435 t4 = VIS_LD_U8_I(tab2, s12); 1436 t3 = VIS_LD_U8_I(tab1, s12); 1437 t2 = VIS_LD_U8_I(tab0, s12); 1438 t1 = VIS_LD_U8_I(tab2, s11); 1439 t0 = VIS_LD_U8_I(tab1, s11); 1440 acc2 = vis_faligndata(t7, acc2); 1441 acc2 = vis_faligndata(t6, acc2); 1442 acc2 = vis_faligndata(t5, acc2); 1443 acc2 = vis_faligndata(t4, acc2); 1444 acc2 = vis_faligndata(t3, acc2); 1445 acc2 = vis_faligndata(t2, acc2); 1446 acc2 = vis_faligndata(t1, acc2); 1447 acc2 = vis_faligndata(t0, acc2); 1448 *dp++ = acc0; 1449 *dp++ = acc1; 1450 *dp++ = acc2; 1451 i += 8; 1452 } 1453 1454 dl = (mlib_u8*)dp; 1455 1456#pragma pipeloop(0) 1457 for (; i < xsize; i++) { 1458 s00 = sp[0]; 1459 dl[0] = tab0[s00]; 1460 dl[1] = tab1[s00]; 1461 dl[2] = tab2[s00]; 1462 dl += 3; sp ++; 1463 } 1464} 1465 1466/***************************************************************/ 1467void mlib_v_ImageLookUpSI_U8_U8_3(const mlib_u8 *src, 1468 mlib_s32 slb, 1469 mlib_u8 *dst, 1470 mlib_s32 dlb, 1471 mlib_s32 xsize, 1472 mlib_s32 ysize, 1473 const mlib_u8 **table) 1474{ 1475 if ((xsize * ysize) < 650) { 1476 mlib_u8 *sl; 1477 mlib_u8 *dl; 1478 mlib_s32 i, j; 1479 const mlib_u8 *tab0 = table[0]; 1480 const mlib_u8 *tab1 = table[1]; 1481 const mlib_u8 *tab2 = table[2]; 1482 1483 sl = (void *)src; 1484 dl = dst; 1485 1486 /* row loop */ 1487 for (j = 0; j < ysize; j ++) { 1488 mlib_u8 *sp = sl; 1489 mlib_u8 *dp = dl; 1490 mlib_s32 off, s0, size = xsize; 1491 1492 off = (mlib_addr)dp & 7; 1493 off = (off * 5) & 7; 1494 off = (off < size) ? off : size; 1495 1496 for (i = 0; i < off; i++) { 1497 s0 = *sp++; 1498 *dp++ = tab0[s0]; 1499 *dp++ = tab1[s0]; 1500 *dp++ = tab2[s0]; 1501 size--; 1502 } 1503 1504 if (size > 0) { 1505 mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(sp, dp, size, table); 1506 } 1507 1508 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1509 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 1510 } 1511 1512 } else { 1513 mlib_u8 *sl; 1514 mlib_u8 *dl; 1515 mlib_u32 tab[512]; 1516 const mlib_u8 *tab0 = table[0]; 1517 const mlib_u8 *tab1 = table[1]; 1518 const mlib_u8 *tab2 = table[2]; 1519 mlib_s32 i, j; 1520 mlib_u32 s0, s1, s2, s3; 1521 1522 s0 = tab0[0]; 1523 s1 = tab1[0]; 1524 s2 = tab2[0]; 1525 for (i = 1; i < 256; i++) { 1526 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8); 1527 s0 = tab0[i]; 1528 s1 = tab1[i]; 1529 s2 = tab2[i]; 1530 tab[2*i-2] = s3; 1531 } 1532 1533 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8); 1534 tab[510] = s3; 1535 1536 sl = (void *)src; 1537 dl = dst; 1538 1539 /* row loop */ 1540 for (j = 0; j < ysize; j ++) { 1541 mlib_u8 *sp = sl; 1542 mlib_u8 *dp = dl; 1543 mlib_s32 off, size = xsize; 1544 mlib_u8 *ptr; 1545 1546 off = ((mlib_addr)dp & 3); 1547 off = (off < size) ? off : size; 1548 1549#pragma pipeloop(0) 1550 for (i = 0; i < off; i++) { 1551 ptr = (mlib_u8*)(tab + 2*sp[i]); 1552 dp[0] = ptr[0]; 1553 dp[1] = ptr[1]; 1554 dp[2] = ptr[2]; 1555 dp += 3; 1556 } 1557 1558 size -= off; 1559 sp += off; 1560 1561 if (size > 0) { 1562 off = (mlib_addr)sp & 3; 1563 1564 if (off == 0) { 1565 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(sp, dp, size, (mlib_d64*)tab); 1566 } else if (off == 1) { 1567 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(sp, dp, size, (mlib_d64*)tab); 1568 } else if (off == 2) { 1569 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(sp, dp, size, (mlib_d64*)tab); 1570 } else if (off == 3) { 1571 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(sp, dp, size, (mlib_d64*)tab); 1572 } 1573 } 1574 1575 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 1576 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 1577 } 1578 } 1579} 1580 1581/***************************************************************/ 1582void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src, 1583 mlib_u8 *dst, 1584 mlib_s32 xsize, 1585 const mlib_f32 *table) 1586{ 1587 mlib_u32 *sa; /* aligned pointer to source data */ 1588 mlib_u8 *sp; /* pointer to source data */ 1589 mlib_u32 s0; /* source data */ 1590 mlib_f32 *dp; /* aligned pointer to destination */ 1591 mlib_f32 acc0, acc1; /* destination data */ 1592 mlib_f32 acc2, acc3; /* destination data */ 1593 mlib_s32 i; /* loop variable */ 1594 mlib_u32 s00, s01, s02, s03; 1595 1596 sa = (mlib_u32*)src; 1597 dp = (mlib_f32 *) dst; 1598 1599 i = 0; 1600 1601 if (xsize >= 4) { 1602 1603 s0 = *sa++; 1604 s00 = (s0 >> 22) & 0x3FC; 1605 s01 = (s0 >> 14) & 0x3FC; 1606 1607#pragma pipeloop(0) 1608 for(i = 0; i <= xsize - 8; i+=4, dp += 4) { 1609 s02 = (s0 >> 6) & 0x3FC; 1610 s03 = (s0 << 2) & 0x3FC; 1611 acc0 = *(mlib_f32*)((mlib_u8*)table + s00); 1612 acc1 = *(mlib_f32*)((mlib_u8*)table + s01); 1613 acc2 = *(mlib_f32*)((mlib_u8*)table + s02); 1614 acc3 = *(mlib_f32*)((mlib_u8*)table + s03); 1615 s0 = *sa++; 1616 s00 = (s0 >> 22) & 0x3FC; 1617 s01 = (s0 >> 14) & 0x3FC; 1618 dp[0] = acc0; 1619 dp[1] = acc1; 1620 dp[2] = acc2; 1621 dp[3] = acc3; 1622 } 1623 1624 s02 = (s0 >> 6) & 0x3FC; 1625 s03 = (s0 << 2) & 0x3FC; 1626 acc0 = *(mlib_f32*)((mlib_u8*)table + s00); 1627 acc1 = *(mlib_f32*)((mlib_u8*)table + s01); 1628 acc2 = *(mlib_f32*)((mlib_u8*)table + s02); 1629 acc3 = *(mlib_f32*)((mlib_u8*)table + s03); 1630 dp[0] = acc0; 1631 dp[1] = acc1; 1632 dp[2] = acc2; 1633 dp[3] = acc3; 1634 dp += 4; 1635 i += 4; 1636 } 1637 1638 sp = (mlib_u8*)sa; 1639 1640 if ( i <= xsize - 2) { 1641 *dp++ = table[sp[0]]; 1642 *dp++ = table[sp[1]]; 1643 i+=2; sp += 2; 1644 } 1645 1646 if ( i < xsize) *dp = table[sp[0]]; 1647} 1648 1649/***************************************************************/ 1650void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src, 1651 mlib_u8 *dst, 1652 mlib_s32 xsize, 1653 const mlib_f32 *table) 1654{ 1655 mlib_u32 *sa; /* aligned pointer to source data */ 1656 mlib_u8 *sp; /* pointer to source data */ 1657 mlib_u32 s0; /* source data */ 1658 mlib_u8 *dl; /* pointer to start of destination */ 1659 mlib_d64 *dp; /* aligned pointer to destination */ 1660 mlib_d64 acc0, acc1, acc2; /* destination data */ 1661 mlib_s32 i; /* loop variable */ 1662 mlib_u8 *dend; /* pointer to end of destination */ 1663 mlib_s32 emask; /* edge mask */ 1664 mlib_s32 off; 1665 mlib_u32 s00, s01, s02, s03; 1666 1667 sa = (mlib_u32*)src; 1668 sp = (void *)src; 1669 dl = dst; 1670 dend = dl + (xsize << 2) - 1; 1671 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); 1672 off = (mlib_addr) dp - (mlib_addr) dl; 1673 vis_alignaddr(dp, off); 1674 1675 emask = vis_edge8(dl, dend); 1676 acc0 = vis_freg_pair(table[sp[0]], table[sp[1]]); 1677 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask); 1678 sp += 2; 1679 1680 xsize -= 2; 1681 1682 if (xsize >= 2) { 1683 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 1684 *dp++ = vis_faligndata(acc0, acc1); 1685 acc0 = acc1; 1686 sp += 2; xsize -= 2; 1687 } 1688 1689 sa++; 1690 1691 i = 0; 1692 1693 if (xsize >= 4) { 1694 1695 s0 = *sa++; 1696 s00 = (s0 >> 22) & 0x3FC; 1697 s01 = (s0 >> 14) & 0x3FC; 1698 1699#pragma pipeloop(0) 1700 for(i = 0; i <= xsize - 8; i+=4, dp += 2) { 1701 s02 = (s0 >> 6) & 0x3FC; 1702 s03 = (s0 << 2) & 0x3FC; 1703 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00), 1704 *(mlib_f32*)((mlib_u8*)table + s01)); 1705 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02), 1706 *(mlib_f32*)((mlib_u8*)table + s03)); 1707 s0 = *sa++; 1708 s00 = (s0 >> 22) & 0x3FC; 1709 s01 = (s0 >> 14) & 0x3FC; 1710 dp[0] = vis_faligndata(acc0, acc1); 1711 dp[1] = vis_faligndata(acc1, acc2); 1712 acc0 = acc2; 1713 } 1714 1715 s02 = (s0 >> 6) & 0x3FC; 1716 s03 = (s0 << 2) & 0x3FC; 1717 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00), 1718 *(mlib_f32*)((mlib_u8*)table + s01)); 1719 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02), 1720 *(mlib_f32*)((mlib_u8*)table + s03)); 1721 dp[0] = vis_faligndata(acc0, acc1); 1722 dp[1] = vis_faligndata(acc1, acc2); 1723 acc0 = acc2; 1724 sp = (mlib_u8*)sa; 1725 dp += 2; 1726 i += 4; 1727 } 1728 1729 if ( i <= xsize - 2) { 1730 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 1731 *dp++ = vis_faligndata(acc0, acc1); 1732 acc0 = acc1; 1733 i+=2; sp += 2; 1734 } 1735 1736 if ((mlib_addr) dp <= (mlib_addr) dend) { 1737 emask = vis_edge8(dp, dend); 1738 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); 1739 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask); 1740 } 1741 1742 if ((mlib_addr) dp <= (mlib_addr) dend) { 1743 emask = vis_edge8(dp, dend); 1744 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask); 1745 } 1746} 1747 1748/***************************************************************/ 1749void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src, 1750 mlib_u8 *dst, 1751 mlib_s32 xsize, 1752 const mlib_u8 **table) 1753{ 1754 mlib_u8 *sp; /* pointer to source data */ 1755 mlib_u32 s0, s1; /* source data */ 1756 mlib_u8 *dl; /* pointer to start of destination */ 1757 mlib_d64 *dp; /* aligned pointer to destination */ 1758 mlib_d64 t0, t1, t2; /* destination data */ 1759 mlib_d64 t3, t4, t5; /* destination data */ 1760 mlib_d64 t6, t7, acc; /* destination data */ 1761 mlib_s32 i; /* loop variable */ 1762 const mlib_u8 *tab0 = table[0]; 1763 const mlib_u8 *tab1 = table[1]; 1764 const mlib_u8 *tab2 = table[2]; 1765 const mlib_u8 *tab3 = table[3]; 1766 1767 sp = (void *)src; 1768 dl = dst; 1769 dp = (mlib_d64 *) dl; 1770 1771 vis_alignaddr((void *) 0, 7); 1772 1773 if (xsize >= 2) { 1774 1775 s0 = sp[0]; 1776 s1 = sp[1]; 1777 sp += 2; 1778 1779#pragma pipeloop(0) 1780 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { 1781 t7 = VIS_LD_U8_I(tab3, s1); 1782 t6 = VIS_LD_U8_I(tab2, s1); 1783 t5 = VIS_LD_U8_I(tab1, s1); 1784 t4 = VIS_LD_U8_I(tab0, s1); 1785 t3 = VIS_LD_U8_I(tab3, s0); 1786 t2 = VIS_LD_U8_I(tab2, s0); 1787 t1 = VIS_LD_U8_I(tab1, s0); 1788 t0 = VIS_LD_U8_I(tab0, s0); 1789 acc = vis_faligndata(t7, acc); 1790 acc = vis_faligndata(t6, acc); 1791 acc = vis_faligndata(t5, acc); 1792 acc = vis_faligndata(t4, acc); 1793 acc = vis_faligndata(t3, acc); 1794 acc = vis_faligndata(t2, acc); 1795 acc = vis_faligndata(t1, acc); 1796 acc = vis_faligndata(t0, acc); 1797 s0 = sp[0]; 1798 s1 = sp[1]; 1799 *dp++ = acc; 1800 } 1801 1802 t7 = VIS_LD_U8_I(tab3, s1); 1803 t6 = VIS_LD_U8_I(tab2, s1); 1804 t5 = VIS_LD_U8_I(tab1, s1); 1805 t4 = VIS_LD_U8_I(tab0, s1); 1806 t3 = VIS_LD_U8_I(tab3, s0); 1807 t2 = VIS_LD_U8_I(tab2, s0); 1808 t1 = VIS_LD_U8_I(tab1, s0); 1809 t0 = VIS_LD_U8_I(tab0, s0); 1810 acc = vis_faligndata(t7, acc); 1811 acc = vis_faligndata(t6, acc); 1812 acc = vis_faligndata(t5, acc); 1813 acc = vis_faligndata(t4, acc); 1814 acc = vis_faligndata(t3, acc); 1815 acc = vis_faligndata(t2, acc); 1816 acc = vis_faligndata(t1, acc); 1817 acc = vis_faligndata(t0, acc); 1818 *dp++ = acc; 1819 } 1820 1821 if ((xsize & 1) != 0) { 1822 s0 = sp[0]; 1823 t7 = VIS_LD_U8_I(tab3, s0); 1824 t6 = VIS_LD_U8_I(tab2, s0); 1825 t5 = VIS_LD_U8_I(tab1, s0); 1826 t4 = VIS_LD_U8_I(tab0, s0); 1827 acc = vis_faligndata(t7, acc); 1828 acc = vis_faligndata(t6, acc); 1829 acc = vis_faligndata(t5, acc); 1830 acc = vis_faligndata(t4, acc); 1831 *(mlib_f32*)dp = vis_read_hi(acc); 1832 } 1833} 1834 1835/***************************************************************/ 1836void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src, 1837 mlib_u8 *dst, 1838 mlib_s32 xsize, 1839 const mlib_u8 **table) 1840{ 1841 mlib_u8 *sp; /* pointer to source data */ 1842 mlib_u32 s0, s1, s2; /* source data */ 1843 mlib_u8 *dl; /* pointer to start of destination */ 1844 mlib_d64 *dp; /* aligned pointer to destination */ 1845 mlib_d64 t0, t1, t2; /* destination data */ 1846 mlib_d64 t3, t4, t5; /* destination data */ 1847 mlib_d64 t6, t7, acc; /* destination data */ 1848 mlib_s32 i; /* loop variable */ 1849 const mlib_u8 *tab0 = table[0]; 1850 const mlib_u8 *tab1 = table[1]; 1851 const mlib_u8 *tab2 = table[2]; 1852 const mlib_u8 *tab3 = table[3]; 1853 1854 sp = (void *)src; 1855 dl = dst; 1856 dp = (mlib_d64 *) dl; 1857 1858 vis_alignaddr((void *) 0, 7); 1859 1860 s0 = *sp++; 1861 1862 if (xsize >= 2) { 1863 1864 s1 = sp[0]; 1865 s2 = sp[1]; 1866 sp += 2; 1867 1868#pragma pipeloop(0) 1869 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { 1870 t7 = VIS_LD_U8_I(tab0, s2); 1871 t6 = VIS_LD_U8_I(tab3, s1); 1872 t5 = VIS_LD_U8_I(tab2, s1); 1873 t4 = VIS_LD_U8_I(tab1, s1); 1874 t3 = VIS_LD_U8_I(tab0, s1); 1875 t2 = VIS_LD_U8_I(tab3, s0); 1876 t1 = VIS_LD_U8_I(tab2, s0); 1877 t0 = VIS_LD_U8_I(tab1, s0); 1878 acc = vis_faligndata(t7, acc); 1879 acc = vis_faligndata(t6, acc); 1880 acc = vis_faligndata(t5, acc); 1881 acc = vis_faligndata(t4, acc); 1882 acc = vis_faligndata(t3, acc); 1883 acc = vis_faligndata(t2, acc); 1884 acc = vis_faligndata(t1, acc); 1885 acc = vis_faligndata(t0, acc); 1886 s0 = s2; 1887 s1 = sp[0]; 1888 s2 = sp[1]; 1889 *dp++ = acc; 1890 } 1891 1892 t7 = VIS_LD_U8_I(tab0, s2); 1893 t6 = VIS_LD_U8_I(tab3, s1); 1894 t5 = VIS_LD_U8_I(tab2, s1); 1895 t4 = VIS_LD_U8_I(tab1, s1); 1896 t3 = VIS_LD_U8_I(tab0, s1); 1897 t2 = VIS_LD_U8_I(tab3, s0); 1898 t1 = VIS_LD_U8_I(tab2, s0); 1899 t0 = VIS_LD_U8_I(tab1, s0); 1900 acc = vis_faligndata(t7, acc); 1901 acc = vis_faligndata(t6, acc); 1902 acc = vis_faligndata(t5, acc); 1903 acc = vis_faligndata(t4, acc); 1904 acc = vis_faligndata(t3, acc); 1905 acc = vis_faligndata(t2, acc); 1906 acc = vis_faligndata(t1, acc); 1907 acc = vis_faligndata(t0, acc); 1908 s0 = s2; 1909 *dp++ = acc; 1910 } 1911 1912 dl = (mlib_u8*)dp; 1913 1914 if ((xsize & 1) != 0) { 1915 s1 = sp[0]; 1916 t7 = VIS_LD_U8_I(tab0, s1); 1917 t6 = VIS_LD_U8_I(tab3, s0); 1918 t5 = VIS_LD_U8_I(tab2, s0); 1919 t4 = VIS_LD_U8_I(tab1, s0); 1920 acc = vis_faligndata(t7, acc); 1921 acc = vis_faligndata(t6, acc); 1922 acc = vis_faligndata(t5, acc); 1923 acc = vis_faligndata(t4, acc); 1924 *(mlib_f32*)dl = vis_read_hi(acc); 1925 dl += 4; 1926 s0 = s1; 1927 } 1928 1929 dl[0] = tab1[s0]; 1930 dl[1] = tab2[s0]; 1931 dl[2] = tab3[s0]; 1932} 1933 1934/***************************************************************/ 1935void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src, 1936 mlib_u8 *dst, 1937 mlib_s32 xsize, 1938 const mlib_u8 **table) 1939{ 1940 mlib_u8 *sp; /* pointer to source data */ 1941 mlib_u32 s0, s1, s2; /* source data */ 1942 mlib_u8 *dl; /* pointer to start of destination */ 1943 mlib_d64 *dp; /* aligned pointer to destination */ 1944 mlib_d64 t0, t1, t2; /* destination data */ 1945 mlib_d64 t3, t4, t5; /* destination data */ 1946 mlib_d64 t6, t7, acc; /* destination data */ 1947 mlib_s32 i; /* loop variable */ 1948 const mlib_u8 *tab0 = table[0]; 1949 const mlib_u8 *tab1 = table[1]; 1950 const mlib_u8 *tab2 = table[2]; 1951 const mlib_u8 *tab3 = table[3]; 1952 1953 sp = (void *)src; 1954 dl = dst; 1955 dp = (mlib_d64 *) dl; 1956 1957 vis_alignaddr((void *) 0, 7); 1958 1959 s0 = *sp++; 1960 1961 if (xsize >= 2) { 1962 1963 s1 = sp[0]; 1964 s2 = sp[1]; 1965 sp += 2; 1966 1967#pragma pipeloop(0) 1968 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { 1969 t7 = VIS_LD_U8_I(tab1, s2); 1970 t6 = VIS_LD_U8_I(tab0, s2); 1971 t5 = VIS_LD_U8_I(tab3, s1); 1972 t4 = VIS_LD_U8_I(tab2, s1); 1973 t3 = VIS_LD_U8_I(tab1, s1); 1974 t2 = VIS_LD_U8_I(tab0, s1); 1975 t1 = VIS_LD_U8_I(tab3, s0); 1976 t0 = VIS_LD_U8_I(tab2, s0); 1977 acc = vis_faligndata(t7, acc); 1978 acc = vis_faligndata(t6, acc); 1979 acc = vis_faligndata(t5, acc); 1980 acc = vis_faligndata(t4, acc); 1981 acc = vis_faligndata(t3, acc); 1982 acc = vis_faligndata(t2, acc); 1983 acc = vis_faligndata(t1, acc); 1984 acc = vis_faligndata(t0, acc); 1985 s0 = s2; 1986 s1 = sp[0]; 1987 s2 = sp[1]; 1988 *dp++ = acc; 1989 } 1990 1991 t7 = VIS_LD_U8_I(tab1, s2); 1992 t6 = VIS_LD_U8_I(tab0, s2); 1993 t5 = VIS_LD_U8_I(tab3, s1); 1994 t4 = VIS_LD_U8_I(tab2, s1); 1995 t3 = VIS_LD_U8_I(tab1, s1); 1996 t2 = VIS_LD_U8_I(tab0, s1); 1997 t1 = VIS_LD_U8_I(tab3, s0); 1998 t0 = VIS_LD_U8_I(tab2, s0); 1999 acc = vis_faligndata(t7, acc); 2000 acc = vis_faligndata(t6, acc); 2001 acc = vis_faligndata(t5, acc); 2002 acc = vis_faligndata(t4, acc); 2003 acc = vis_faligndata(t3, acc); 2004 acc = vis_faligndata(t2, acc); 2005 acc = vis_faligndata(t1, acc); 2006 acc = vis_faligndata(t0, acc); 2007 s0 = s2; 2008 *dp++ = acc; 2009 } 2010 2011 dl = (mlib_u8*)dp; 2012 2013 if ((xsize & 1) != 0) { 2014 s1 = sp[0]; 2015 t7 = VIS_LD_U8_I(tab1, s1); 2016 t6 = VIS_LD_U8_I(tab0, s1); 2017 t5 = VIS_LD_U8_I(tab3, s0); 2018 t4 = VIS_LD_U8_I(tab2, s0); 2019 acc = vis_faligndata(t7, acc); 2020 acc = vis_faligndata(t6, acc); 2021 acc = vis_faligndata(t5, acc); 2022 acc = vis_faligndata(t4, acc); 2023 *(mlib_f32*)dl = vis_read_hi(acc); 2024 dl += 4; 2025 s0 = s1; 2026 } 2027 2028 dl[0] = tab2[s0]; 2029 dl[1] = tab3[s0]; 2030} 2031 2032/***************************************************************/ 2033void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src, 2034 mlib_u8 *dst, 2035 mlib_s32 xsize, 2036 const mlib_u8 **table) 2037{ 2038 mlib_u8 *sp; /* pointer to source data */ 2039 mlib_u32 s0, s1, s2; /* source data */ 2040 mlib_u8 *dl; /* pointer to start of destination */ 2041 mlib_d64 *dp; /* aligned pointer to destination */ 2042 mlib_d64 t0, t1, t2; /* destination data */ 2043 mlib_d64 t3, t4, t5; /* destination data */ 2044 mlib_d64 t6, t7, acc; /* destination data */ 2045 mlib_s32 i; /* loop variable */ 2046 const mlib_u8 *tab0 = table[0]; 2047 const mlib_u8 *tab1 = table[1]; 2048 const mlib_u8 *tab2 = table[2]; 2049 const mlib_u8 *tab3 = table[3]; 2050 2051 sp = (void *)src; 2052 dl = dst; 2053 dp = (mlib_d64 *) dl; 2054 2055 vis_alignaddr((void *) 0, 7); 2056 2057 s0 = *sp++; 2058 2059 if (xsize >= 2) { 2060 2061 s1 = sp[0]; 2062 s2 = sp[1]; 2063 sp += 2; 2064 2065#pragma pipeloop(0) 2066 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { 2067 t7 = VIS_LD_U8_I(tab2, s2); 2068 t6 = VIS_LD_U8_I(tab1, s2); 2069 t5 = VIS_LD_U8_I(tab0, s2); 2070 t4 = VIS_LD_U8_I(tab3, s1); 2071 t3 = VIS_LD_U8_I(tab2, s1); 2072 t2 = VIS_LD_U8_I(tab1, s1); 2073 t1 = VIS_LD_U8_I(tab0, s1); 2074 t0 = VIS_LD_U8_I(tab3, s0); 2075 acc = vis_faligndata(t7, acc); 2076 acc = vis_faligndata(t6, acc); 2077 acc = vis_faligndata(t5, acc); 2078 acc = vis_faligndata(t4, acc); 2079 acc = vis_faligndata(t3, acc); 2080 acc = vis_faligndata(t2, acc); 2081 acc = vis_faligndata(t1, acc); 2082 acc = vis_faligndata(t0, acc); 2083 s0 = s2; 2084 s1 = sp[0]; 2085 s2 = sp[1]; 2086 *dp++ = acc; 2087 } 2088 2089 t7 = VIS_LD_U8_I(tab2, s2); 2090 t6 = VIS_LD_U8_I(tab1, s2); 2091 t5 = VIS_LD_U8_I(tab0, s2); 2092 t4 = VIS_LD_U8_I(tab3, s1); 2093 t3 = VIS_LD_U8_I(tab2, s1); 2094 t2 = VIS_LD_U8_I(tab1, s1); 2095 t1 = VIS_LD_U8_I(tab0, s1); 2096 t0 = VIS_LD_U8_I(tab3, s0); 2097 acc = vis_faligndata(t7, acc); 2098 acc = vis_faligndata(t6, acc); 2099 acc = vis_faligndata(t5, acc); 2100 acc = vis_faligndata(t4, acc); 2101 acc = vis_faligndata(t3, acc); 2102 acc = vis_faligndata(t2, acc); 2103 acc = vis_faligndata(t1, acc); 2104 acc = vis_faligndata(t0, acc); 2105 s0 = s2; 2106 *dp++ = acc; 2107 } 2108 2109 dl = (mlib_u8*)dp; 2110 2111 if ((xsize & 1) != 0) { 2112 s1 = sp[0]; 2113 t7 = VIS_LD_U8_I(tab2, s1); 2114 t6 = VIS_LD_U8_I(tab1, s1); 2115 t5 = VIS_LD_U8_I(tab0, s1); 2116 t4 = VIS_LD_U8_I(tab3, s0); 2117 acc = vis_faligndata(t7, acc); 2118 acc = vis_faligndata(t6, acc); 2119 acc = vis_faligndata(t5, acc); 2120 acc = vis_faligndata(t4, acc); 2121 *(mlib_f32*)dl = vis_read_hi(acc); 2122 dl += 4; 2123 s0 = s1; 2124 } 2125 2126 dl[0] = tab3[s0]; 2127} 2128 2129/***************************************************************/ 2130void mlib_v_ImageLookUpSI_U8_U8_4(const mlib_u8 *src, 2131 mlib_s32 slb, 2132 mlib_u8 *dst, 2133 mlib_s32 dlb, 2134 mlib_s32 xsize, 2135 mlib_s32 ysize, 2136 const mlib_u8 **table) 2137{ 2138 if ((xsize * ysize) < 500) { 2139 mlib_u8 *sl; 2140 mlib_u8 *dl; 2141 mlib_s32 j; 2142 const mlib_u8 *tab0 = table[0]; 2143 const mlib_u8 *tab1 = table[1]; 2144 const mlib_u8 *tab2 = table[2]; 2145 const mlib_u8 *tab3 = table[3]; 2146 2147 sl = (void *)src; 2148 dl = dst; 2149 2150 /* row loop */ 2151 for (j = 0; j < ysize; j ++) { 2152 mlib_u8 *sp = sl; 2153 mlib_u8 *dp = dl; 2154 mlib_s32 off, s0, size = xsize; 2155 2156 off = (8 - ((mlib_addr)dp & 7)) & 7; 2157 2158 if ((off >= 4) && (size > 0)) { 2159 s0 = *sp++; 2160 *dp++ = tab0[s0]; 2161 *dp++ = tab1[s0]; 2162 *dp++ = tab2[s0]; 2163 *dp++ = tab3[s0]; 2164 size--; 2165 } 2166 2167 if (size > 0) { 2168 off = (4 - ((mlib_addr)dp & 3)) & 3; 2169 2170 if (off == 0) { 2171 mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(sp, dp, size, table); 2172 } else if (off == 1) { 2173 s0 = *sp; 2174 *dp++ = tab0[s0]; 2175 size--; 2176 mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(sp, dp, size, table); 2177 } else if (off == 2) { 2178 s0 = *sp; 2179 *dp++ = tab0[s0]; 2180 *dp++ = tab1[s0]; 2181 size--; 2182 mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(sp, dp, size, table); 2183 } else if (off == 3) { 2184 s0 = *sp; 2185 *dp++ = tab0[s0]; 2186 *dp++ = tab1[s0]; 2187 *dp++ = tab2[s0]; 2188 size--; 2189 mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(sp, dp, size, table); 2190 } 2191 } 2192 2193 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 2194 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 2195 } 2196 2197 } else { 2198 mlib_u8 *sl; 2199 mlib_u8 *dl; 2200 mlib_u32 tab[256]; 2201 const mlib_u8 *tab0 = table[0]; 2202 const mlib_u8 *tab1 = table[1]; 2203 const mlib_u8 *tab2 = table[2]; 2204 const mlib_u8 *tab3 = table[3]; 2205 mlib_s32 i, j; 2206 mlib_u32 s0, s1, s2, s3, s4; 2207 2208 s0 = tab0[0]; 2209 s1 = tab1[0]; 2210 s2 = tab2[0]; 2211 s3 = tab3[0]; 2212 for (i = 1; i < 256; i++) { 2213 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; 2214 s0 = tab0[i]; 2215 s1 = tab1[i]; 2216 s2 = tab2[i]; 2217 s3 = tab3[i]; 2218 tab[i-1] = s4; 2219 } 2220 2221 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; 2222 tab[255] = s4; 2223 2224 sl = (void *)src; 2225 dl = dst; 2226 2227 /* row loop */ 2228 for (j = 0; j < ysize; j ++) { 2229 mlib_u8 *sp = sl; 2230 mlib_u8 *dp = dl; 2231 mlib_s32 off, size = xsize; 2232 2233 if (((mlib_addr)dp & 3) == 0) { 2234 off = (4 - (mlib_addr)sp & 3) & 3; 2235 2236 off = (off < size) ? off : size; 2237 2238#pragma pipeloop(0) 2239 for (i = 0; i < off; i++) { 2240 *(mlib_u32*)dp = tab[(*sp)]; 2241 dp += 4; sp++; 2242 } 2243 2244 size -= off; 2245 2246 if (size > 0) { 2247 mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(sp, dp, size, (mlib_f32*)tab); 2248 } 2249 2250 } else { 2251 2252 off = ((4 - ((mlib_addr)sp & 3)) & 3); 2253 off = (off < size) ? off : size; 2254 2255 for (i = 0; i < off; i++) { 2256 s0 = tab[(*sp)]; 2257 *dp++ = (s0 >> 24); 2258 *dp++ = (s0 >> 16); 2259 *dp++ = (s0 >> 8); 2260 *dp++ = s0; 2261 size--; sp++; 2262 } 2263 2264 if (size > 0) { 2265 mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(sp, dp, size, (mlib_f32*)tab); 2266 } 2267 } 2268 2269 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); 2270 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); 2271 } 2272 } 2273} 2274 2275/***************************************************************/ 2276