1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
/*
 * Forward declarations of the file-local single-row look-up kernels.
 *
 * Naming:
 *   _124_  kernels take four tables (table0..table3).  The 1-, 2- and
 *          4-channel entry points in this file all call them, passing the
 *          same table four times, two tables alternating, or four distinct
 *          tables respectively.
 *   _3_    kernels take three tables (table0..table2).
 *   SrcOffN  the 1/2/4-channel entry points pick the variant from the low
 *          two bits of the source address ((mlib_addr) sp & 3).
 *          NOTE(review): the caller of the _3_ variants is outside this
 *          excerpt; presumably it selects them the same way - confirm.
 *
 * The definitions further down omit the `static` keyword.  They still have
 * internal linkage because these `static` declarations are already visible
 * at the point of definition.
 */
static void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8  *src,
                                                     mlib_s16       *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_s16 *table0,
                                                     const mlib_s16 *table1,
                                                     const mlib_s16 *table2,
                                                     const mlib_s16 *table3);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);

static void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8  *src,
                                                   mlib_s16       *dst,
                                                   mlib_s32       xsize,
                                                   const mlib_s16 *table0,
                                                   const mlib_s16 *table1,
                                                   const mlib_s16 *table2);
92
93/***************************************************************/
/*
 * Thin wrapper around vis_ld_u16_i().  X is the base pointer; it is cast to
 * `void *`, which also drops the `const` of the table pointers passed at
 * the call sites.  Y is forwarded unchanged; every call site in this file
 * passes a byte offset (source byte value * 2, i.e. an index into a table
 * of mlib_s16).
 * NOTE(review): presumably vis_ld_u16_i loads the 16-bit value at X + Y
 * into the low halfword of an mlib_d64 - confirm against vis_proto.h.
 */
#define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
95
96/***************************************************************/
97void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8  *src,
98                                              mlib_s16       *dst,
99                                              mlib_s32       xsize,
100                                              const mlib_s16 *table0,
101                                              const mlib_s16 *table1,
102                                              const mlib_s16 *table2,
103                                              const mlib_s16 *table3)
104{
105  mlib_u32 *sa;                        /* aligned pointer to source data */
106  mlib_u8 *sp;                         /* pointer to source data */
107  mlib_u32 s0;                         /* source data */
108  mlib_s16 *dl;                        /* pointer to start of destination */
109  mlib_s16 *dend;                      /* pointer to end of destination */
110  mlib_d64 *dp;                        /* aligned pointer to destination */
111  mlib_d64 t0, t1, t2;                 /* destination data */
112  mlib_d64 t3, acc0;                   /* destination data */
113  mlib_s32 emask;                      /* edge mask */
114  mlib_s32 i, num;                     /* loop variable */
115
116  sa = (mlib_u32 *) src;
117  dl = dst;
118  dp = (mlib_d64 *) dl;
119  dend = dl + xsize - 1;
120
121  vis_alignaddr((void *)0, 6);
122
123  i = 0;
124
125  if (xsize >= 4) {
126
127    s0 = *sa++;
128
129#pragma pipeloop(0)
130    for (i = 0; i <= xsize - 8; i += 4) {
131      t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
132      t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
133      t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
134      t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
135      acc0 = vis_faligndata(t3, acc0);
136      acc0 = vis_faligndata(t2, acc0);
137      acc0 = vis_faligndata(t1, acc0);
138      acc0 = vis_faligndata(t0, acc0);
139      s0 = *sa++;
140      *dp++ = acc0;
141    }
142
143    t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
144    t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
145    t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
146    t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
147    acc0 = vis_faligndata(t3, acc0);
148    acc0 = vis_faligndata(t2, acc0);
149    acc0 = vis_faligndata(t1, acc0);
150    acc0 = vis_faligndata(t0, acc0);
151    *dp++ = acc0;
152  }
153
154  sp = (mlib_u8 *) sa;
155
156  if ((mlib_addr) dp <= (mlib_addr) dend) {
157
158    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
159    sp += num;
160    num++;
161
162    if (num == 1) {
163      s0 = (mlib_s32) * sp;
164      sp--;
165
166      t0 = VIS_LD_U16_I(table0, 2 * s0);
167      acc0 = vis_faligndata(t0, acc0);
168    }
169    else if (num == 2) {
170      s0 = (mlib_s32) * sp;
171      sp--;
172
173      t0 = VIS_LD_U16_I(table1, 2 * s0);
174      acc0 = vis_faligndata(t0, acc0);
175
176      s0 = (mlib_s32) * sp;
177      sp--;
178
179      t0 = VIS_LD_U16_I(table0, 2 * s0);
180      acc0 = vis_faligndata(t0, acc0);
181    }
182    else if (num == 3) {
183      s0 = (mlib_s32) * sp;
184      sp--;
185
186      t0 = VIS_LD_U16_I(table2, 2 * s0);
187      acc0 = vis_faligndata(t0, acc0);
188
189      s0 = (mlib_s32) * sp;
190      sp--;
191
192      t0 = VIS_LD_U16_I(table1, 2 * s0);
193      acc0 = vis_faligndata(t0, acc0);
194
195      s0 = (mlib_s32) * sp;
196      sp--;
197
198      t0 = VIS_LD_U16_I(table0, 2 * s0);
199      acc0 = vis_faligndata(t0, acc0);
200    }
201
202    emask = vis_edge16(dp, dend);
203    vis_pst_16(acc0, dp, emask);
204  }
205}
206
207/***************************************************************/
208void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8  *src,
209                                              mlib_s16       *dst,
210                                              mlib_s32       xsize,
211                                              const mlib_s16 *table0,
212                                              const mlib_s16 *table1,
213                                              const mlib_s16 *table2,
214                                              const mlib_s16 *table3)
215{
216  mlib_u32 *sa;                        /* aligned pointer to source data */
217  mlib_u8 *sp;                         /* pointer to source data */
218  mlib_u32 s0, s1;                     /* source data */
219  mlib_s16 *dl;                        /* pointer to start of destination */
220  mlib_s16 *dend;                      /* pointer to end of destination */
221  mlib_d64 *dp;                        /* aligned pointer to destination */
222  mlib_d64 t0, t1, t2;                 /* destination data */
223  mlib_d64 t3, acc0;                   /* destination data */
224  mlib_s32 emask;                      /* edge mask */
225  mlib_s32 i, num;                     /* loop variable */
226
227  sa = (mlib_u32 *) (src - 1);
228  dl = dst;
229  dp = (mlib_d64 *) dl;
230  dend = dl + xsize - 1;
231
232  vis_alignaddr((void *)0, 6);
233
234  s0 = *sa++;
235
236  if (xsize >= 4) {
237
238    s1 = *sa++;
239
240#pragma pipeloop(0)
241    for (i = 0; i <= xsize - 8; i += 4) {
242      t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
243      t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
244      t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
245      t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
246      acc0 = vis_faligndata(t3, acc0);
247      acc0 = vis_faligndata(t2, acc0);
248      acc0 = vis_faligndata(t1, acc0);
249      acc0 = vis_faligndata(t0, acc0);
250      s0 = s1;
251      s1 = *sa++;
252      *dp++ = acc0;
253    }
254
255    t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
256    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
257    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
258    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
259    acc0 = vis_faligndata(t3, acc0);
260    acc0 = vis_faligndata(t2, acc0);
261    acc0 = vis_faligndata(t1, acc0);
262    acc0 = vis_faligndata(t0, acc0);
263    s0 = s1;
264    *dp++ = acc0;
265  }
266
267  sp = (mlib_u8 *) sa;
268  sp -= 3;
269
270  if ((mlib_addr) dp <= (mlib_addr) dend) {
271
272    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
273    sp += num;
274    num++;
275
276    if (num == 1) {
277      s0 = (mlib_s32) * sp;
278      sp--;
279
280      t0 = VIS_LD_U16_I(table0, 2 * s0);
281      acc0 = vis_faligndata(t0, acc0);
282    }
283    else if (num == 2) {
284      s0 = (mlib_s32) * sp;
285      sp--;
286
287      t0 = VIS_LD_U16_I(table1, 2 * s0);
288      acc0 = vis_faligndata(t0, acc0);
289
290      s0 = (mlib_s32) * sp;
291      sp--;
292
293      t0 = VIS_LD_U16_I(table0, 2 * s0);
294      acc0 = vis_faligndata(t0, acc0);
295    }
296    else if (num == 3) {
297      s0 = (mlib_s32) * sp;
298      sp--;
299
300      t0 = VIS_LD_U16_I(table2, 2 * s0);
301      acc0 = vis_faligndata(t0, acc0);
302
303      s0 = (mlib_s32) * sp;
304      sp--;
305
306      t0 = VIS_LD_U16_I(table1, 2 * s0);
307      acc0 = vis_faligndata(t0, acc0);
308
309      s0 = (mlib_s32) * sp;
310      sp--;
311
312      t0 = VIS_LD_U16_I(table0, 2 * s0);
313      acc0 = vis_faligndata(t0, acc0);
314    }
315
316    emask = vis_edge16(dp, dend);
317    vis_pst_16(acc0, dp, emask);
318  }
319}
320
321/***************************************************************/
322void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8  *src,
323                                              mlib_s16       *dst,
324                                              mlib_s32       xsize,
325                                              const mlib_s16 *table0,
326                                              const mlib_s16 *table1,
327                                              const mlib_s16 *table2,
328                                              const mlib_s16 *table3)
329{
330  mlib_u32 *sa;                        /* aligned pointer to source data */
331  mlib_u8 *sp;                         /* pointer to source data */
332  mlib_u32 s0, s1;                     /* source data */
333  mlib_s16 *dl;                        /* pointer to start of destination */
334  mlib_s16 *dend;                      /* pointer to end of destination */
335  mlib_d64 *dp;                        /* aligned pointer to destination */
336  mlib_d64 t0, t1, t2;                 /* destination data */
337  mlib_d64 t3, acc0;                   /* destination data */
338  mlib_s32 emask;                      /* edge mask */
339  mlib_s32 i, num;                     /* loop variable */
340
341  sa = (mlib_u32 *) (src - 2);
342  dl = dst;
343  dp = (mlib_d64 *) dl;
344  dend = dl + xsize - 1;
345
346  vis_alignaddr((void *)0, 6);
347
348  s0 = *sa++;
349
350  if (xsize >= 4) {
351
352    s1 = *sa++;
353
354#pragma pipeloop(0)
355    for (i = 0; i <= xsize - 8; i += 4) {
356      t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
357      t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
358      t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
359      t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
360      acc0 = vis_faligndata(t3, acc0);
361      acc0 = vis_faligndata(t2, acc0);
362      acc0 = vis_faligndata(t1, acc0);
363      acc0 = vis_faligndata(t0, acc0);
364      s0 = s1;
365      s1 = *sa++;
366      *dp++ = acc0;
367    }
368
369    t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
370    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
371    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
372    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
373    acc0 = vis_faligndata(t3, acc0);
374    acc0 = vis_faligndata(t2, acc0);
375    acc0 = vis_faligndata(t1, acc0);
376    acc0 = vis_faligndata(t0, acc0);
377    s0 = s1;
378    *dp++ = acc0;
379  }
380
381  sp = (mlib_u8 *) sa;
382  sp -= 2;
383
384  if ((mlib_addr) dp <= (mlib_addr) dend) {
385
386    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
387    sp += num;
388    num++;
389
390    if (num == 1) {
391      s0 = (mlib_s32) * sp;
392      sp--;
393
394      t0 = VIS_LD_U16_I(table0, 2 * s0);
395      acc0 = vis_faligndata(t0, acc0);
396    }
397    else if (num == 2) {
398      s0 = (mlib_s32) * sp;
399      sp--;
400
401      t0 = VIS_LD_U16_I(table1, 2 * s0);
402      acc0 = vis_faligndata(t0, acc0);
403
404      s0 = (mlib_s32) * sp;
405      sp--;
406
407      t0 = VIS_LD_U16_I(table0, 2 * s0);
408      acc0 = vis_faligndata(t0, acc0);
409    }
410    else if (num == 3) {
411      s0 = (mlib_s32) * sp;
412      sp--;
413
414      t0 = VIS_LD_U16_I(table2, 2 * s0);
415      acc0 = vis_faligndata(t0, acc0);
416
417      s0 = (mlib_s32) * sp;
418      sp--;
419
420      t0 = VIS_LD_U16_I(table1, 2 * s0);
421      acc0 = vis_faligndata(t0, acc0);
422
423      s0 = (mlib_s32) * sp;
424      sp--;
425
426      t0 = VIS_LD_U16_I(table0, 2 * s0);
427      acc0 = vis_faligndata(t0, acc0);
428    }
429
430    emask = vis_edge16(dp, dend);
431    vis_pst_16(acc0, dp, emask);
432  }
433}
434
435/***************************************************************/
436void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8  *src,
437                                              mlib_s16       *dst,
438                                              mlib_s32       xsize,
439                                              const mlib_s16 *table0,
440                                              const mlib_s16 *table1,
441                                              const mlib_s16 *table2,
442                                              const mlib_s16 *table3)
443{
444  mlib_u32 *sa;                        /* aligned pointer to source data */
445  mlib_u8 *sp;                         /* pointer to source data */
446  mlib_u32 s0, s1;                     /* source data */
447  mlib_s16 *dl;                        /* pointer to start of destination */
448  mlib_s16 *dend;                      /* pointer to end of destination */
449  mlib_d64 *dp;                        /* aligned pointer to destination */
450  mlib_d64 t0, t1, t2;                 /* destination data */
451  mlib_d64 t3, acc0;                   /* destination data */
452  mlib_s32 emask;                      /* edge mask */
453  mlib_s32 i, num;                     /* loop variable */
454
455  sa = (mlib_u32 *) (src - 3);
456  dl = dst;
457  dp = (mlib_d64 *) dl;
458  dend = dl + xsize - 1;
459
460  vis_alignaddr((void *)0, 6);
461
462  s0 = *sa++;
463
464  if (xsize >= 4) {
465
466    s1 = *sa++;
467
468#pragma pipeloop(0)
469    for (i = 0; i <= xsize - 8; i += 4) {
470      t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
471      t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
472      t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
473      t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
474      acc0 = vis_faligndata(t3, acc0);
475      acc0 = vis_faligndata(t2, acc0);
476      acc0 = vis_faligndata(t1, acc0);
477      acc0 = vis_faligndata(t0, acc0);
478      s0 = s1;
479      s1 = *sa++;
480      *dp++ = acc0;
481    }
482
483    t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
484    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
485    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
486    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
487    acc0 = vis_faligndata(t3, acc0);
488    acc0 = vis_faligndata(t2, acc0);
489    acc0 = vis_faligndata(t1, acc0);
490    acc0 = vis_faligndata(t0, acc0);
491    s0 = s1;
492    *dp++ = acc0;
493  }
494
495  sp = (mlib_u8 *) sa;
496  sp -= 1;
497
498  if ((mlib_addr) dp <= (mlib_addr) dend) {
499
500    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
501    sp += num;
502    num++;
503
504    if (num == 1) {
505      s0 = (mlib_s32) * sp;
506      sp--;
507
508      t0 = VIS_LD_U16_I(table0, 2 * s0);
509      acc0 = vis_faligndata(t0, acc0);
510    }
511    else if (num == 2) {
512      s0 = (mlib_s32) * sp;
513      sp--;
514
515      t0 = VIS_LD_U16_I(table1, 2 * s0);
516      acc0 = vis_faligndata(t0, acc0);
517
518      s0 = (mlib_s32) * sp;
519      sp--;
520
521      t0 = VIS_LD_U16_I(table0, 2 * s0);
522      acc0 = vis_faligndata(t0, acc0);
523    }
524    else if (num == 3) {
525      s0 = (mlib_s32) * sp;
526      sp--;
527
528      t0 = VIS_LD_U16_I(table2, 2 * s0);
529      acc0 = vis_faligndata(t0, acc0);
530
531      s0 = (mlib_s32) * sp;
532      sp--;
533
534      t0 = VIS_LD_U16_I(table1, 2 * s0);
535      acc0 = vis_faligndata(t0, acc0);
536
537      s0 = (mlib_s32) * sp;
538      sp--;
539
540      t0 = VIS_LD_U16_I(table0, 2 * s0);
541      acc0 = vis_faligndata(t0, acc0);
542    }
543
544    emask = vis_edge16(dp, dend);
545    vis_pst_16(acc0, dp, emask);
546  }
547}
548
549/***************************************************************/
550void mlib_v_ImageLookUp_U8_S16_1(const mlib_u8  *src,
551                                 mlib_s32       slb,
552                                 mlib_s16       *dst,
553                                 mlib_s32       dlb,
554                                 mlib_s32       xsize,
555                                 mlib_s32       ysize,
556                                 const mlib_s16 **table)
557{
558  mlib_u8 *sl;
559  mlib_s16 *dl;
560  const mlib_s16 *tab = table[0];
561  mlib_s32 j, i;
562
563  sl = (void *)src;
564  dl = dst;
565
566  /* row loop */
567  for (j = 0; j < ysize; j++) {
568    mlib_u8 *sp = sl;
569    mlib_s16 *dp = dl;
570    mlib_s32 off, size = xsize;
571
572    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
573
574    off = (off < size) ? off : size;
575
576    for (i = 0; i < off; i++) {
577      *dp++ = tab[(*sp++)];
578      size--;
579    }
580
581    if (size > 0) {
582
583      off = (mlib_addr) sp & 3;
584
585      if (off == 0) {
586        mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab, tab, tab,
587                                                 tab);
588      }
589      else if (off == 1) {
590        mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab, tab, tab,
591                                                 tab);
592      }
593      else if (off == 2) {
594        mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab, tab, tab,
595                                                 tab);
596      }
597      else {
598        mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab, tab, tab,
599                                                 tab);
600      }
601    }
602
603    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
604    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
605  }
606}
607
608/***************************************************************/
609void mlib_v_ImageLookUp_U8_S16_2(const mlib_u8  *src,
610                                 mlib_s32       slb,
611                                 mlib_s16       *dst,
612                                 mlib_s32       dlb,
613                                 mlib_s32       xsize,
614                                 mlib_s32       ysize,
615                                 const mlib_s16 **table)
616{
617  mlib_u8 *sl;
618  mlib_s16 *dl;
619  const mlib_s16 *tab;
620  mlib_s32 j, i;
621
622  sl = (void *)src;
623  dl = dst;
624
625  /* row loop */
626  for (j = 0; j < ysize; j++) {
627    mlib_u8 *sp = sl;
628    mlib_s16 *dp = dl;
629    mlib_s32 off, size = xsize * 2;
630    const mlib_s16 *tab0 = table[0];
631    const mlib_s16 *tab1 = table[1];
632
633    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
634
635    off = (off < size) ? off : size;
636
637    for (i = 0; i < off - 1; i += 2) {
638      *dp++ = tab0[(*sp++)];
639      *dp++ = tab1[(*sp++)];
640      size -= 2;
641    }
642
643    if ((off & 1) != 0) {
644      *dp++ = tab0[(*sp++)];
645      size--;
646      tab = tab0;
647      tab0 = tab1;
648      tab1 = tab;
649    }
650
651    if (size > 0) {
652
653      off = (mlib_addr) sp & 3;
654
655      if (off == 0) {
656        mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0,
657                                                 tab1);
658      }
659      else if (off == 1) {
660        mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0,
661                                                 tab1);
662      }
663      else if (off == 2) {
664        mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0,
665                                                 tab1);
666      }
667      else {
668        mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0,
669                                                 tab1);
670      }
671    }
672
673    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
674    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
675  }
676}
677
678/***************************************************************/
679void mlib_v_ImageLookUp_U8_S16_4(const mlib_u8  *src,
680                                 mlib_s32       slb,
681                                 mlib_s16       *dst,
682                                 mlib_s32       dlb,
683                                 mlib_s32       xsize,
684                                 mlib_s32       ysize,
685                                 const mlib_s16 **table)
686{
687  mlib_u8 *sl;
688  mlib_s16 *dl;
689  const mlib_s16 *tab;
690  mlib_s32 j;
691
692  sl = (void *)src;
693  dl = dst;
694
695  /* row loop */
696  for (j = 0; j < ysize; j++) {
697    mlib_u8 *sp = sl;
698    mlib_s16 *dp = dl;
699    const mlib_s16 *tab0 = table[0];
700    const mlib_s16 *tab1 = table[1];
701    const mlib_s16 *tab2 = table[2];
702    const mlib_s16 *tab3 = table[3];
703    mlib_s32 off, size = xsize * 4;
704
705    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
706
707    off = (off < size) ? off : size;
708
709    if (off == 1) {
710      *dp++ = tab0[(*sp++)];
711      tab = tab0;
712      tab0 = tab1;
713      tab1 = tab2;
714      tab2 = tab3;
715      tab3 = tab;
716      size--;
717    }
718    else if (off == 2) {
719      *dp++ = tab0[(*sp++)];
720      *dp++ = tab1[(*sp++)];
721      tab = tab0;
722      tab0 = tab2;
723      tab2 = tab;
724      tab = tab1;
725      tab1 = tab3;
726      tab3 = tab;
727      size -= 2;
728    }
729    else if (off == 3) {
730      *dp++ = tab0[(*sp++)];
731      *dp++ = tab1[(*sp++)];
732      *dp++ = tab2[(*sp++)];
733      tab = tab3;
734      tab3 = tab2;
735      tab2 = tab1;
736      tab1 = tab0;
737      tab0 = tab;
738      size -= 3;
739    }
740
741    if (size > 0) {
742
743      off = (mlib_addr) sp & 3;
744
745      if (off == 0) {
746        mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2,
747                                                 tab3);
748      }
749      else if (off == 1) {
750        mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2,
751                                                 tab3);
752      }
753      else if (off == 2) {
754        mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2,
755                                                 tab3);
756      }
757      else {
758        mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2,
759                                                 tab3);
760      }
761    }
762
763    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
764    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
765  }
766}
767
768/***************************************************************/
769void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8  *src,
770                                            mlib_s16       *dst,
771                                            mlib_s32       xsize,
772                                            const mlib_s16 *table0,
773                                            const mlib_s16 *table1,
774                                            const mlib_s16 *table2)
775{
776  mlib_u32 *sa;                        /* aligned pointer to source data */
777  mlib_u8 *sp;                         /* pointer to source data */
778  mlib_u32 s0, s1, s2;                 /* source data */
779  mlib_s16 *dl;                        /* pointer to start of destination */
780  mlib_s16 *dend;                      /* pointer to end of destination */
781  mlib_d64 *dp;                        /* aligned pointer to destination */
782  mlib_d64 t0, t1, t2;                 /* destination data */
783  mlib_d64 t3, t4, t5;                 /* destination data */
784  mlib_d64 t6, t7, t8;                 /* destination data */
785  mlib_d64 t9, t10, t11;               /* destination data */
786  mlib_d64 acc0, acc1, acc2;           /* destination data */
787  mlib_s32 emask;                      /* edge mask */
788  mlib_s32 i, num;                     /* loop variable */
789  const mlib_s16 *table;
790
791  sa = (mlib_u32 *) src;
792  dl = dst;
793  dp = (mlib_d64 *) dl;
794  dend = dl + xsize - 1;
795
796  vis_alignaddr((void *)0, 6);
797
798  i = 0;
799
800  if (xsize >= 12) {
801
802    s0 = sa[0];
803    s1 = sa[1];
804    s2 = sa[2];
805    sa += 3;
806
807#pragma pipeloop(0)
808    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
809      t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
810      t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
811      t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
812      t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
813      t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
814      t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
815      t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
816      t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
817      t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
818      t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
819      t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
820      t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
821      acc0 = vis_faligndata(t3, acc0);
822      acc0 = vis_faligndata(t2, acc0);
823      acc0 = vis_faligndata(t1, acc0);
824      acc0 = vis_faligndata(t0, acc0);
825      acc1 = vis_faligndata(t7, acc1);
826      acc1 = vis_faligndata(t6, acc1);
827      acc1 = vis_faligndata(t5, acc1);
828      acc1 = vis_faligndata(t4, acc1);
829      acc2 = vis_faligndata(t11, acc2);
830      acc2 = vis_faligndata(t10, acc2);
831      acc2 = vis_faligndata(t9, acc2);
832      acc2 = vis_faligndata(t8, acc2);
833      s0 = sa[0];
834      s1 = sa[1];
835      s2 = sa[2];
836      dp[0] = acc0;
837      dp[1] = acc1;
838      dp[2] = acc2;
839    }
840
841    t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
842    t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
843    t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
844    t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
845    t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
846    t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
847    t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
848    t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
849    t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
850    t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
851    t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
852    t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
853    acc0 = vis_faligndata(t3, acc0);
854    acc0 = vis_faligndata(t2, acc0);
855    acc0 = vis_faligndata(t1, acc0);
856    acc0 = vis_faligndata(t0, acc0);
857    acc1 = vis_faligndata(t7, acc1);
858    acc1 = vis_faligndata(t6, acc1);
859    acc1 = vis_faligndata(t5, acc1);
860    acc1 = vis_faligndata(t4, acc1);
861    acc2 = vis_faligndata(t11, acc2);
862    acc2 = vis_faligndata(t10, acc2);
863    acc2 = vis_faligndata(t9, acc2);
864    acc2 = vis_faligndata(t8, acc2);
865    dp[0] = acc0;
866    dp[1] = acc1;
867    dp[2] = acc2;
868    dp += 3;
869    i += 12;
870  }
871
872  if (i <= xsize - 8) {
873    s0 = sa[0];
874    s1 = sa[1];
875    t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
876    t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
877    t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
878    t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
879    t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
880    t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
881    t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
882    t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
883    acc0 = vis_faligndata(t3, acc0);
884    acc0 = vis_faligndata(t2, acc0);
885    acc0 = vis_faligndata(t1, acc0);
886    acc0 = vis_faligndata(t0, acc0);
887    acc1 = vis_faligndata(t7, acc1);
888    acc1 = vis_faligndata(t6, acc1);
889    acc1 = vis_faligndata(t5, acc1);
890    acc1 = vis_faligndata(t4, acc1);
891    dp[0] = acc0;
892    dp[1] = acc1;
893    table = table0;
894    table0 = table2;
895    table2 = table1;
896    table1 = table;
897    sa += 2;
898    i += 8;
899    dp += 2;
900  }
901
902  if (i <= xsize - 4) {
903    s0 = sa[0];
904    t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
905    t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
906    t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
907    t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
908    acc0 = vis_faligndata(t3, acc0);
909    acc0 = vis_faligndata(t2, acc0);
910    acc0 = vis_faligndata(t1, acc0);
911    acc0 = vis_faligndata(t0, acc0);
912    dp[0] = acc0;
913    table = table0;
914    table0 = table1;
915    table1 = table2;
916    table2 = table;
917    sa++;
918    i += 4;
919    dp++;
920  }
921
922  sp = (mlib_u8 *) sa;
923
924  if ((mlib_addr) dp <= (mlib_addr) dend) {
925
926    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
927    sp += num;
928    num++;
929
930    if (num == 1) {
931      s0 = (mlib_s32) * sp;
932      sp--;
933
934      t0 = VIS_LD_U16_I(table0, 2 * s0);
935      acc0 = vis_faligndata(t0, acc0);
936    }
937    else if (num == 2) {
938      s0 = (mlib_s32) * sp;
939      sp--;
940
941      t0 = VIS_LD_U16_I(table1, 2 * s0);
942      acc0 = vis_faligndata(t0, acc0);
943
944      s0 = (mlib_s32) * sp;
945      sp--;
946
947      t0 = VIS_LD_U16_I(table0, 2 * s0);
948      acc0 = vis_faligndata(t0, acc0);
949    }
950    else if (num == 3) {
951      s0 = (mlib_s32) * sp;
952      sp--;
953
954      t0 = VIS_LD_U16_I(table2, 2 * s0);
955      acc0 = vis_faligndata(t0, acc0);
956
957      s0 = (mlib_s32) * sp;
958      sp--;
959
960      t0 = VIS_LD_U16_I(table1, 2 * s0);
961      acc0 = vis_faligndata(t0, acc0);
962
963      s0 = (mlib_s32) * sp;
964      sp--;
965
966      t0 = VIS_LD_U16_I(table0, 2 * s0);
967      acc0 = vis_faligndata(t0, acc0);
968    }
969
970    emask = vis_edge16(dp, dend);
971    vis_pst_16(acc0, dp, emask);
972  }
973}
974
975/***************************************************************/
976void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8  *src,
977                                            mlib_s16       *dst,
978                                            mlib_s32       xsize,
979                                            const mlib_s16 *table0,
980                                            const mlib_s16 *table1,
981                                            const mlib_s16 *table2)
982{
983  mlib_u32 *sa;                        /* aligned pointer to source data */
984  mlib_u8 *sp;                         /* pointer to source data */
985  mlib_u32 s0, s1, s2, s3;             /* source data */
986  mlib_s16 *dl;                        /* pointer to start of destination */
987  mlib_s16 *dend;                      /* pointer to end of destination */
988  mlib_d64 *dp;                        /* aligned pointer to destination */
989  mlib_d64 t0, t1, t2;                 /* destination data */
990  mlib_d64 t3, t4, t5;                 /* destination data */
991  mlib_d64 t6, t7, t8;                 /* destination data */
992  mlib_d64 t9, t10, t11;               /* destination data */
993  mlib_d64 acc0, acc1, acc2;           /* destination data */
994  mlib_s32 emask;                      /* edge mask */
995  mlib_s32 i, num;                     /* loop variable */
996  const mlib_s16 *table;
997
998  sa = (mlib_u32 *) (src - 1);
999  dl = dst;
1000  dp = (mlib_d64 *) dl;
1001  dend = dl + xsize - 1;
1002
1003  vis_alignaddr((void *)0, 6);
1004
1005  i = 0;
1006
1007  s0 = *sa++;
1008
1009  if (xsize >= 12) {
1010
1011    s1 = sa[0];
1012    s2 = sa[1];
1013    s3 = sa[2];
1014    sa += 3;
1015
1016#pragma pipeloop(0)
1017    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
1018      t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1019      t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1020      t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1021      t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1022      t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1023      t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1024      t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1025      t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1026      t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
1027      t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
1028      t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
1029      t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
1030      acc0 = vis_faligndata(t3, acc0);
1031      acc0 = vis_faligndata(t2, acc0);
1032      acc0 = vis_faligndata(t1, acc0);
1033      acc0 = vis_faligndata(t0, acc0);
1034      acc1 = vis_faligndata(t7, acc1);
1035      acc1 = vis_faligndata(t6, acc1);
1036      acc1 = vis_faligndata(t5, acc1);
1037      acc1 = vis_faligndata(t4, acc1);
1038      acc2 = vis_faligndata(t11, acc2);
1039      acc2 = vis_faligndata(t10, acc2);
1040      acc2 = vis_faligndata(t9, acc2);
1041      acc2 = vis_faligndata(t8, acc2);
1042      s0 = s3;
1043      s1 = sa[0];
1044      s2 = sa[1];
1045      s3 = sa[2];
1046      dp[0] = acc0;
1047      dp[1] = acc1;
1048      dp[2] = acc2;
1049    }
1050
1051    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1052    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1053    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1054    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1055    t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1056    t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1057    t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1058    t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1059    t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
1060    t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
1061    t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
1062    t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
1063    acc0 = vis_faligndata(t3, acc0);
1064    acc0 = vis_faligndata(t2, acc0);
1065    acc0 = vis_faligndata(t1, acc0);
1066    acc0 = vis_faligndata(t0, acc0);
1067    acc1 = vis_faligndata(t7, acc1);
1068    acc1 = vis_faligndata(t6, acc1);
1069    acc1 = vis_faligndata(t5, acc1);
1070    acc1 = vis_faligndata(t4, acc1);
1071    acc2 = vis_faligndata(t11, acc2);
1072    acc2 = vis_faligndata(t10, acc2);
1073    acc2 = vis_faligndata(t9, acc2);
1074    acc2 = vis_faligndata(t8, acc2);
1075    dp[0] = acc0;
1076    dp[1] = acc1;
1077    dp[2] = acc2;
1078    s0 = s3;
1079    dp += 3;
1080    i += 12;
1081  }
1082
1083  if (i <= xsize - 8) {
1084    s1 = sa[0];
1085    s2 = sa[1];
1086    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1087    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1088    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1089    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1090    t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1091    t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1092    t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1093    t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1094    acc0 = vis_faligndata(t3, acc0);
1095    acc0 = vis_faligndata(t2, acc0);
1096    acc0 = vis_faligndata(t1, acc0);
1097    acc0 = vis_faligndata(t0, acc0);
1098    acc1 = vis_faligndata(t7, acc1);
1099    acc1 = vis_faligndata(t6, acc1);
1100    acc1 = vis_faligndata(t5, acc1);
1101    acc1 = vis_faligndata(t4, acc1);
1102    dp[0] = acc0;
1103    dp[1] = acc1;
1104    table = table0;
1105    table0 = table2;
1106    table2 = table1;
1107    table1 = table;
1108    sa += 2;
1109    i += 8;
1110    dp += 2;
1111    s0 = s2;
1112  }
1113
1114  if (i <= xsize - 4) {
1115    s1 = sa[0];
1116    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1117    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1118    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1119    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1120    acc0 = vis_faligndata(t3, acc0);
1121    acc0 = vis_faligndata(t2, acc0);
1122    acc0 = vis_faligndata(t1, acc0);
1123    acc0 = vis_faligndata(t0, acc0);
1124    dp[0] = acc0;
1125    table = table0;
1126    table0 = table1;
1127    table1 = table2;
1128    table2 = table;
1129    sa++;
1130    i += 4;
1131    dp++;
1132    s0 = s1;
1133  }
1134
1135  sp = (mlib_u8 *) sa;
1136  sp -= 3;
1137
1138  if ((mlib_addr) dp <= (mlib_addr) dend) {
1139
1140    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1141    sp += num;
1142    num++;
1143
1144    if (num == 1) {
1145      s0 = (mlib_s32) * sp;
1146      sp--;
1147
1148      t0 = VIS_LD_U16_I(table0, 2 * s0);
1149      acc0 = vis_faligndata(t0, acc0);
1150    }
1151    else if (num == 2) {
1152      s0 = (mlib_s32) * sp;
1153      sp--;
1154
1155      t0 = VIS_LD_U16_I(table1, 2 * s0);
1156      acc0 = vis_faligndata(t0, acc0);
1157
1158      s0 = (mlib_s32) * sp;
1159      sp--;
1160
1161      t0 = VIS_LD_U16_I(table0, 2 * s0);
1162      acc0 = vis_faligndata(t0, acc0);
1163    }
1164    else if (num == 3) {
1165      s0 = (mlib_s32) * sp;
1166      sp--;
1167
1168      t0 = VIS_LD_U16_I(table2, 2 * s0);
1169      acc0 = vis_faligndata(t0, acc0);
1170
1171      s0 = (mlib_s32) * sp;
1172      sp--;
1173
1174      t0 = VIS_LD_U16_I(table1, 2 * s0);
1175      acc0 = vis_faligndata(t0, acc0);
1176
1177      s0 = (mlib_s32) * sp;
1178      sp--;
1179
1180      t0 = VIS_LD_U16_I(table0, 2 * s0);
1181      acc0 = vis_faligndata(t0, acc0);
1182    }
1183
1184    emask = vis_edge16(dp, dend);
1185    vis_pst_16(acc0, dp, emask);
1186  }
1187}
1188
1189/***************************************************************/
1190void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8  *src,
1191                                            mlib_s16       *dst,
1192                                            mlib_s32       xsize,
1193                                            const mlib_s16 *table0,
1194                                            const mlib_s16 *table1,
1195                                            const mlib_s16 *table2)
1196{
1197  mlib_u32 *sa;                        /* aligned pointer to source data */
1198  mlib_u8 *sp;                         /* pointer to source data */
1199  mlib_u32 s0, s1, s2, s3;             /* source data */
1200  mlib_s16 *dl;                        /* pointer to start of destination */
1201  mlib_s16 *dend;                      /* pointer to end of destination */
1202  mlib_d64 *dp;                        /* aligned pointer to destination */
1203  mlib_d64 t0, t1, t2;                 /* destination data */
1204  mlib_d64 t3, t4, t5;                 /* destination data */
1205  mlib_d64 t6, t7, t8;                 /* destination data */
1206  mlib_d64 t9, t10, t11;               /* destination data */
1207  mlib_d64 acc0, acc1, acc2;           /* destination data */
1208  mlib_s32 emask;                      /* edge mask */
1209  mlib_s32 i, num;                     /* loop variable */
1210  const mlib_s16 *table;
1211
1212  sa = (mlib_u32 *) (src - 2);
1213  dl = dst;
1214  dp = (mlib_d64 *) dl;
1215  dend = dl + xsize - 1;
1216
1217  vis_alignaddr((void *)0, 6);
1218
1219  i = 0;
1220
1221  s0 = *sa++;
1222
1223  if (xsize >= 12) {
1224
1225    s1 = sa[0];
1226    s2 = sa[1];
1227    s3 = sa[2];
1228    sa += 3;
1229
1230#pragma pipeloop(0)
1231    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
1232      t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1233      t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1234      t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1235      t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1236      t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1237      t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1238      t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1239      t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1240      t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1241      t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1242      t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1243      t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1244      acc0 = vis_faligndata(t3, acc0);
1245      acc0 = vis_faligndata(t2, acc0);
1246      acc0 = vis_faligndata(t1, acc0);
1247      acc0 = vis_faligndata(t0, acc0);
1248      acc1 = vis_faligndata(t7, acc1);
1249      acc1 = vis_faligndata(t6, acc1);
1250      acc1 = vis_faligndata(t5, acc1);
1251      acc1 = vis_faligndata(t4, acc1);
1252      acc2 = vis_faligndata(t11, acc2);
1253      acc2 = vis_faligndata(t10, acc2);
1254      acc2 = vis_faligndata(t9, acc2);
1255      acc2 = vis_faligndata(t8, acc2);
1256      s0 = s3;
1257      s1 = sa[0];
1258      s2 = sa[1];
1259      s3 = sa[2];
1260      dp[0] = acc0;
1261      dp[1] = acc1;
1262      dp[2] = acc2;
1263    }
1264
1265    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1266    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1267    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1268    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1269    t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1270    t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1271    t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1272    t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1273    t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1274    t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1275    t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1276    t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1277    acc0 = vis_faligndata(t3, acc0);
1278    acc0 = vis_faligndata(t2, acc0);
1279    acc0 = vis_faligndata(t1, acc0);
1280    acc0 = vis_faligndata(t0, acc0);
1281    acc1 = vis_faligndata(t7, acc1);
1282    acc1 = vis_faligndata(t6, acc1);
1283    acc1 = vis_faligndata(t5, acc1);
1284    acc1 = vis_faligndata(t4, acc1);
1285    acc2 = vis_faligndata(t11, acc2);
1286    acc2 = vis_faligndata(t10, acc2);
1287    acc2 = vis_faligndata(t9, acc2);
1288    acc2 = vis_faligndata(t8, acc2);
1289    dp[0] = acc0;
1290    dp[1] = acc1;
1291    dp[2] = acc2;
1292    s0 = s3;
1293    dp += 3;
1294    i += 12;
1295  }
1296
1297  if (i <= xsize - 8) {
1298    s1 = sa[0];
1299    s2 = sa[1];
1300    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1301    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1302    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1303    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1304    t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1305    t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1306    t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1307    t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1308    acc0 = vis_faligndata(t3, acc0);
1309    acc0 = vis_faligndata(t2, acc0);
1310    acc0 = vis_faligndata(t1, acc0);
1311    acc0 = vis_faligndata(t0, acc0);
1312    acc1 = vis_faligndata(t7, acc1);
1313    acc1 = vis_faligndata(t6, acc1);
1314    acc1 = vis_faligndata(t5, acc1);
1315    acc1 = vis_faligndata(t4, acc1);
1316    dp[0] = acc0;
1317    dp[1] = acc1;
1318    table = table0;
1319    table0 = table2;
1320    table2 = table1;
1321    table1 = table;
1322    sa += 2;
1323    i += 8;
1324    dp += 2;
1325    s0 = s2;
1326  }
1327
1328  if (i <= xsize - 4) {
1329    s1 = sa[0];
1330    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1331    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1332    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1333    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1334    acc0 = vis_faligndata(t3, acc0);
1335    acc0 = vis_faligndata(t2, acc0);
1336    acc0 = vis_faligndata(t1, acc0);
1337    acc0 = vis_faligndata(t0, acc0);
1338    dp[0] = acc0;
1339    table = table0;
1340    table0 = table1;
1341    table1 = table2;
1342    table2 = table;
1343    sa++;
1344    i += 4;
1345    dp++;
1346    s0 = s1;
1347  }
1348
1349  sp = (mlib_u8 *) sa;
1350  sp -= 2;
1351
1352  if ((mlib_addr) dp <= (mlib_addr) dend) {
1353
1354    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1355    sp += num;
1356    num++;
1357
1358    if (num == 1) {
1359      s0 = (mlib_s32) * sp;
1360      sp--;
1361
1362      t0 = VIS_LD_U16_I(table0, 2 * s0);
1363      acc0 = vis_faligndata(t0, acc0);
1364    }
1365    else if (num == 2) {
1366      s0 = (mlib_s32) * sp;
1367      sp--;
1368
1369      t0 = VIS_LD_U16_I(table1, 2 * s0);
1370      acc0 = vis_faligndata(t0, acc0);
1371
1372      s0 = (mlib_s32) * sp;
1373      sp--;
1374
1375      t0 = VIS_LD_U16_I(table0, 2 * s0);
1376      acc0 = vis_faligndata(t0, acc0);
1377    }
1378    else if (num == 3) {
1379      s0 = (mlib_s32) * sp;
1380      sp--;
1381
1382      t0 = VIS_LD_U16_I(table2, 2 * s0);
1383      acc0 = vis_faligndata(t0, acc0);
1384
1385      s0 = (mlib_s32) * sp;
1386      sp--;
1387
1388      t0 = VIS_LD_U16_I(table1, 2 * s0);
1389      acc0 = vis_faligndata(t0, acc0);
1390
1391      s0 = (mlib_s32) * sp;
1392      sp--;
1393
1394      t0 = VIS_LD_U16_I(table0, 2 * s0);
1395      acc0 = vis_faligndata(t0, acc0);
1396    }
1397
1398    emask = vis_edge16(dp, dend);
1399    vis_pst_16(acc0, dp, emask);
1400  }
1401}
1402
1403/***************************************************************/
1404void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8  *src,
1405                                            mlib_s16       *dst,
1406                                            mlib_s32       xsize,
1407                                            const mlib_s16 *table0,
1408                                            const mlib_s16 *table1,
1409                                            const mlib_s16 *table2)
1410{
1411  mlib_u32 *sa;                        /* aligned pointer to source data */
1412  mlib_u8 *sp;                         /* pointer to source data */
1413  mlib_u32 s0, s1, s2, s3;             /* source data */
1414  mlib_s16 *dl;                        /* pointer to start of destination */
1415  mlib_s16 *dend;                      /* pointer to end of destination */
1416  mlib_d64 *dp;                        /* aligned pointer to destination */
1417  mlib_d64 t0, t1, t2;                 /* destination data */
1418  mlib_d64 t3, t4, t5;                 /* destination data */
1419  mlib_d64 t6, t7, t8;                 /* destination data */
1420  mlib_d64 t9, t10, t11;               /* destination data */
1421  mlib_d64 acc0, acc1, acc2;           /* destination data */
1422  mlib_s32 emask;                      /* edge mask */
1423  mlib_s32 i, num;                     /* loop variable */
1424  const mlib_s16 *table;
1425
1426  sa = (mlib_u32 *) (src - 3);
1427  dl = dst;
1428  dp = (mlib_d64 *) dl;
1429  dend = dl + xsize - 1;
1430
1431  vis_alignaddr((void *)0, 6);
1432
1433  i = 0;
1434
1435  s0 = *sa++;
1436
1437  if (xsize >= 12) {
1438
1439    s1 = sa[0];
1440    s2 = sa[1];
1441    s3 = sa[2];
1442    sa += 3;
1443
1444#pragma pipeloop(0)
1445    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
1446      t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1447      t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1448      t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1449      t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1450      t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1451      t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1452      t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1453      t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1454      t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1455      t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1456      t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1457      t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1458      acc0 = vis_faligndata(t3, acc0);
1459      acc0 = vis_faligndata(t2, acc0);
1460      acc0 = vis_faligndata(t1, acc0);
1461      acc0 = vis_faligndata(t0, acc0);
1462      acc1 = vis_faligndata(t7, acc1);
1463      acc1 = vis_faligndata(t6, acc1);
1464      acc1 = vis_faligndata(t5, acc1);
1465      acc1 = vis_faligndata(t4, acc1);
1466      acc2 = vis_faligndata(t11, acc2);
1467      acc2 = vis_faligndata(t10, acc2);
1468      acc2 = vis_faligndata(t9, acc2);
1469      acc2 = vis_faligndata(t8, acc2);
1470      s0 = s3;
1471      s1 = sa[0];
1472      s2 = sa[1];
1473      s3 = sa[2];
1474      dp[0] = acc0;
1475      dp[1] = acc1;
1476      dp[2] = acc2;
1477    }
1478
1479    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1480    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1481    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1482    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1483    t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1484    t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1485    t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1486    t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1487    t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1488    t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1489    t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1490    t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1491    acc0 = vis_faligndata(t3, acc0);
1492    acc0 = vis_faligndata(t2, acc0);
1493    acc0 = vis_faligndata(t1, acc0);
1494    acc0 = vis_faligndata(t0, acc0);
1495    acc1 = vis_faligndata(t7, acc1);
1496    acc1 = vis_faligndata(t6, acc1);
1497    acc1 = vis_faligndata(t5, acc1);
1498    acc1 = vis_faligndata(t4, acc1);
1499    acc2 = vis_faligndata(t11, acc2);
1500    acc2 = vis_faligndata(t10, acc2);
1501    acc2 = vis_faligndata(t9, acc2);
1502    acc2 = vis_faligndata(t8, acc2);
1503    dp[0] = acc0;
1504    dp[1] = acc1;
1505    dp[2] = acc2;
1506    s0 = s3;
1507    dp += 3;
1508    i += 12;
1509  }
1510
1511  if (i <= xsize - 8) {
1512    s1 = sa[0];
1513    s2 = sa[1];
1514    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1515    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1516    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1517    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1518    t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1519    t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1520    t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1521    t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1522    acc0 = vis_faligndata(t3, acc0);
1523    acc0 = vis_faligndata(t2, acc0);
1524    acc0 = vis_faligndata(t1, acc0);
1525    acc0 = vis_faligndata(t0, acc0);
1526    acc1 = vis_faligndata(t7, acc1);
1527    acc1 = vis_faligndata(t6, acc1);
1528    acc1 = vis_faligndata(t5, acc1);
1529    acc1 = vis_faligndata(t4, acc1);
1530    dp[0] = acc0;
1531    dp[1] = acc1;
1532    table = table0;
1533    table0 = table2;
1534    table2 = table1;
1535    table1 = table;
1536    sa += 2;
1537    i += 8;
1538    dp += 2;
1539    s0 = s2;
1540  }
1541
1542  if (i <= xsize - 4) {
1543    s1 = sa[0];
1544    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1545    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1546    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1547    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1548    acc0 = vis_faligndata(t3, acc0);
1549    acc0 = vis_faligndata(t2, acc0);
1550    acc0 = vis_faligndata(t1, acc0);
1551    acc0 = vis_faligndata(t0, acc0);
1552    dp[0] = acc0;
1553    table = table0;
1554    table0 = table1;
1555    table1 = table2;
1556    table2 = table;
1557    sa++;
1558    i += 4;
1559    dp++;
1560    s0 = s1;
1561  }
1562
1563  sp = (mlib_u8 *) sa;
1564  sp -= 1;
1565
1566  if ((mlib_addr) dp <= (mlib_addr) dend) {
1567
1568    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1569    sp += num;
1570    num++;
1571
1572    if (num == 1) {
1573      s0 = (mlib_s32) * sp;
1574      sp--;
1575
1576      t0 = VIS_LD_U16_I(table0, 2 * s0);
1577      acc0 = vis_faligndata(t0, acc0);
1578    }
1579    else if (num == 2) {
1580      s0 = (mlib_s32) * sp;
1581      sp--;
1582
1583      t0 = VIS_LD_U16_I(table1, 2 * s0);
1584      acc0 = vis_faligndata(t0, acc0);
1585
1586      s0 = (mlib_s32) * sp;
1587      sp--;
1588
1589      t0 = VIS_LD_U16_I(table0, 2 * s0);
1590      acc0 = vis_faligndata(t0, acc0);
1591    }
1592    else if (num == 3) {
1593      s0 = (mlib_s32) * sp;
1594      sp--;
1595
1596      t0 = VIS_LD_U16_I(table2, 2 * s0);
1597      acc0 = vis_faligndata(t0, acc0);
1598
1599      s0 = (mlib_s32) * sp;
1600      sp--;
1601
1602      t0 = VIS_LD_U16_I(table1, 2 * s0);
1603      acc0 = vis_faligndata(t0, acc0);
1604
1605      s0 = (mlib_s32) * sp;
1606      sp--;
1607
1608      t0 = VIS_LD_U16_I(table0, 2 * s0);
1609      acc0 = vis_faligndata(t0, acc0);
1610    }
1611
1612    emask = vis_edge16(dp, dend);
1613    vis_pst_16(acc0, dp, emask);
1614  }
1615}
1616
1617/***************************************************************/
1618void mlib_v_ImageLookUp_U8_S16_3(const mlib_u8  *src,
1619                                 mlib_s32       slb,
1620                                 mlib_s16       *dst,
1621                                 mlib_s32       dlb,
1622                                 mlib_s32       xsize,
1623                                 mlib_s32       ysize,
1624                                 const mlib_s16 **table)
1625{
1626  mlib_u8 *sl;
1627  mlib_s16 *dl;
1628  const mlib_s16 *tab;
1629  mlib_s32 j, i;
1630
1631  sl = (void *)src;
1632  dl = dst;
1633
1634  /* row loop */
1635  for (j = 0; j < ysize; j++) {
1636    mlib_u8 *sp = sl;
1637    mlib_s16 *dp = dl;
1638    const mlib_s16 *tab0 = table[0];
1639    const mlib_s16 *tab1 = table[1];
1640    const mlib_s16 *tab2 = table[2];
1641    mlib_s32 off, size = xsize * 3;
1642
1643    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
1644
1645    off = (off < size) ? off : size;
1646
1647    for (i = 0; i < off - 2; i += 3) {
1648      *dp++ = tab0[(*sp++)];
1649      *dp++ = tab1[(*sp++)];
1650      *dp++ = tab2[(*sp++)];
1651      size -= 3;
1652    }
1653
1654    off -= i;
1655
1656    if (off == 1) {
1657      *dp++ = tab0[(*sp++)];
1658      tab = tab0;
1659      tab0 = tab1;
1660      tab1 = tab2;
1661      tab2 = tab;
1662      size--;
1663    }
1664    else if (off == 2) {
1665      *dp++ = tab0[(*sp++)];
1666      *dp++ = tab1[(*sp++)];
1667      tab = tab2;
1668      tab2 = tab1;
1669      tab1 = tab0;
1670      tab0 = tab;
1671      size -= 2;
1672    }
1673
1674    if (size > 0) {
1675
1676      off = (mlib_addr) sp & 3;
1677
1678      if (off == 0) {
1679        mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
1680      }
1681      else if (off == 1) {
1682        mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
1683      }
1684      else if (off == 2) {
1685        mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
1686      }
1687      else {
1688        mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
1689      }
1690    }
1691
1692    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1693    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
1694  }
1695}
1696
1697/***************************************************************/
1698