1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
/*
 * Forward declarations of the file-local single-row kernels used by the
 * row drivers further down in this file.
 *
 * All of them take the same arguments:
 *   src    first 16-bit source sample of the row segment
 *   dst    first destination byte of the row segment
 *   xsize  sample count (each kernel documents how it counts)
 *   table  array of per-channel byte look-up tables (2, 3 or 4 entries are
 *          dereferenced, matching the _2 / _3 / _4 in the kernel name)
 */
static void mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(const mlib_u16 *src,
                                                  mlib_u8        *dst,
                                                  mlib_s32       xsize,
                                                  const mlib_u8  **table);

static void mlib_v_ImageLookUpSI_U16_U8_2_D1(const mlib_u16 *src,
                                             mlib_u8        *dst,
                                             mlib_s32       xsize,
                                             const mlib_u8  **table);

static void mlib_v_ImageLookUpSI_U16_U8_3_D1(const mlib_u16 *src,
                                             mlib_u8        *dst,
                                             mlib_s32       xsize,
                                             const mlib_u8  **table);

/*
 * The four _4_DstOffN kernels are selected by the 4-channel driver according
 * to how many bytes (N) it had to emit to bring dst to a 4-byte boundary.
 */
static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(const mlib_u16 *src,
                                                     mlib_u8        *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_u8  **table);

static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(const mlib_u16 *src,
                                                     mlib_u8        *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_u8  **table);

static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(const mlib_u16 *src,
                                                     mlib_u8        *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_u8  **table);

static void mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(const mlib_u16 *src,
                                                     mlib_u8        *dst,
                                                     mlib_s32       xsize,
                                                     const mlib_u8  **table);
67
68/***************************************************************/
/*
 * Thin wrapper around vis_ld_u8_i(): casts the base pointer X to (void *) so
 * that const-qualified table pointers can be passed, and forwards Y unchanged
 * as the second argument (in this file Y is always the source sample used as
 * the table index).
 */
#define VIS_LD_U8_I(X, Y)       vis_ld_u8_i((void *)(X), (Y))
70
71/***************************************************************/
72void mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(const mlib_u16 *src,
73                                           mlib_u8        *dst,
74                                           mlib_s32       xsize,
75                                           const mlib_u8  **table)
76{
77  mlib_u16 *sp;                        /* pointer to source data */
78  mlib_s32 s0, s1, s2, s3;             /* source data */
79  mlib_u16 *dl;                        /* pointer to start of destination */
80  mlib_u16 *dend;                      /* pointer to end of destination */
81  mlib_d64 *dp;                        /* aligned pointer to destination */
82  mlib_d64 t0, t1, t2;                 /* destination data */
83  mlib_d64 t3, t4, t5;                 /* destination data */
84  mlib_d64 t6, t7, acc;                /* destination data */
85  mlib_s32 emask;                      /* edge mask */
86  mlib_s32 i, num;                     /* loop variable */
87  const mlib_u8 *tab0 = &table[0][0];
88  const mlib_u8 *tab1 = &table[1][0];
89
90  sp = (void *)src;
91  dl = (mlib_u16 *) dst;
92  dp = (mlib_d64 *) dl;
93  dend = dl + xsize - 1;
94
95  vis_alignaddr((void *)0, 7);
96
97  if (xsize >= 4) {
98
99    s0 = sp[0];
100    s1 = sp[1];
101    s2 = sp[2];
102    s3 = sp[3];
103    sp += 4;
104
105#pragma pipeloop(0)
106    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
107      t7 = VIS_LD_U8_I(tab1, s3);
108      t6 = VIS_LD_U8_I(tab0, s3);
109      t5 = VIS_LD_U8_I(tab1, s2);
110      t4 = VIS_LD_U8_I(tab0, s2);
111      t3 = VIS_LD_U8_I(tab1, s1);
112      t2 = VIS_LD_U8_I(tab0, s1);
113      t1 = VIS_LD_U8_I(tab1, s0);
114      t0 = VIS_LD_U8_I(tab0, s0);
115      acc = vis_faligndata(t7, acc);
116      acc = vis_faligndata(t6, acc);
117      acc = vis_faligndata(t5, acc);
118      acc = vis_faligndata(t4, acc);
119      acc = vis_faligndata(t3, acc);
120      acc = vis_faligndata(t2, acc);
121      acc = vis_faligndata(t1, acc);
122      acc = vis_faligndata(t0, acc);
123      s0 = sp[0];
124      s1 = sp[1];
125      s2 = sp[2];
126      s3 = sp[3];
127      *dp++ = acc;
128    }
129
130    t7 = VIS_LD_U8_I(tab1, s3);
131    t6 = VIS_LD_U8_I(tab0, s3);
132    t5 = VIS_LD_U8_I(tab1, s2);
133    t4 = VIS_LD_U8_I(tab0, s2);
134    t3 = VIS_LD_U8_I(tab1, s1);
135    t2 = VIS_LD_U8_I(tab0, s1);
136    t1 = VIS_LD_U8_I(tab1, s0);
137    t0 = VIS_LD_U8_I(tab0, s0);
138    acc = vis_faligndata(t7, acc);
139    acc = vis_faligndata(t6, acc);
140    acc = vis_faligndata(t5, acc);
141    acc = vis_faligndata(t4, acc);
142    acc = vis_faligndata(t3, acc);
143    acc = vis_faligndata(t2, acc);
144    acc = vis_faligndata(t1, acc);
145    acc = vis_faligndata(t0, acc);
146    *dp++ = acc;
147  }
148
149  if ((mlib_addr) dp <= (mlib_addr) dend) {
150
151    num = (mlib_u16 *) dend - (mlib_u16 *) dp;
152    sp += num;
153    num++;
154#pragma pipeloop(0)
155    for (i = 0; i < num; i++) {
156      s0 = (mlib_s32) * sp;
157      sp--;
158
159      t0 = VIS_LD_U8_I(tab1, s0);
160      acc = vis_faligndata(t0, acc);
161
162      t0 = VIS_LD_U8_I(tab0, s0);
163      acc = vis_faligndata(t0, acc);
164    }
165
166    emask = vis_edge16(dp, dend);
167    vis_pst_16(acc, dp, emask);
168  }
169}
170
171/***************************************************************/
172void mlib_v_ImageLookUpSI_U16_U8_2_D1(const mlib_u16 *src,
173                                      mlib_u8        *dst,
174                                      mlib_s32       xsize,
175                                      const mlib_u8  **table)
176{
177  mlib_u16 *sp;                        /* pointer to source data */
178  mlib_s32 s0, s1, s2, s3, s4;         /* source data */
179  mlib_u8 *dl;                         /* pointer to start of destination */
180  mlib_u8 *dend;                       /* pointer to end of destination */
181  mlib_d64 *dp;                        /* aligned pointer to destination */
182  mlib_d64 t0, t1, t2;                 /* destination data */
183  mlib_d64 t3, t4, t5;                 /* destination data */
184  mlib_d64 t6, t7, acc;                /* destination data */
185  mlib_s32 emask;                      /* edge mask */
186  mlib_s32 i, num;                     /* loop variable */
187  const mlib_u8 *tab0 = &table[0][0];
188  const mlib_u8 *tab1 = &table[1][0];
189
190  sp = (void *)src;
191  dl = dst;
192
193  dend = dl + 2 * xsize - 1;
194
195  vis_alignaddr((void *)0, 7);
196
197  s0 = *sp++;
198  *dl++ = tab0[s0];
199  dp = (mlib_d64 *) dl;
200  xsize--;
201
202  if (xsize >= 4) {
203
204    s1 = sp[0];
205    s2 = sp[1];
206    s3 = sp[2];
207    s4 = sp[3];
208    sp += 4;
209
210#pragma pipeloop(0)
211    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
212      t7 = VIS_LD_U8_I(tab0, s4);
213      t6 = VIS_LD_U8_I(tab1, s3);
214      t5 = VIS_LD_U8_I(tab0, s3);
215      t4 = VIS_LD_U8_I(tab1, s2);
216      t3 = VIS_LD_U8_I(tab0, s2);
217      t2 = VIS_LD_U8_I(tab1, s1);
218      t1 = VIS_LD_U8_I(tab0, s1);
219      t0 = VIS_LD_U8_I(tab1, s0);
220      acc = vis_faligndata(t7, acc);
221      acc = vis_faligndata(t6, acc);
222      acc = vis_faligndata(t5, acc);
223      acc = vis_faligndata(t4, acc);
224      acc = vis_faligndata(t3, acc);
225      acc = vis_faligndata(t2, acc);
226      acc = vis_faligndata(t1, acc);
227      acc = vis_faligndata(t0, acc);
228      s0 = s4;
229      s1 = sp[0];
230      s2 = sp[1];
231      s3 = sp[2];
232      s4 = sp[3];
233      *dp++ = acc;
234    }
235
236    t7 = VIS_LD_U8_I(tab0, s4);
237    t6 = VIS_LD_U8_I(tab1, s3);
238    t5 = VIS_LD_U8_I(tab0, s3);
239    t4 = VIS_LD_U8_I(tab1, s2);
240    t3 = VIS_LD_U8_I(tab0, s2);
241    t2 = VIS_LD_U8_I(tab1, s1);
242    t1 = VIS_LD_U8_I(tab0, s1);
243    t0 = VIS_LD_U8_I(tab1, s0);
244    acc = vis_faligndata(t7, acc);
245    acc = vis_faligndata(t6, acc);
246    acc = vis_faligndata(t5, acc);
247    acc = vis_faligndata(t4, acc);
248    acc = vis_faligndata(t3, acc);
249    acc = vis_faligndata(t2, acc);
250    acc = vis_faligndata(t1, acc);
251    acc = vis_faligndata(t0, acc);
252    s0 = s4;
253    *dp++ = acc;
254  }
255
256  num = ((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1;
257  sp += num;
258  num++;
259
260#pragma pipeloop(0)
261  for (i = 0; i < num; i++) {
262    s1 = (mlib_s32) * sp;
263    sp--;
264
265    t0 = VIS_LD_U8_I(tab1, s1);
266    acc = vis_faligndata(t0, acc);
267
268    t0 = VIS_LD_U8_I(tab0, s1);
269    acc = vis_faligndata(t0, acc);
270  }
271
272  t0 = VIS_LD_U8_I(tab1, s0);
273  acc = vis_faligndata(t0, acc);
274  emask = vis_edge8(dp, dend);
275  vis_pst_8(acc, dp, emask);
276}
277
278/***************************************************************/
279void mlib_v_ImageLookUpSI_U16_U8_2(const mlib_u16 *src,
280                                   mlib_s32       slb,
281                                   mlib_u8        *dst,
282                                   mlib_s32       dlb,
283                                   mlib_s32       xsize,
284                                   mlib_s32       ysize,
285                                   const mlib_u8  **table)
286{
287  mlib_u16 *sl;
288  mlib_u8 *dl;
289  mlib_s32 i, j;
290  const mlib_u8 *tab0 = &table[0][0];
291  const mlib_u8 *tab1 = &table[1][0];
292
293  sl = (void *)src;
294  dl = dst;
295
296  /* row loop */
297  for (j = 0; j < ysize; j++) {
298    mlib_u16 *sp = sl;
299    mlib_u8 *dp = dl;
300    mlib_s32 off, s0, size = xsize;
301
302    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
303    off = (off < size) ? off : size;
304
305    for (i = 0; i < off; i++) {
306      s0 = *sp++;
307      *dp++ = tab0[s0];
308      *dp++ = tab1[s0];
309      size--;
310    }
311
312    if (size > 0) {
313
314      if (((mlib_addr) dp & 1) == 0) {
315        mlib_v_ImageLookUpSI_U16_U8_2_DstA8D1(sp, dp, size, table);
316      }
317      else {
318        mlib_v_ImageLookUpSI_U16_U8_2_D1(sp, dp, size, table);
319      }
320    }
321
322    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
323    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
324  }
325}
326
327/***************************************************************/
328void mlib_v_ImageLookUpSI_U16_U8_3_D1(const mlib_u16 *src,
329                                      mlib_u8        *dst,
330                                      mlib_s32       xsize,
331                                      const mlib_u8  **table)
332{
333  mlib_u16 *sp;                        /* pointer to source data */
334  mlib_u8 *dl;                         /* pointer to start of destination */
335  mlib_d64 *dp;                        /* aligned pointer to destination */
336  mlib_d64 t0, t1, t2;                 /* destination data */
337  mlib_d64 t3, t4, t5;                 /* destination data */
338  mlib_d64 t6, t7;                     /* destination data */
339  mlib_d64 acc0, acc1, acc2;           /* destination data */
340  mlib_s32 i;                          /* loop variable */
341  const mlib_u8 *tab0 = &table[0][0];
342  const mlib_u8 *tab1 = &table[1][0];
343  const mlib_u8 *tab2 = &table[2][0];
344  mlib_s32 s00, s01, s02, s03;
345  mlib_s32 s10, s11, s12, s13;
346
347  sp = (void *)src;
348  dl = dst;
349  dp = (mlib_d64 *) dl;
350
351  vis_alignaddr((void *)0, 7);
352
353  i = 0;
354
355  if (xsize >= 8) {
356
357    s00 = sp[0];
358    s01 = sp[1];
359    s02 = sp[2];
360    s03 = sp[3];
361    s10 = sp[4];
362    s11 = sp[5];
363    s12 = sp[6];
364    s13 = sp[7];
365    sp += 8;
366
367#pragma pipeloop(0)
368    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
369      t7 = VIS_LD_U8_I(tab1, s02);
370      t6 = VIS_LD_U8_I(tab0, s02);
371      t5 = VIS_LD_U8_I(tab2, s01);
372      t4 = VIS_LD_U8_I(tab1, s01);
373      t3 = VIS_LD_U8_I(tab0, s01);
374      t2 = VIS_LD_U8_I(tab2, s00);
375      t1 = VIS_LD_U8_I(tab1, s00);
376      t0 = VIS_LD_U8_I(tab0, s00);
377      acc0 = vis_faligndata(t7, acc0);
378      acc0 = vis_faligndata(t6, acc0);
379      acc0 = vis_faligndata(t5, acc0);
380      acc0 = vis_faligndata(t4, acc0);
381      acc0 = vis_faligndata(t3, acc0);
382      acc0 = vis_faligndata(t2, acc0);
383      acc0 = vis_faligndata(t1, acc0);
384      acc0 = vis_faligndata(t0, acc0);
385      t7 = VIS_LD_U8_I(tab0, s11);
386      t6 = VIS_LD_U8_I(tab2, s10);
387      t5 = VIS_LD_U8_I(tab1, s10);
388      t4 = VIS_LD_U8_I(tab0, s10);
389      t3 = VIS_LD_U8_I(tab2, s03);
390      t2 = VIS_LD_U8_I(tab1, s03);
391      t1 = VIS_LD_U8_I(tab0, s03);
392      t0 = VIS_LD_U8_I(tab2, s02);
393      acc1 = vis_faligndata(t7, acc1);
394      acc1 = vis_faligndata(t6, acc1);
395      acc1 = vis_faligndata(t5, acc1);
396      acc1 = vis_faligndata(t4, acc1);
397      acc1 = vis_faligndata(t3, acc1);
398      acc1 = vis_faligndata(t2, acc1);
399      acc1 = vis_faligndata(t1, acc1);
400      acc1 = vis_faligndata(t0, acc1);
401      t7 = VIS_LD_U8_I(tab2, s13);
402      t6 = VIS_LD_U8_I(tab1, s13);
403      t5 = VIS_LD_U8_I(tab0, s13);
404      t4 = VIS_LD_U8_I(tab2, s12);
405      t3 = VIS_LD_U8_I(tab1, s12);
406      t2 = VIS_LD_U8_I(tab0, s12);
407      t1 = VIS_LD_U8_I(tab2, s11);
408      t0 = VIS_LD_U8_I(tab1, s11);
409      acc2 = vis_faligndata(t7, acc2);
410      acc2 = vis_faligndata(t6, acc2);
411      acc2 = vis_faligndata(t5, acc2);
412      acc2 = vis_faligndata(t4, acc2);
413      acc2 = vis_faligndata(t3, acc2);
414      acc2 = vis_faligndata(t2, acc2);
415      acc2 = vis_faligndata(t1, acc2);
416      acc2 = vis_faligndata(t0, acc2);
417      s00 = sp[0];
418      s01 = sp[1];
419      s02 = sp[2];
420      s03 = sp[3];
421      s10 = sp[4];
422      s11 = sp[5];
423      s12 = sp[6];
424      s13 = sp[7];
425      *dp++ = acc0;
426      *dp++ = acc1;
427      *dp++ = acc2;
428    }
429
430    t7 = VIS_LD_U8_I(tab1, s02);
431    t6 = VIS_LD_U8_I(tab0, s02);
432    t5 = VIS_LD_U8_I(tab2, s01);
433    t4 = VIS_LD_U8_I(tab1, s01);
434    t3 = VIS_LD_U8_I(tab0, s01);
435    t2 = VIS_LD_U8_I(tab2, s00);
436    t1 = VIS_LD_U8_I(tab1, s00);
437    t0 = VIS_LD_U8_I(tab0, s00);
438    acc0 = vis_faligndata(t7, acc0);
439    acc0 = vis_faligndata(t6, acc0);
440    acc0 = vis_faligndata(t5, acc0);
441    acc0 = vis_faligndata(t4, acc0);
442    acc0 = vis_faligndata(t3, acc0);
443    acc0 = vis_faligndata(t2, acc0);
444    acc0 = vis_faligndata(t1, acc0);
445    acc0 = vis_faligndata(t0, acc0);
446    t7 = VIS_LD_U8_I(tab0, s11);
447    t6 = VIS_LD_U8_I(tab2, s10);
448    t5 = VIS_LD_U8_I(tab1, s10);
449    t4 = VIS_LD_U8_I(tab0, s10);
450    t3 = VIS_LD_U8_I(tab2, s03);
451    t2 = VIS_LD_U8_I(tab1, s03);
452    t1 = VIS_LD_U8_I(tab0, s03);
453    t0 = VIS_LD_U8_I(tab2, s02);
454    acc1 = vis_faligndata(t7, acc1);
455    acc1 = vis_faligndata(t6, acc1);
456    acc1 = vis_faligndata(t5, acc1);
457    acc1 = vis_faligndata(t4, acc1);
458    acc1 = vis_faligndata(t3, acc1);
459    acc1 = vis_faligndata(t2, acc1);
460    acc1 = vis_faligndata(t1, acc1);
461    acc1 = vis_faligndata(t0, acc1);
462    t7 = VIS_LD_U8_I(tab2, s13);
463    t6 = VIS_LD_U8_I(tab1, s13);
464    t5 = VIS_LD_U8_I(tab0, s13);
465    t4 = VIS_LD_U8_I(tab2, s12);
466    t3 = VIS_LD_U8_I(tab1, s12);
467    t2 = VIS_LD_U8_I(tab0, s12);
468    t1 = VIS_LD_U8_I(tab2, s11);
469    t0 = VIS_LD_U8_I(tab1, s11);
470    acc2 = vis_faligndata(t7, acc2);
471    acc2 = vis_faligndata(t6, acc2);
472    acc2 = vis_faligndata(t5, acc2);
473    acc2 = vis_faligndata(t4, acc2);
474    acc2 = vis_faligndata(t3, acc2);
475    acc2 = vis_faligndata(t2, acc2);
476    acc2 = vis_faligndata(t1, acc2);
477    acc2 = vis_faligndata(t0, acc2);
478    *dp++ = acc0;
479    *dp++ = acc1;
480    *dp++ = acc2;
481    i += 8;
482  }
483
484  dl = (mlib_u8 *) dp;
485
486#pragma pipeloop(0)
487  for (; i < xsize; i++) {
488    s00 = sp[0];
489    dl[0] = tab0[s00];
490    dl[1] = tab1[s00];
491    dl[2] = tab2[s00];
492    dl += 3;
493    sp++;
494  }
495}
496
497/***************************************************************/
498void mlib_v_ImageLookUpSI_U16_U8_3(const mlib_u16 *src,
499                                   mlib_s32       slb,
500                                   mlib_u8        *dst,
501                                   mlib_s32       dlb,
502                                   mlib_s32       xsize,
503                                   mlib_s32       ysize,
504                                   const mlib_u8  **table)
505{
506  mlib_u16 *sl;
507  mlib_u8 *dl;
508  mlib_s32 i, j;
509  const mlib_u8 *tab0 = &table[0][0];
510  const mlib_u8 *tab1 = &table[1][0];
511  const mlib_u8 *tab2 = &table[2][0];
512
513  sl = (void *)src;
514  dl = dst;
515
516  /* row loop */
517  for (j = 0; j < ysize; j++) {
518    mlib_u16 *sp = sl;
519    mlib_u8 *dp = dl;
520    mlib_s32 off, s0, size = xsize;
521
522    off = (mlib_addr) dp & 7;
523    off = (off * 5) & 7;
524    off = (off < size) ? off : size;
525
526    for (i = 0; i < off; i++) {
527      s0 = *sp++;
528      *dp++ = tab0[s0];
529      *dp++ = tab1[s0];
530      *dp++ = tab2[s0];
531      size--;
532    }
533
534    if (size > 0) {
535      mlib_v_ImageLookUpSI_U16_U8_3_D1(sp, dp, size, table);
536    }
537
538    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
539    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
540  }
541}
542
543/***************************************************************/
544void mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(const mlib_u16 *src,
545                                              mlib_u8        *dst,
546                                              mlib_s32       xsize,
547                                              const mlib_u8  **table)
548{
549  mlib_u16 *sp;                        /* pointer to source data */
550  mlib_s32 s0, s1;                     /* source data */
551  mlib_u8 *dl;                         /* pointer to start of destination */
552  mlib_d64 *dp;                        /* aligned pointer to destination */
553  mlib_d64 t0, t1, t2;                 /* destination data */
554  mlib_d64 t3, t4, t5;                 /* destination data */
555  mlib_d64 t6, t7, acc;                /* destination data */
556  mlib_s32 i;                          /* loop variable */
557  const mlib_u8 *tab0 = &table[0][0];
558  const mlib_u8 *tab1 = &table[1][0];
559  const mlib_u8 *tab2 = &table[2][0];
560  const mlib_u8 *tab3 = &table[3][0];
561
562  sp = (void *)src;
563  dl = dst;
564  dp = (mlib_d64 *) dl;
565
566  vis_alignaddr((void *)0, 7);
567
568  if (xsize >= 2) {
569
570    s0 = sp[0];
571    s1 = sp[1];
572    sp += 2;
573
574#pragma pipeloop(0)
575    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
576      t7 = VIS_LD_U8_I(tab3, s1);
577      t6 = VIS_LD_U8_I(tab2, s1);
578      t5 = VIS_LD_U8_I(tab1, s1);
579      t4 = VIS_LD_U8_I(tab0, s1);
580      t3 = VIS_LD_U8_I(tab3, s0);
581      t2 = VIS_LD_U8_I(tab2, s0);
582      t1 = VIS_LD_U8_I(tab1, s0);
583      t0 = VIS_LD_U8_I(tab0, s0);
584      acc = vis_faligndata(t7, acc);
585      acc = vis_faligndata(t6, acc);
586      acc = vis_faligndata(t5, acc);
587      acc = vis_faligndata(t4, acc);
588      acc = vis_faligndata(t3, acc);
589      acc = vis_faligndata(t2, acc);
590      acc = vis_faligndata(t1, acc);
591      acc = vis_faligndata(t0, acc);
592      s0 = sp[0];
593      s1 = sp[1];
594      *dp++ = acc;
595    }
596
597    t7 = VIS_LD_U8_I(tab3, s1);
598    t6 = VIS_LD_U8_I(tab2, s1);
599    t5 = VIS_LD_U8_I(tab1, s1);
600    t4 = VIS_LD_U8_I(tab0, s1);
601    t3 = VIS_LD_U8_I(tab3, s0);
602    t2 = VIS_LD_U8_I(tab2, s0);
603    t1 = VIS_LD_U8_I(tab1, s0);
604    t0 = VIS_LD_U8_I(tab0, s0);
605    acc = vis_faligndata(t7, acc);
606    acc = vis_faligndata(t6, acc);
607    acc = vis_faligndata(t5, acc);
608    acc = vis_faligndata(t4, acc);
609    acc = vis_faligndata(t3, acc);
610    acc = vis_faligndata(t2, acc);
611    acc = vis_faligndata(t1, acc);
612    acc = vis_faligndata(t0, acc);
613    *dp++ = acc;
614  }
615
616  if ((xsize & 1) != 0) {
617    s0 = sp[0];
618    t7 = VIS_LD_U8_I(tab3, s0);
619    t6 = VIS_LD_U8_I(tab2, s0);
620    t5 = VIS_LD_U8_I(tab1, s0);
621    t4 = VIS_LD_U8_I(tab0, s0);
622    acc = vis_faligndata(t7, acc);
623    acc = vis_faligndata(t6, acc);
624    acc = vis_faligndata(t5, acc);
625    acc = vis_faligndata(t4, acc);
626    *(mlib_f32 *) dp = vis_read_hi(acc);
627  }
628}
629
630/***************************************************************/
631void mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(const mlib_u16 *src,
632                                              mlib_u8        *dst,
633                                              mlib_s32       xsize,
634                                              const mlib_u8  **table)
635{
636  mlib_u16 *sp;                        /* pointer to source data */
637  mlib_s32 s0, s1, s2;                 /* source data */
638  mlib_u8 *dl;                         /* pointer to start of destination */
639  mlib_d64 *dp;                        /* aligned pointer to destination */
640  mlib_d64 t0, t1, t2;                 /* destination data */
641  mlib_d64 t3, t4, t5;                 /* destination data */
642  mlib_d64 t6, t7, acc;                /* destination data */
643  mlib_s32 i;                          /* loop variable */
644  const mlib_u8 *tab0 = &table[0][0];
645  const mlib_u8 *tab1 = &table[1][0];
646  const mlib_u8 *tab2 = &table[2][0];
647  const mlib_u8 *tab3 = &table[3][0];
648
649  sp = (void *)src;
650  dl = dst;
651  dp = (mlib_d64 *) dl;
652
653  vis_alignaddr((void *)0, 7);
654
655  s0 = *sp++;
656
657  if (xsize >= 2) {
658
659    s1 = sp[0];
660    s2 = sp[1];
661    sp += 2;
662
663#pragma pipeloop(0)
664    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
665      t7 = VIS_LD_U8_I(tab0, s2);
666      t6 = VIS_LD_U8_I(tab3, s1);
667      t5 = VIS_LD_U8_I(tab2, s1);
668      t4 = VIS_LD_U8_I(tab1, s1);
669      t3 = VIS_LD_U8_I(tab0, s1);
670      t2 = VIS_LD_U8_I(tab3, s0);
671      t1 = VIS_LD_U8_I(tab2, s0);
672      t0 = VIS_LD_U8_I(tab1, s0);
673      acc = vis_faligndata(t7, acc);
674      acc = vis_faligndata(t6, acc);
675      acc = vis_faligndata(t5, acc);
676      acc = vis_faligndata(t4, acc);
677      acc = vis_faligndata(t3, acc);
678      acc = vis_faligndata(t2, acc);
679      acc = vis_faligndata(t1, acc);
680      acc = vis_faligndata(t0, acc);
681      s0 = s2;
682      s1 = sp[0];
683      s2 = sp[1];
684      *dp++ = acc;
685    }
686
687    t7 = VIS_LD_U8_I(tab0, s2);
688    t6 = VIS_LD_U8_I(tab3, s1);
689    t5 = VIS_LD_U8_I(tab2, s1);
690    t4 = VIS_LD_U8_I(tab1, s1);
691    t3 = VIS_LD_U8_I(tab0, s1);
692    t2 = VIS_LD_U8_I(tab3, s0);
693    t1 = VIS_LD_U8_I(tab2, s0);
694    t0 = VIS_LD_U8_I(tab1, s0);
695    acc = vis_faligndata(t7, acc);
696    acc = vis_faligndata(t6, acc);
697    acc = vis_faligndata(t5, acc);
698    acc = vis_faligndata(t4, acc);
699    acc = vis_faligndata(t3, acc);
700    acc = vis_faligndata(t2, acc);
701    acc = vis_faligndata(t1, acc);
702    acc = vis_faligndata(t0, acc);
703    s0 = s2;
704    *dp++ = acc;
705  }
706
707  dl = (mlib_u8 *) dp;
708
709  if ((xsize & 1) != 0) {
710    s1 = sp[0];
711    t7 = VIS_LD_U8_I(tab0, s1);
712    t6 = VIS_LD_U8_I(tab3, s0);
713    t5 = VIS_LD_U8_I(tab2, s0);
714    t4 = VIS_LD_U8_I(tab1, s0);
715    acc = vis_faligndata(t7, acc);
716    acc = vis_faligndata(t6, acc);
717    acc = vis_faligndata(t5, acc);
718    acc = vis_faligndata(t4, acc);
719    *(mlib_f32 *) dl = vis_read_hi(acc);
720    dl += 4;
721    s0 = s1;
722  }
723
724  dl[0] = tab1[s0];
725  dl[1] = tab2[s0];
726  dl[2] = tab3[s0];
727}
728
729/***************************************************************/
730void mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(const mlib_u16 *src,
731                                              mlib_u8        *dst,
732                                              mlib_s32       xsize,
733                                              const mlib_u8  **table)
734{
735  mlib_u16 *sp;                        /* pointer to source data */
736  mlib_s32 s0, s1, s2;                 /* source data */
737  mlib_u8 *dl;                         /* pointer to start of destination */
738  mlib_d64 *dp;                        /* aligned pointer to destination */
739  mlib_d64 t0, t1, t2;                 /* destination data */
740  mlib_d64 t3, t4, t5;                 /* destination data */
741  mlib_d64 t6, t7, acc;                /* destination data */
742  mlib_s32 i;                          /* loop variable */
743  const mlib_u8 *tab0 = &table[0][0];
744  const mlib_u8 *tab1 = &table[1][0];
745  const mlib_u8 *tab2 = &table[2][0];
746  const mlib_u8 *tab3 = &table[3][0];
747
748  sp = (void *)src;
749  dl = dst;
750  dp = (mlib_d64 *) dl;
751
752  vis_alignaddr((void *)0, 7);
753
754  s0 = *sp++;
755
756  if (xsize >= 2) {
757
758    s1 = sp[0];
759    s2 = sp[1];
760    sp += 2;
761
762#pragma pipeloop(0)
763    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
764      t7 = VIS_LD_U8_I(tab1, s2);
765      t6 = VIS_LD_U8_I(tab0, s2);
766      t5 = VIS_LD_U8_I(tab3, s1);
767      t4 = VIS_LD_U8_I(tab2, s1);
768      t3 = VIS_LD_U8_I(tab1, s1);
769      t2 = VIS_LD_U8_I(tab0, s1);
770      t1 = VIS_LD_U8_I(tab3, s0);
771      t0 = VIS_LD_U8_I(tab2, s0);
772      acc = vis_faligndata(t7, acc);
773      acc = vis_faligndata(t6, acc);
774      acc = vis_faligndata(t5, acc);
775      acc = vis_faligndata(t4, acc);
776      acc = vis_faligndata(t3, acc);
777      acc = vis_faligndata(t2, acc);
778      acc = vis_faligndata(t1, acc);
779      acc = vis_faligndata(t0, acc);
780      s0 = s2;
781      s1 = sp[0];
782      s2 = sp[1];
783      *dp++ = acc;
784    }
785
786    t7 = VIS_LD_U8_I(tab1, s2);
787    t6 = VIS_LD_U8_I(tab0, s2);
788    t5 = VIS_LD_U8_I(tab3, s1);
789    t4 = VIS_LD_U8_I(tab2, s1);
790    t3 = VIS_LD_U8_I(tab1, s1);
791    t2 = VIS_LD_U8_I(tab0, s1);
792    t1 = VIS_LD_U8_I(tab3, s0);
793    t0 = VIS_LD_U8_I(tab2, s0);
794    acc = vis_faligndata(t7, acc);
795    acc = vis_faligndata(t6, acc);
796    acc = vis_faligndata(t5, acc);
797    acc = vis_faligndata(t4, acc);
798    acc = vis_faligndata(t3, acc);
799    acc = vis_faligndata(t2, acc);
800    acc = vis_faligndata(t1, acc);
801    acc = vis_faligndata(t0, acc);
802    s0 = s2;
803    *dp++ = acc;
804  }
805
806  dl = (mlib_u8 *) dp;
807
808  if ((xsize & 1) != 0) {
809    s1 = sp[0];
810    t7 = VIS_LD_U8_I(tab1, s1);
811    t6 = VIS_LD_U8_I(tab0, s1);
812    t5 = VIS_LD_U8_I(tab3, s0);
813    t4 = VIS_LD_U8_I(tab2, s0);
814    acc = vis_faligndata(t7, acc);
815    acc = vis_faligndata(t6, acc);
816    acc = vis_faligndata(t5, acc);
817    acc = vis_faligndata(t4, acc);
818    *(mlib_f32 *) dl = vis_read_hi(acc);
819    dl += 4;
820    s0 = s1;
821  }
822
823  dl[0] = tab2[s0];
824  dl[1] = tab3[s0];
825}
826
827/***************************************************************/
828void mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(const mlib_u16 *src,
829                                              mlib_u8        *dst,
830                                              mlib_s32       xsize,
831                                              const mlib_u8  **table)
832{
833  mlib_u16 *sp;                        /* pointer to source data */
834  mlib_s32 s0, s1, s2;                 /* source data */
835  mlib_u8 *dl;                         /* pointer to start of destination */
836  mlib_d64 *dp;                        /* aligned pointer to destination */
837  mlib_d64 t0, t1, t2;                 /* destination data */
838  mlib_d64 t3, t4, t5;                 /* destination data */
839  mlib_d64 t6, t7, acc;                /* destination data */
840  mlib_s32 i;                          /* loop variable */
841  const mlib_u8 *tab0 = &table[0][0];
842  const mlib_u8 *tab1 = &table[1][0];
843  const mlib_u8 *tab2 = &table[2][0];
844  const mlib_u8 *tab3 = &table[3][0];
845
846  sp = (void *)src;
847  dl = dst;
848  dp = (mlib_d64 *) dl;
849
850  vis_alignaddr((void *)0, 7);
851
852  s0 = *sp++;
853
854  if (xsize >= 2) {
855
856    s1 = sp[0];
857    s2 = sp[1];
858    sp += 2;
859
860#pragma pipeloop(0)
861    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
862      t7 = VIS_LD_U8_I(tab2, s2);
863      t6 = VIS_LD_U8_I(tab1, s2);
864      t5 = VIS_LD_U8_I(tab0, s2);
865      t4 = VIS_LD_U8_I(tab3, s1);
866      t3 = VIS_LD_U8_I(tab2, s1);
867      t2 = VIS_LD_U8_I(tab1, s1);
868      t1 = VIS_LD_U8_I(tab0, s1);
869      t0 = VIS_LD_U8_I(tab3, s0);
870      acc = vis_faligndata(t7, acc);
871      acc = vis_faligndata(t6, acc);
872      acc = vis_faligndata(t5, acc);
873      acc = vis_faligndata(t4, acc);
874      acc = vis_faligndata(t3, acc);
875      acc = vis_faligndata(t2, acc);
876      acc = vis_faligndata(t1, acc);
877      acc = vis_faligndata(t0, acc);
878      s0 = s2;
879      s1 = sp[0];
880      s2 = sp[1];
881      *dp++ = acc;
882    }
883
884    t7 = VIS_LD_U8_I(tab2, s2);
885    t6 = VIS_LD_U8_I(tab1, s2);
886    t5 = VIS_LD_U8_I(tab0, s2);
887    t4 = VIS_LD_U8_I(tab3, s1);
888    t3 = VIS_LD_U8_I(tab2, s1);
889    t2 = VIS_LD_U8_I(tab1, s1);
890    t1 = VIS_LD_U8_I(tab0, s1);
891    t0 = VIS_LD_U8_I(tab3, s0);
892    acc = vis_faligndata(t7, acc);
893    acc = vis_faligndata(t6, acc);
894    acc = vis_faligndata(t5, acc);
895    acc = vis_faligndata(t4, acc);
896    acc = vis_faligndata(t3, acc);
897    acc = vis_faligndata(t2, acc);
898    acc = vis_faligndata(t1, acc);
899    acc = vis_faligndata(t0, acc);
900    s0 = s2;
901    *dp++ = acc;
902  }
903
904  dl = (mlib_u8 *) dp;
905
906  if ((xsize & 1) != 0) {
907    s1 = sp[0];
908    t7 = VIS_LD_U8_I(tab2, s1);
909    t6 = VIS_LD_U8_I(tab1, s1);
910    t5 = VIS_LD_U8_I(tab0, s1);
911    t4 = VIS_LD_U8_I(tab3, s0);
912    acc = vis_faligndata(t7, acc);
913    acc = vis_faligndata(t6, acc);
914    acc = vis_faligndata(t5, acc);
915    acc = vis_faligndata(t4, acc);
916    *(mlib_f32 *) dl = vis_read_hi(acc);
917    dl += 4;
918    s0 = s1;
919  }
920
921  dl[0] = tab3[s0];
922}
923
924/***************************************************************/
925void mlib_v_ImageLookUpSI_U16_U8_4(const mlib_u16 *src,
926                                   mlib_s32       slb,
927                                   mlib_u8        *dst,
928                                   mlib_s32       dlb,
929                                   mlib_s32       xsize,
930                                   mlib_s32       ysize,
931                                   const mlib_u8  **table)
932{
933  mlib_u16 *sl;
934  mlib_u8 *dl;
935  mlib_s32 j;
936  const mlib_u8 *tab0 = &table[0][0];
937  const mlib_u8 *tab1 = &table[1][0];
938  const mlib_u8 *tab2 = &table[2][0];
939  const mlib_u8 *tab3 = &table[3][0];
940
941  sl = (void *)src;
942  dl = dst;
943
944  /* row loop */
945  for (j = 0; j < ysize; j++) {
946    mlib_u16 *sp = sl;
947    mlib_u8 *dp = dl;
948    mlib_s32 off, s0, size = xsize;
949
950    off = (8 - ((mlib_addr) dp & 7)) & 7;
951
952    if ((off >= 4) && (size > 0)) {
953      s0 = *sp++;
954      *dp++ = tab0[s0];
955      *dp++ = tab1[s0];
956      *dp++ = tab2[s0];
957      *dp++ = tab3[s0];
958      size--;
959    }
960
961    if (size > 0) {
962      off = (4 - ((mlib_addr) dp & 3)) & 3;
963
964      if (off == 0) {
965        mlib_v_ImageLookUpSI_U16_U8_4_DstOff0_D1(sp, dp, size, table);
966      }
967      else if (off == 1) {
968        s0 = *sp;
969        *dp++ = tab0[s0];
970        size--;
971        mlib_v_ImageLookUpSI_U16_U8_4_DstOff1_D1(sp, dp, size, table);
972      }
973      else if (off == 2) {
974        s0 = *sp;
975        *dp++ = tab0[s0];
976        *dp++ = tab1[s0];
977        size--;
978        mlib_v_ImageLookUpSI_U16_U8_4_DstOff2_D1(sp, dp, size, table);
979      }
980      else if (off == 3) {
981        s0 = *sp;
982        *dp++ = tab0[s0];
983        *dp++ = tab1[s0];
984        *dp++ = tab2[s0];
985        size--;
986        mlib_v_ImageLookUpSI_U16_U8_4_DstOff3_D1(sp, dp, size, table);
987      }
988    }
989
990    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
991    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
992  }
993}
994
995/***************************************************************/
996