1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src,
34                                                  mlib_u8        *dst,
35                                                  mlib_s32       xsize,
36                                                  const mlib_u8  **table);
37
38static void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src,
39                                             mlib_u8        *dst,
40                                             mlib_s32       xsize,
41                                             const mlib_u8  **table);
42
43static void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src,
44                                             mlib_u8        *dst,
45                                             mlib_s32       xsize,
46                                             const mlib_u8  **table);
47
48static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src,
49                                                     mlib_u8        *dst,
50                                                     mlib_s32       xsize,
51                                                     const mlib_u8  **table);
52
53static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src,
54                                                     mlib_u8        *dst,
55                                                     mlib_s32       xsize,
56                                                     const mlib_u8  **table);
57
58static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src,
59                                                     mlib_u8        *dst,
60                                                     mlib_s32       xsize,
61                                                     const mlib_u8  **table);
62
63static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src,
64                                                     mlib_u8        *dst,
65                                                     mlib_s32       xsize,
66                                                     const mlib_u8  **table);
67
68/***************************************************************/
/*
 * Wrapper around vis_ld_u8_i() that casts the base pointer to void * so
 * that const-qualified table pointers can be passed without a warning.
 * The second argument is forwarded unchanged.
 */
#define VIS_LD_U8_I(base, index)  vis_ld_u8_i((void *)(base), (index))
70
71/***************************************************************/
72void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src,
73                                           mlib_u8        *dst,
74                                           mlib_s32       xsize,
75                                           const mlib_u8  **table)
76{
77  mlib_s32 *sp;                        /* pointer to source data */
78  mlib_s32 s0, s1, s2, s3;             /* source data */
79  mlib_u16 *dl;                        /* pointer to start of destination */
80  mlib_u16 *dend;                      /* pointer to end of destination */
81  mlib_d64 *dp;                        /* aligned pointer to destination */
82  mlib_d64 t0, t1, t2;                 /* destination data */
83  mlib_d64 t3, t4, t5;                 /* destination data */
84  mlib_d64 t6, t7, acc;                /* destination data */
85  mlib_s32 emask;                      /* edge mask */
86  mlib_s32 i, num;                     /* loop variable */
87  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
88  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
89
90  sp = (void *)src;
91  dl = (mlib_u16 *) dst;
92  dp = (mlib_d64 *) dl;
93  dend = dl + xsize - 1;
94
95  vis_alignaddr((void *)0, 7);
96
97  if (xsize >= 4) {
98
99    s0 = sp[0];
100    s1 = sp[1];
101    s2 = sp[2];
102    s3 = sp[3];
103    sp += 4;
104
105#pragma pipeloop(0)
106    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
107      t7 = VIS_LD_U8_I(tab1, s3);
108      t6 = VIS_LD_U8_I(tab0, s3);
109      t5 = VIS_LD_U8_I(tab1, s2);
110      t4 = VIS_LD_U8_I(tab0, s2);
111      t3 = VIS_LD_U8_I(tab1, s1);
112      t2 = VIS_LD_U8_I(tab0, s1);
113      t1 = VIS_LD_U8_I(tab1, s0);
114      t0 = VIS_LD_U8_I(tab0, s0);
115      acc = vis_faligndata(t7, acc);
116      acc = vis_faligndata(t6, acc);
117      acc = vis_faligndata(t5, acc);
118      acc = vis_faligndata(t4, acc);
119      acc = vis_faligndata(t3, acc);
120      acc = vis_faligndata(t2, acc);
121      acc = vis_faligndata(t1, acc);
122      acc = vis_faligndata(t0, acc);
123      s0 = sp[0];
124      s1 = sp[1];
125      s2 = sp[2];
126      s3 = sp[3];
127      *dp++ = acc;
128    }
129
130    t7 = VIS_LD_U8_I(tab1, s3);
131    t6 = VIS_LD_U8_I(tab0, s3);
132    t5 = VIS_LD_U8_I(tab1, s2);
133    t4 = VIS_LD_U8_I(tab0, s2);
134    t3 = VIS_LD_U8_I(tab1, s1);
135    t2 = VIS_LD_U8_I(tab0, s1);
136    t1 = VIS_LD_U8_I(tab1, s0);
137    t0 = VIS_LD_U8_I(tab0, s0);
138    acc = vis_faligndata(t7, acc);
139    acc = vis_faligndata(t6, acc);
140    acc = vis_faligndata(t5, acc);
141    acc = vis_faligndata(t4, acc);
142    acc = vis_faligndata(t3, acc);
143    acc = vis_faligndata(t2, acc);
144    acc = vis_faligndata(t1, acc);
145    acc = vis_faligndata(t0, acc);
146    *dp++ = acc;
147  }
148
149  if ((mlib_addr) dp <= (mlib_addr) dend) {
150
151    num = (mlib_s32) ((mlib_u16 *) dend - (mlib_u16 *) dp);
152    sp += num;
153    num++;
154#pragma pipeloop(0)
155    for (i = 0; i < num; i++) {
156      s0 = *sp;
157      sp--;
158
159      t0 = VIS_LD_U8_I(tab1, s0);
160      acc = vis_faligndata(t0, acc);
161
162      t0 = VIS_LD_U8_I(tab0, s0);
163      acc = vis_faligndata(t0, acc);
164    }
165
166    emask = vis_edge16(dp, dend);
167    vis_pst_16(acc, dp, emask);
168  }
169}
170
171/***************************************************************/
172void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src,
173                                      mlib_u8        *dst,
174                                      mlib_s32       xsize,
175                                      const mlib_u8  **table)
176{
177  mlib_s32 *sp;                        /* pointer to source data */
178  mlib_s32 s0, s1, s2, s3, s4;         /* source data */
179  mlib_u8 *dl;                         /* pointer to start of destination */
180  mlib_u8 *dend;                       /* pointer to end of destination */
181  mlib_d64 *dp;                        /* aligned pointer to destination */
182  mlib_d64 t0, t1, t2;                 /* destination data */
183  mlib_d64 t3, t4, t5;                 /* destination data */
184  mlib_d64 t6, t7, acc;                /* destination data */
185  mlib_s32 emask;                      /* edge mask */
186  mlib_s32 i, num;                     /* loop variable */
187  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
188  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
189
190  sp = (void *)src;
191  dl = dst;
192
193  dend = dl + 2 * xsize - 1;
194
195  vis_alignaddr((void *)0, 7);
196
197  s0 = *sp++;
198  *dl++ = tab0[s0];
199  dp = (mlib_d64 *) dl;
200  xsize--;
201
202  if (xsize >= 4) {
203
204    s1 = sp[0];
205    s2 = sp[1];
206    s3 = sp[2];
207    s4 = sp[3];
208    sp += 4;
209
210#pragma pipeloop(0)
211    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
212      t7 = VIS_LD_U8_I(tab0, s4);
213      t6 = VIS_LD_U8_I(tab1, s3);
214      t5 = VIS_LD_U8_I(tab0, s3);
215      t4 = VIS_LD_U8_I(tab1, s2);
216      t3 = VIS_LD_U8_I(tab0, s2);
217      t2 = VIS_LD_U8_I(tab1, s1);
218      t1 = VIS_LD_U8_I(tab0, s1);
219      t0 = VIS_LD_U8_I(tab1, s0);
220      acc = vis_faligndata(t7, acc);
221      acc = vis_faligndata(t6, acc);
222      acc = vis_faligndata(t5, acc);
223      acc = vis_faligndata(t4, acc);
224      acc = vis_faligndata(t3, acc);
225      acc = vis_faligndata(t2, acc);
226      acc = vis_faligndata(t1, acc);
227      acc = vis_faligndata(t0, acc);
228      s0 = s4;
229      s1 = sp[0];
230      s2 = sp[1];
231      s3 = sp[2];
232      s4 = sp[3];
233      *dp++ = acc;
234    }
235
236    t7 = VIS_LD_U8_I(tab0, s4);
237    t6 = VIS_LD_U8_I(tab1, s3);
238    t5 = VIS_LD_U8_I(tab0, s3);
239    t4 = VIS_LD_U8_I(tab1, s2);
240    t3 = VIS_LD_U8_I(tab0, s2);
241    t2 = VIS_LD_U8_I(tab1, s1);
242    t1 = VIS_LD_U8_I(tab0, s1);
243    t0 = VIS_LD_U8_I(tab1, s0);
244    acc = vis_faligndata(t7, acc);
245    acc = vis_faligndata(t6, acc);
246    acc = vis_faligndata(t5, acc);
247    acc = vis_faligndata(t4, acc);
248    acc = vis_faligndata(t3, acc);
249    acc = vis_faligndata(t2, acc);
250    acc = vis_faligndata(t1, acc);
251    acc = vis_faligndata(t0, acc);
252    s0 = s4;
253    *dp++ = acc;
254  }
255
256  num = (mlib_s32) (((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1);
257  sp += num - 1;
258
259#pragma pipeloop(0)
260  for (i = 0; i < num; i++) {
261    s1 = *sp;
262    sp--;
263
264    t0 = VIS_LD_U8_I(tab1, s1);
265    acc = vis_faligndata(t0, acc);
266
267    t0 = VIS_LD_U8_I(tab0, s1);
268    acc = vis_faligndata(t0, acc);
269  }
270
271  t0 = VIS_LD_U8_I(tab1, s0);
272  acc = vis_faligndata(t0, acc);
273  emask = vis_edge8(dp, dend);
274  vis_pst_8(acc, dp, emask);
275}
276
277/***************************************************************/
278void mlib_v_ImageLookUpSI_S32_U8_2(const mlib_s32 *src,
279                                   mlib_s32       slb,
280                                   mlib_u8        *dst,
281                                   mlib_s32       dlb,
282                                   mlib_s32       xsize,
283                                   mlib_s32       ysize,
284                                   const mlib_u8  **table)
285{
286  mlib_s32 *sl;
287  mlib_u8 *dl;
288  mlib_s32 i, j;
289  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
290  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
291
292  sl = (void *)src;
293  dl = dst;
294
295  /* row loop */
296  for (j = 0; j < ysize; j++) {
297    mlib_s32 *sp = sl;
298    mlib_u8 *dp = dl;
299    mlib_s32 off, s0, size = xsize;
300
301    off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
302    off = (off < size) ? off : size;
303
304    for (i = 0; i < off; i++) {
305      s0 = *sp++;
306      *dp++ = tab0[s0];
307      *dp++ = tab1[s0];
308      size--;
309    }
310
311    if (size > 0) {
312
313      if (((mlib_addr) dp & 1) == 0) {
314        mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(sp, dp, size, table);
315      }
316      else {
317        mlib_v_ImageLookUpSI_S32_U8_2_D1(sp, dp, size, table);
318      }
319    }
320
321    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
322    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
323  }
324}
325
326/***************************************************************/
327void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src,
328                                      mlib_u8        *dst,
329                                      mlib_s32       xsize,
330                                      const mlib_u8  **table)
331{
332  mlib_s32 *sp;                        /* pointer to source data */
333  mlib_u8 *dl;                         /* pointer to start of destination */
334  mlib_d64 *dp;                        /* aligned pointer to destination */
335  mlib_d64 t0, t1, t2;                 /* destination data */
336  mlib_d64 t3, t4, t5;                 /* destination data */
337  mlib_d64 t6, t7;                     /* destination data */
338  mlib_d64 acc0, acc1, acc2;           /* destination data */
339  mlib_s32 i;                          /* loop variable */
340  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
341  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
342  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
343  mlib_s32 s00, s01, s02, s03;
344  mlib_s32 s10, s11, s12, s13;
345
346  sp = (void *)src;
347  dl = dst;
348  dp = (mlib_d64 *) dl;
349
350  vis_alignaddr((void *)0, 7);
351
352  i = 0;
353
354  if (xsize >= 8) {
355
356    s00 = sp[0];
357    s01 = sp[1];
358    s02 = sp[2];
359    s03 = sp[3];
360    s10 = sp[4];
361    s11 = sp[5];
362    s12 = sp[6];
363    s13 = sp[7];
364    sp += 8;
365
366#pragma pipeloop(0)
367    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
368      t7 = VIS_LD_U8_I(tab1, s02);
369      t6 = VIS_LD_U8_I(tab0, s02);
370      t5 = VIS_LD_U8_I(tab2, s01);
371      t4 = VIS_LD_U8_I(tab1, s01);
372      t3 = VIS_LD_U8_I(tab0, s01);
373      t2 = VIS_LD_U8_I(tab2, s00);
374      t1 = VIS_LD_U8_I(tab1, s00);
375      t0 = VIS_LD_U8_I(tab0, s00);
376      acc0 = vis_faligndata(t7, acc0);
377      acc0 = vis_faligndata(t6, acc0);
378      acc0 = vis_faligndata(t5, acc0);
379      acc0 = vis_faligndata(t4, acc0);
380      acc0 = vis_faligndata(t3, acc0);
381      acc0 = vis_faligndata(t2, acc0);
382      acc0 = vis_faligndata(t1, acc0);
383      acc0 = vis_faligndata(t0, acc0);
384      t7 = VIS_LD_U8_I(tab0, s11);
385      t6 = VIS_LD_U8_I(tab2, s10);
386      t5 = VIS_LD_U8_I(tab1, s10);
387      t4 = VIS_LD_U8_I(tab0, s10);
388      t3 = VIS_LD_U8_I(tab2, s03);
389      t2 = VIS_LD_U8_I(tab1, s03);
390      t1 = VIS_LD_U8_I(tab0, s03);
391      t0 = VIS_LD_U8_I(tab2, s02);
392      acc1 = vis_faligndata(t7, acc1);
393      acc1 = vis_faligndata(t6, acc1);
394      acc1 = vis_faligndata(t5, acc1);
395      acc1 = vis_faligndata(t4, acc1);
396      acc1 = vis_faligndata(t3, acc1);
397      acc1 = vis_faligndata(t2, acc1);
398      acc1 = vis_faligndata(t1, acc1);
399      acc1 = vis_faligndata(t0, acc1);
400      t7 = VIS_LD_U8_I(tab2, s13);
401      t6 = VIS_LD_U8_I(tab1, s13);
402      t5 = VIS_LD_U8_I(tab0, s13);
403      t4 = VIS_LD_U8_I(tab2, s12);
404      t3 = VIS_LD_U8_I(tab1, s12);
405      t2 = VIS_LD_U8_I(tab0, s12);
406      t1 = VIS_LD_U8_I(tab2, s11);
407      t0 = VIS_LD_U8_I(tab1, s11);
408      acc2 = vis_faligndata(t7, acc2);
409      acc2 = vis_faligndata(t6, acc2);
410      acc2 = vis_faligndata(t5, acc2);
411      acc2 = vis_faligndata(t4, acc2);
412      acc2 = vis_faligndata(t3, acc2);
413      acc2 = vis_faligndata(t2, acc2);
414      acc2 = vis_faligndata(t1, acc2);
415      acc2 = vis_faligndata(t0, acc2);
416      s00 = sp[0];
417      s01 = sp[1];
418      s02 = sp[2];
419      s03 = sp[3];
420      s10 = sp[4];
421      s11 = sp[5];
422      s12 = sp[6];
423      s13 = sp[7];
424      *dp++ = acc0;
425      *dp++ = acc1;
426      *dp++ = acc2;
427    }
428
429    t7 = VIS_LD_U8_I(tab1, s02);
430    t6 = VIS_LD_U8_I(tab0, s02);
431    t5 = VIS_LD_U8_I(tab2, s01);
432    t4 = VIS_LD_U8_I(tab1, s01);
433    t3 = VIS_LD_U8_I(tab0, s01);
434    t2 = VIS_LD_U8_I(tab2, s00);
435    t1 = VIS_LD_U8_I(tab1, s00);
436    t0 = VIS_LD_U8_I(tab0, s00);
437    acc0 = vis_faligndata(t7, acc0);
438    acc0 = vis_faligndata(t6, acc0);
439    acc0 = vis_faligndata(t5, acc0);
440    acc0 = vis_faligndata(t4, acc0);
441    acc0 = vis_faligndata(t3, acc0);
442    acc0 = vis_faligndata(t2, acc0);
443    acc0 = vis_faligndata(t1, acc0);
444    acc0 = vis_faligndata(t0, acc0);
445    t7 = VIS_LD_U8_I(tab0, s11);
446    t6 = VIS_LD_U8_I(tab2, s10);
447    t5 = VIS_LD_U8_I(tab1, s10);
448    t4 = VIS_LD_U8_I(tab0, s10);
449    t3 = VIS_LD_U8_I(tab2, s03);
450    t2 = VIS_LD_U8_I(tab1, s03);
451    t1 = VIS_LD_U8_I(tab0, s03);
452    t0 = VIS_LD_U8_I(tab2, s02);
453    acc1 = vis_faligndata(t7, acc1);
454    acc1 = vis_faligndata(t6, acc1);
455    acc1 = vis_faligndata(t5, acc1);
456    acc1 = vis_faligndata(t4, acc1);
457    acc1 = vis_faligndata(t3, acc1);
458    acc1 = vis_faligndata(t2, acc1);
459    acc1 = vis_faligndata(t1, acc1);
460    acc1 = vis_faligndata(t0, acc1);
461    t7 = VIS_LD_U8_I(tab2, s13);
462    t6 = VIS_LD_U8_I(tab1, s13);
463    t5 = VIS_LD_U8_I(tab0, s13);
464    t4 = VIS_LD_U8_I(tab2, s12);
465    t3 = VIS_LD_U8_I(tab1, s12);
466    t2 = VIS_LD_U8_I(tab0, s12);
467    t1 = VIS_LD_U8_I(tab2, s11);
468    t0 = VIS_LD_U8_I(tab1, s11);
469    acc2 = vis_faligndata(t7, acc2);
470    acc2 = vis_faligndata(t6, acc2);
471    acc2 = vis_faligndata(t5, acc2);
472    acc2 = vis_faligndata(t4, acc2);
473    acc2 = vis_faligndata(t3, acc2);
474    acc2 = vis_faligndata(t2, acc2);
475    acc2 = vis_faligndata(t1, acc2);
476    acc2 = vis_faligndata(t0, acc2);
477    *dp++ = acc0;
478    *dp++ = acc1;
479    *dp++ = acc2;
480    i += 8;
481  }
482
483  dl = (mlib_u8 *) dp;
484
485#pragma pipeloop(0)
486  for (; i < xsize; i++) {
487    s00 = sp[0];
488    dl[0] = tab0[s00];
489    dl[1] = tab1[s00];
490    dl[2] = tab2[s00];
491    dl += 3;
492    sp++;
493  }
494}
495
496/***************************************************************/
497void mlib_v_ImageLookUpSI_S32_U8_3(const mlib_s32 *src,
498                                   mlib_s32       slb,
499                                   mlib_u8        *dst,
500                                   mlib_s32       dlb,
501                                   mlib_s32       xsize,
502                                   mlib_s32       ysize,
503                                   const mlib_u8  **table)
504{
505  mlib_s32 *sl;
506  mlib_u8 *dl;
507  mlib_s32 i, j;
508  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
509  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
510  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
511
512  sl = (void *)src;
513  dl = dst;
514
515  /* row loop */
516  for (j = 0; j < ysize; j++) {
517    mlib_s32 *sp = sl;
518    mlib_u8 *dp = dl;
519    mlib_s32 off, s0, size = xsize;
520
521    off = (mlib_s32) ((mlib_addr) dp & 7);
522    off = (off * 5) & 7;
523    off = (off < size) ? off : size;
524
525    for (i = 0; i < off; i++) {
526      s0 = *sp++;
527      *dp++ = tab0[s0];
528      *dp++ = tab1[s0];
529      *dp++ = tab2[s0];
530      size--;
531    }
532
533    if (size > 0) {
534      mlib_v_ImageLookUpSI_S32_U8_3_D1(sp, dp, size, table);
535    }
536
537    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
538    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
539  }
540}
541
542/***************************************************************/
543void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src,
544                                              mlib_u8        *dst,
545                                              mlib_s32       xsize,
546                                              const mlib_u8  **table)
547{
548  mlib_s32 *sp;                        /* pointer to source data */
549  mlib_s32 s0, s1;                     /* source data */
550  mlib_u8 *dl;                         /* pointer to start of destination */
551  mlib_d64 *dp;                        /* aligned pointer to destination */
552  mlib_d64 t0, t1, t2;                 /* destination data */
553  mlib_d64 t3, t4, t5;                 /* destination data */
554  mlib_d64 t6, t7, acc;                /* destination data */
555  mlib_s32 i;                          /* loop variable */
556  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
557  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
558  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
559  const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
560
561  sp = (void *)src;
562  dl = dst;
563  dp = (mlib_d64 *) dl;
564
565  vis_alignaddr((void *)0, 7);
566
567  if (xsize >= 2) {
568
569    s0 = sp[0];
570    s1 = sp[1];
571    sp += 2;
572
573#pragma pipeloop(0)
574    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
575      t7 = VIS_LD_U8_I(tab3, s1);
576      t6 = VIS_LD_U8_I(tab2, s1);
577      t5 = VIS_LD_U8_I(tab1, s1);
578      t4 = VIS_LD_U8_I(tab0, s1);
579      t3 = VIS_LD_U8_I(tab3, s0);
580      t2 = VIS_LD_U8_I(tab2, s0);
581      t1 = VIS_LD_U8_I(tab1, s0);
582      t0 = VIS_LD_U8_I(tab0, s0);
583      acc = vis_faligndata(t7, acc);
584      acc = vis_faligndata(t6, acc);
585      acc = vis_faligndata(t5, acc);
586      acc = vis_faligndata(t4, acc);
587      acc = vis_faligndata(t3, acc);
588      acc = vis_faligndata(t2, acc);
589      acc = vis_faligndata(t1, acc);
590      acc = vis_faligndata(t0, acc);
591      s0 = sp[0];
592      s1 = sp[1];
593      *dp++ = acc;
594    }
595
596    t7 = VIS_LD_U8_I(tab3, s1);
597    t6 = VIS_LD_U8_I(tab2, s1);
598    t5 = VIS_LD_U8_I(tab1, s1);
599    t4 = VIS_LD_U8_I(tab0, s1);
600    t3 = VIS_LD_U8_I(tab3, s0);
601    t2 = VIS_LD_U8_I(tab2, s0);
602    t1 = VIS_LD_U8_I(tab1, s0);
603    t0 = VIS_LD_U8_I(tab0, s0);
604    acc = vis_faligndata(t7, acc);
605    acc = vis_faligndata(t6, acc);
606    acc = vis_faligndata(t5, acc);
607    acc = vis_faligndata(t4, acc);
608    acc = vis_faligndata(t3, acc);
609    acc = vis_faligndata(t2, acc);
610    acc = vis_faligndata(t1, acc);
611    acc = vis_faligndata(t0, acc);
612    *dp++ = acc;
613  }
614
615  if ((xsize & 1) != 0) {
616    s0 = sp[0];
617    t7 = VIS_LD_U8_I(tab3, s0);
618    t6 = VIS_LD_U8_I(tab2, s0);
619    t5 = VIS_LD_U8_I(tab1, s0);
620    t4 = VIS_LD_U8_I(tab0, s0);
621    acc = vis_faligndata(t7, acc);
622    acc = vis_faligndata(t6, acc);
623    acc = vis_faligndata(t5, acc);
624    acc = vis_faligndata(t4, acc);
625    *(mlib_f32 *) dp = vis_read_hi(acc);
626  }
627}
628
629/***************************************************************/
630void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src,
631                                              mlib_u8        *dst,
632                                              mlib_s32       xsize,
633                                              const mlib_u8  **table)
634{
635  mlib_s32 *sp;                        /* pointer to source data */
636  mlib_s32 s0, s1, s2;                 /* source data */
637  mlib_u8 *dl;                         /* pointer to start of destination */
638  mlib_d64 *dp;                        /* aligned pointer to destination */
639  mlib_d64 t0, t1, t2;                 /* destination data */
640  mlib_d64 t3, t4, t5;                 /* destination data */
641  mlib_d64 t6, t7, acc;                /* destination data */
642  mlib_s32 i;                          /* loop variable */
643  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
644  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
645  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
646  const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
647
648  sp = (void *)src;
649  dl = dst;
650  dp = (mlib_d64 *) dl;
651
652  vis_alignaddr((void *)0, 7);
653
654  s0 = *sp++;
655
656  if (xsize >= 2) {
657
658    s1 = sp[0];
659    s2 = sp[1];
660    sp += 2;
661
662#pragma pipeloop(0)
663    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
664      t7 = VIS_LD_U8_I(tab0, s2);
665      t6 = VIS_LD_U8_I(tab3, s1);
666      t5 = VIS_LD_U8_I(tab2, s1);
667      t4 = VIS_LD_U8_I(tab1, s1);
668      t3 = VIS_LD_U8_I(tab0, s1);
669      t2 = VIS_LD_U8_I(tab3, s0);
670      t1 = VIS_LD_U8_I(tab2, s0);
671      t0 = VIS_LD_U8_I(tab1, s0);
672      acc = vis_faligndata(t7, acc);
673      acc = vis_faligndata(t6, acc);
674      acc = vis_faligndata(t5, acc);
675      acc = vis_faligndata(t4, acc);
676      acc = vis_faligndata(t3, acc);
677      acc = vis_faligndata(t2, acc);
678      acc = vis_faligndata(t1, acc);
679      acc = vis_faligndata(t0, acc);
680      s0 = s2;
681      s1 = sp[0];
682      s2 = sp[1];
683      *dp++ = acc;
684    }
685
686    t7 = VIS_LD_U8_I(tab0, s2);
687    t6 = VIS_LD_U8_I(tab3, s1);
688    t5 = VIS_LD_U8_I(tab2, s1);
689    t4 = VIS_LD_U8_I(tab1, s1);
690    t3 = VIS_LD_U8_I(tab0, s1);
691    t2 = VIS_LD_U8_I(tab3, s0);
692    t1 = VIS_LD_U8_I(tab2, s0);
693    t0 = VIS_LD_U8_I(tab1, s0);
694    acc = vis_faligndata(t7, acc);
695    acc = vis_faligndata(t6, acc);
696    acc = vis_faligndata(t5, acc);
697    acc = vis_faligndata(t4, acc);
698    acc = vis_faligndata(t3, acc);
699    acc = vis_faligndata(t2, acc);
700    acc = vis_faligndata(t1, acc);
701    acc = vis_faligndata(t0, acc);
702    s0 = s2;
703    *dp++ = acc;
704  }
705
706  dl = (mlib_u8 *) dp;
707
708  if ((xsize & 1) != 0) {
709    s1 = sp[0];
710    t7 = VIS_LD_U8_I(tab0, s1);
711    t6 = VIS_LD_U8_I(tab3, s0);
712    t5 = VIS_LD_U8_I(tab2, s0);
713    t4 = VIS_LD_U8_I(tab1, s0);
714    acc = vis_faligndata(t7, acc);
715    acc = vis_faligndata(t6, acc);
716    acc = vis_faligndata(t5, acc);
717    acc = vis_faligndata(t4, acc);
718    *(mlib_f32 *) dl = vis_read_hi(acc);
719    dl += 4;
720    s0 = s1;
721  }
722
723  dl[0] = tab1[s0];
724  dl[1] = tab2[s0];
725  dl[2] = tab3[s0];
726}
727
728/***************************************************************/
729void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src,
730                                              mlib_u8        *dst,
731                                              mlib_s32       xsize,
732                                              const mlib_u8  **table)
733{
734  mlib_s32 *sp;                        /* pointer to source data */
735  mlib_s32 s0, s1, s2;                 /* source data */
736  mlib_u8 *dl;                         /* pointer to start of destination */
737  mlib_d64 *dp;                        /* aligned pointer to destination */
738  mlib_d64 t0, t1, t2;                 /* destination data */
739  mlib_d64 t3, t4, t5;                 /* destination data */
740  mlib_d64 t6, t7, acc;                /* destination data */
741  mlib_s32 i;                          /* loop variable */
742  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
743  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
744  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
745  const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
746
747  sp = (void *)src;
748  dl = dst;
749  dp = (mlib_d64 *) dl;
750
751  vis_alignaddr((void *)0, 7);
752
753  s0 = *sp++;
754
755  if (xsize >= 2) {
756
757    s1 = sp[0];
758    s2 = sp[1];
759    sp += 2;
760
761#pragma pipeloop(0)
762    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
763      t7 = VIS_LD_U8_I(tab1, s2);
764      t6 = VIS_LD_U8_I(tab0, s2);
765      t5 = VIS_LD_U8_I(tab3, s1);
766      t4 = VIS_LD_U8_I(tab2, s1);
767      t3 = VIS_LD_U8_I(tab1, s1);
768      t2 = VIS_LD_U8_I(tab0, s1);
769      t1 = VIS_LD_U8_I(tab3, s0);
770      t0 = VIS_LD_U8_I(tab2, s0);
771      acc = vis_faligndata(t7, acc);
772      acc = vis_faligndata(t6, acc);
773      acc = vis_faligndata(t5, acc);
774      acc = vis_faligndata(t4, acc);
775      acc = vis_faligndata(t3, acc);
776      acc = vis_faligndata(t2, acc);
777      acc = vis_faligndata(t1, acc);
778      acc = vis_faligndata(t0, acc);
779      s0 = s2;
780      s1 = sp[0];
781      s2 = sp[1];
782      *dp++ = acc;
783    }
784
785    t7 = VIS_LD_U8_I(tab1, s2);
786    t6 = VIS_LD_U8_I(tab0, s2);
787    t5 = VIS_LD_U8_I(tab3, s1);
788    t4 = VIS_LD_U8_I(tab2, s1);
789    t3 = VIS_LD_U8_I(tab1, s1);
790    t2 = VIS_LD_U8_I(tab0, s1);
791    t1 = VIS_LD_U8_I(tab3, s0);
792    t0 = VIS_LD_U8_I(tab2, s0);
793    acc = vis_faligndata(t7, acc);
794    acc = vis_faligndata(t6, acc);
795    acc = vis_faligndata(t5, acc);
796    acc = vis_faligndata(t4, acc);
797    acc = vis_faligndata(t3, acc);
798    acc = vis_faligndata(t2, acc);
799    acc = vis_faligndata(t1, acc);
800    acc = vis_faligndata(t0, acc);
801    s0 = s2;
802    *dp++ = acc;
803  }
804
805  dl = (mlib_u8 *) dp;
806
807  if ((xsize & 1) != 0) {
808    s1 = sp[0];
809    t7 = VIS_LD_U8_I(tab1, s1);
810    t6 = VIS_LD_U8_I(tab0, s1);
811    t5 = VIS_LD_U8_I(tab3, s0);
812    t4 = VIS_LD_U8_I(tab2, s0);
813    acc = vis_faligndata(t7, acc);
814    acc = vis_faligndata(t6, acc);
815    acc = vis_faligndata(t5, acc);
816    acc = vis_faligndata(t4, acc);
817    *(mlib_f32 *) dl = vis_read_hi(acc);
818    dl += 4;
819    s0 = s1;
820  }
821
822  dl[0] = tab2[s0];
823  dl[1] = tab3[s0];
824}
825
826/***************************************************************/
827void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src,
828                                              mlib_u8        *dst,
829                                              mlib_s32       xsize,
830                                              const mlib_u8  **table)
831{
832  mlib_s32 *sp;                        /* pointer to source data */
833  mlib_s32 s0, s1, s2;                 /* source data */
834  mlib_u8 *dl;                         /* pointer to start of destination */
835  mlib_d64 *dp;                        /* aligned pointer to destination */
836  mlib_d64 t0, t1, t2;                 /* destination data */
837  mlib_d64 t3, t4, t5;                 /* destination data */
838  mlib_d64 t6, t7, acc;                /* destination data */
839  mlib_s32 i;                          /* loop variable */
840  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
841  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
842  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
843  const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
844
845  sp = (void *)src;
846  dl = dst;
847  dp = (mlib_d64 *) dl;
848
849  vis_alignaddr((void *)0, 7);
850
851  s0 = *sp++;
852
853  if (xsize >= 2) {
854
855    s1 = sp[0];
856    s2 = sp[1];
857    sp += 2;
858
859#pragma pipeloop(0)
860    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
861      t7 = VIS_LD_U8_I(tab2, s2);
862      t6 = VIS_LD_U8_I(tab1, s2);
863      t5 = VIS_LD_U8_I(tab0, s2);
864      t4 = VIS_LD_U8_I(tab3, s1);
865      t3 = VIS_LD_U8_I(tab2, s1);
866      t2 = VIS_LD_U8_I(tab1, s1);
867      t1 = VIS_LD_U8_I(tab0, s1);
868      t0 = VIS_LD_U8_I(tab3, s0);
869      acc = vis_faligndata(t7, acc);
870      acc = vis_faligndata(t6, acc);
871      acc = vis_faligndata(t5, acc);
872      acc = vis_faligndata(t4, acc);
873      acc = vis_faligndata(t3, acc);
874      acc = vis_faligndata(t2, acc);
875      acc = vis_faligndata(t1, acc);
876      acc = vis_faligndata(t0, acc);
877      s0 = s2;
878      s1 = sp[0];
879      s2 = sp[1];
880      *dp++ = acc;
881    }
882
883    t7 = VIS_LD_U8_I(tab2, s2);
884    t6 = VIS_LD_U8_I(tab1, s2);
885    t5 = VIS_LD_U8_I(tab0, s2);
886    t4 = VIS_LD_U8_I(tab3, s1);
887    t3 = VIS_LD_U8_I(tab2, s1);
888    t2 = VIS_LD_U8_I(tab1, s1);
889    t1 = VIS_LD_U8_I(tab0, s1);
890    t0 = VIS_LD_U8_I(tab3, s0);
891    acc = vis_faligndata(t7, acc);
892    acc = vis_faligndata(t6, acc);
893    acc = vis_faligndata(t5, acc);
894    acc = vis_faligndata(t4, acc);
895    acc = vis_faligndata(t3, acc);
896    acc = vis_faligndata(t2, acc);
897    acc = vis_faligndata(t1, acc);
898    acc = vis_faligndata(t0, acc);
899    s0 = s2;
900    *dp++ = acc;
901  }
902
903  dl = (mlib_u8 *) dp;
904
905  if ((xsize & 1) != 0) {
906    s1 = sp[0];
907    t7 = VIS_LD_U8_I(tab2, s1);
908    t6 = VIS_LD_U8_I(tab1, s1);
909    t5 = VIS_LD_U8_I(tab0, s1);
910    t4 = VIS_LD_U8_I(tab3, s0);
911    acc = vis_faligndata(t7, acc);
912    acc = vis_faligndata(t6, acc);
913    acc = vis_faligndata(t5, acc);
914    acc = vis_faligndata(t4, acc);
915    *(mlib_f32 *) dl = vis_read_hi(acc);
916    dl += 4;
917    s0 = s1;
918  }
919
920  dl[0] = tab3[s0];
921}
922
923/***************************************************************/
924void mlib_v_ImageLookUpSI_S32_U8_4(const mlib_s32 *src,
925                                   mlib_s32       slb,
926                                   mlib_u8        *dst,
927                                   mlib_s32       dlb,
928                                   mlib_s32       xsize,
929                                   mlib_s32       ysize,
930                                   const mlib_u8  **table)
931{
932  mlib_s32 *sl;
933  mlib_u8 *dl;
934  mlib_s32 j;
935  const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
936  const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
937  const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
938  const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
939
940  sl = (void *)src;
941  dl = dst;
942
943  /* row loop */
944  for (j = 0; j < ysize; j++) {
945    mlib_s32 *sp = sl;
946    mlib_u8 *dp = dl;
947    mlib_s32 off, s0, size = xsize;
948
949    off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
950
951    if ((off >= 4) && (size > 0)) {
952      s0 = *sp++;
953      *dp++ = tab0[s0];
954      *dp++ = tab1[s0];
955      *dp++ = tab2[s0];
956      *dp++ = tab3[s0];
957      size--;
958    }
959
960    if (size > 0) {
961      off = (mlib_s32) ((4 - ((mlib_addr) dp & 3)) & 3);
962
963      if (off == 0) {
964        mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(sp, dp, size, table);
965      }
966      else if (off == 1) {
967        s0 = *sp;
968        *dp++ = tab0[s0];
969        size--;
970        mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(sp, dp, size, table);
971      }
972      else if (off == 2) {
973        s0 = *sp;
974        *dp++ = tab0[s0];
975        *dp++ = tab1[s0];
976        size--;
977        mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(sp, dp, size, table);
978      }
979      else if (off == 3) {
980        s0 = *sp;
981        *dp++ = tab0[s0];
982        *dp++ = tab1[s0];
983        *dp++ = tab2[s0];
984        size--;
985        mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(sp, dp, size, table);
986      }
987    }
988
989    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
990    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
991  }
992}
993
994/***************************************************************/
995