1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src,
34                                                    mlib_u8       *dst,
35                                                    mlib_s32      xsize,
36                                                    const mlib_u8 *table0,
37                                                    const mlib_u8 *table1,
38                                                    const mlib_u8 *table2,
39                                                    const mlib_u8 *table3);
40
41static void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src,
42                                                    mlib_u8       *dst,
43                                                    mlib_s32      xsize,
44                                                    const mlib_u8 *table0,
45                                                    const mlib_u8 *table1,
46                                                    const mlib_u8 *table2,
47                                                    const mlib_u8 *table3);
48
49static void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src,
50                                                    mlib_u8       *dst,
51                                                    mlib_s32      xsize,
52                                                    const mlib_u8 *table0,
53                                                    const mlib_u8 *table1,
54                                                    const mlib_u8 *table2,
55                                                    const mlib_u8 *table3);
56
57static void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src,
58                                                    mlib_u8       *dst,
59                                                    mlib_s32      xsize,
60                                                    const mlib_u8 *table0,
61                                                    const mlib_u8 *table1,
62                                                    const mlib_u8 *table2,
63                                                    const mlib_u8 *table3);
64
65static void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
66                                                  mlib_u8       *dst,
67                                                  mlib_s32      xsize,
68                                                  const mlib_u8 *table0,
69                                                  const mlib_u8 *table1,
70                                                  const mlib_u8 *table2);
71
72static void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
73                                                  mlib_u8       *dst,
74                                                  mlib_s32      xsize,
75                                                  const mlib_u8 *table0,
76                                                  const mlib_u8 *table1,
77                                                  const mlib_u8 *table2);
78
79static void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
80                                                  mlib_u8       *dst,
81                                                  mlib_s32      xsize,
82                                                  const mlib_u8 *table0,
83                                                  const mlib_u8 *table1,
84                                                  const mlib_u8 *table2);
85
86static void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
87                                                  mlib_u8       *dst,
88                                                  mlib_s32      xsize,
89                                                  const mlib_u8 *table0,
90                                                  const mlib_u8 *table1,
91                                                  const mlib_u8 *table2);
92
93/***************************************************************/
/* Wrapper around vis_ld_u8_i() that accepts a const-qualified table base. */
#define VIS_LD_U8_I(base, index) vis_ld_u8_i((void *)(base), (index))
95
96/***************************************************************/
97void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src,
98                                             mlib_u8       *dst,
99                                             mlib_s32      xsize,
100                                             const mlib_u8 *table0,
101                                             const mlib_u8 *table1,
102                                             const mlib_u8 *table2,
103                                             const mlib_u8 *table3)
104{
105  mlib_u32 *sa;                        /* aligned pointer to source data */
106  mlib_u8 *sp;                         /* pointer to source data */
107  mlib_u32 s0, s1;                     /* source data */
108  mlib_u8 *dl;                         /* pointer to start of destination */
109  mlib_u8 *dend;                       /* pointer to end of destination */
110  mlib_d64 *dp;                        /* aligned pointer to destination */
111  mlib_d64 t0, t1, t2;                 /* destination data */
112  mlib_d64 t3, t4, t5;                 /* destination data */
113  mlib_d64 t6, t7, acc;                /* destination data */
114  mlib_s32 emask;                      /* edge mask */
115  mlib_s32 i, num;                     /* loop variable */
116
117  sa = (mlib_u32 *) src;
118  dl = dst;
119  dp = (mlib_d64 *) dl;
120  dend = dl + xsize - 1;
121
122  vis_alignaddr((void *)0, 7);
123
124  if (xsize >= 8) {
125
126    s0 = sa[0];
127    s1 = sa[1];
128    sa += 2;
129
130#pragma pipeloop(0)
131    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
132      t7 = VIS_LD_U8_I(table3, s1 & 0xFF);
133      t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
134      t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
135      t4 = VIS_LD_U8_I(table0, s1 >> 24);
136      t3 = VIS_LD_U8_I(table3, s0 & 0xFF);
137      t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
138      t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
139      t0 = VIS_LD_U8_I(table0, s0 >> 24);
140      acc = vis_faligndata(t7, acc);
141      acc = vis_faligndata(t6, acc);
142      acc = vis_faligndata(t5, acc);
143      acc = vis_faligndata(t4, acc);
144      acc = vis_faligndata(t3, acc);
145      acc = vis_faligndata(t2, acc);
146      acc = vis_faligndata(t1, acc);
147      acc = vis_faligndata(t0, acc);
148      s0 = sa[0];
149      s1 = sa[1];
150      *dp++ = acc;
151    }
152
153    t7 = VIS_LD_U8_I(table3, s1 & 0xFF);
154    t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
155    t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
156    t4 = VIS_LD_U8_I(table0, s1 >> 24);
157    t3 = VIS_LD_U8_I(table3, s0 & 0xFF);
158    t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
159    t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
160    t0 = VIS_LD_U8_I(table0, s0 >> 24);
161    acc = vis_faligndata(t7, acc);
162    acc = vis_faligndata(t6, acc);
163    acc = vis_faligndata(t5, acc);
164    acc = vis_faligndata(t4, acc);
165    acc = vis_faligndata(t3, acc);
166    acc = vis_faligndata(t2, acc);
167    acc = vis_faligndata(t1, acc);
168    acc = vis_faligndata(t0, acc);
169    *dp++ = acc;
170  }
171
172  sp = (mlib_u8 *) sa;
173
174  if ((mlib_addr) dp <= (mlib_addr) dend) {
175
176    num = (mlib_addr) dend - (mlib_addr) dp;
177    sp += num;
178    num++;
179
180    if ((num & 3) == 1) {
181      s0 = (mlib_s32) * sp;
182      sp--;
183
184      t0 = VIS_LD_U8_I(table0, s0);
185      acc = vis_faligndata(t0, acc);
186      num--;
187    }
188    else if ((num & 3) == 2) {
189      s0 = (mlib_s32) * sp;
190      sp--;
191
192      t0 = VIS_LD_U8_I(table1, s0);
193      acc = vis_faligndata(t0, acc);
194
195      s0 = (mlib_s32) * sp;
196      sp--;
197
198      t0 = VIS_LD_U8_I(table0, s0);
199      acc = vis_faligndata(t0, acc);
200      num -= 2;
201    }
202    else if ((num & 3) == 3) {
203      s0 = (mlib_s32) * sp;
204      sp--;
205
206      t0 = VIS_LD_U8_I(table2, s0);
207      acc = vis_faligndata(t0, acc);
208
209      s0 = (mlib_s32) * sp;
210      sp--;
211
212      t0 = VIS_LD_U8_I(table1, s0);
213      acc = vis_faligndata(t0, acc);
214
215      s0 = (mlib_s32) * sp;
216      sp--;
217
218      t0 = VIS_LD_U8_I(table0, s0);
219      acc = vis_faligndata(t0, acc);
220      num -= 3;
221    }
222
223    if (num != 0) {
224      s0 = (mlib_s32) * sp;
225      sp--;
226
227      t0 = VIS_LD_U8_I(table3, s0);
228      acc = vis_faligndata(t0, acc);
229
230      s0 = (mlib_s32) * sp;
231      sp--;
232
233      t0 = VIS_LD_U8_I(table2, s0);
234      acc = vis_faligndata(t0, acc);
235
236      s0 = (mlib_s32) * sp;
237      sp--;
238
239      t0 = VIS_LD_U8_I(table1, s0);
240      acc = vis_faligndata(t0, acc);
241
242      s0 = (mlib_s32) * sp;
243      sp--;
244
245      t0 = VIS_LD_U8_I(table0, s0);
246      acc = vis_faligndata(t0, acc);
247    }
248
249    emask = vis_edge8(dp, dend);
250    vis_pst_8(acc, dp, emask);
251  }
252}
253
254/***************************************************************/
255void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src,
256                                             mlib_u8       *dst,
257                                             mlib_s32      xsize,
258                                             const mlib_u8 *table0,
259                                             const mlib_u8 *table1,
260                                             const mlib_u8 *table2,
261                                             const mlib_u8 *table3)
262{
263  mlib_u32 *sa;                        /* aligned pointer to source data */
264  mlib_u8 *sp;                         /* pointer to source data */
265  mlib_u32 s0, s1, s2;                 /* source data */
266  mlib_u8 *dl;                         /* pointer to start of destination */
267  mlib_u8 *dend;                       /* pointer to end of destination */
268  mlib_d64 *dp;                        /* aligned pointer to destination */
269  mlib_d64 t0, t1, t2;                 /* destination data */
270  mlib_d64 t3, t4, t5;                 /* destination data */
271  mlib_d64 t6, t7, acc;                /* destination data */
272  mlib_s32 emask;                      /* edge mask */
273  mlib_s32 i, num;                     /* loop variable */
274
275  sa = (mlib_u32 *) (src - 1);
276  dl = dst;
277  dp = (mlib_d64 *) dl;
278  dend = dl + xsize - 1;
279
280  vis_alignaddr((void *)0, 7);
281
282  s0 = *sa++;
283
284  if (xsize >= 8) {
285
286    s1 = sa[0];
287    s2 = sa[1];
288    sa += 2;
289
290#pragma pipeloop(0)
291    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
292      t7 = VIS_LD_U8_I(table3, s2 >> 24);
293      t6 = VIS_LD_U8_I(table2, s1 & 0xFF);
294      t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
295      t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
296      t3 = VIS_LD_U8_I(table3, s1 >> 24);
297      t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
298      t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
299      t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
300      acc = vis_faligndata(t7, acc);
301      acc = vis_faligndata(t6, acc);
302      acc = vis_faligndata(t5, acc);
303      acc = vis_faligndata(t4, acc);
304      acc = vis_faligndata(t3, acc);
305      acc = vis_faligndata(t2, acc);
306      acc = vis_faligndata(t1, acc);
307      acc = vis_faligndata(t0, acc);
308      s0 = s2;
309      s1 = sa[0];
310      s2 = sa[1];
311      *dp++ = acc;
312    }
313
314    t7 = VIS_LD_U8_I(table3, s2 >> 24);
315    t6 = VIS_LD_U8_I(table2, s1 & 0xFF);
316    t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
317    t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
318    t3 = VIS_LD_U8_I(table3, s1 >> 24);
319    t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
320    t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
321    t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
322    acc = vis_faligndata(t7, acc);
323    acc = vis_faligndata(t6, acc);
324    acc = vis_faligndata(t5, acc);
325    acc = vis_faligndata(t4, acc);
326    acc = vis_faligndata(t3, acc);
327    acc = vis_faligndata(t2, acc);
328    acc = vis_faligndata(t1, acc);
329    acc = vis_faligndata(t0, acc);
330    *dp++ = acc;
331  }
332
333  sp = (mlib_u8 *) sa;
334  sp -= 3;
335
336  if ((mlib_addr) dp <= (mlib_addr) dend) {
337
338    num = (mlib_addr) dend - (mlib_addr) dp;
339    sp += num;
340    num++;
341
342    if ((num & 3) == 1) {
343      s0 = (mlib_s32) * sp;
344      sp--;
345
346      t0 = VIS_LD_U8_I(table0, s0);
347      acc = vis_faligndata(t0, acc);
348      num--;
349    }
350    else if ((num & 3) == 2) {
351      s0 = (mlib_s32) * sp;
352      sp--;
353
354      t0 = VIS_LD_U8_I(table1, s0);
355      acc = vis_faligndata(t0, acc);
356
357      s0 = (mlib_s32) * sp;
358      sp--;
359
360      t0 = VIS_LD_U8_I(table0, s0);
361      acc = vis_faligndata(t0, acc);
362      num -= 2;
363    }
364    else if ((num & 3) == 3) {
365      s0 = (mlib_s32) * sp;
366      sp--;
367
368      t0 = VIS_LD_U8_I(table2, s0);
369      acc = vis_faligndata(t0, acc);
370
371      s0 = (mlib_s32) * sp;
372      sp--;
373
374      t0 = VIS_LD_U8_I(table1, s0);
375      acc = vis_faligndata(t0, acc);
376
377      s0 = (mlib_s32) * sp;
378      sp--;
379
380      t0 = VIS_LD_U8_I(table0, s0);
381      acc = vis_faligndata(t0, acc);
382      num -= 3;
383    }
384
385    if (num != 0) {
386      s0 = (mlib_s32) * sp;
387      sp--;
388
389      t0 = VIS_LD_U8_I(table3, s0);
390      acc = vis_faligndata(t0, acc);
391
392      s0 = (mlib_s32) * sp;
393      sp--;
394
395      t0 = VIS_LD_U8_I(table2, s0);
396      acc = vis_faligndata(t0, acc);
397
398      s0 = (mlib_s32) * sp;
399      sp--;
400
401      t0 = VIS_LD_U8_I(table1, s0);
402      acc = vis_faligndata(t0, acc);
403
404      s0 = (mlib_s32) * sp;
405      sp--;
406
407      t0 = VIS_LD_U8_I(table0, s0);
408      acc = vis_faligndata(t0, acc);
409    }
410
411    emask = vis_edge8(dp, dend);
412    vis_pst_8(acc, dp, emask);
413  }
414}
415
416/***************************************************************/
417void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src,
418                                             mlib_u8       *dst,
419                                             mlib_s32      xsize,
420                                             const mlib_u8 *table0,
421                                             const mlib_u8 *table1,
422                                             const mlib_u8 *table2,
423                                             const mlib_u8 *table3)
424{
425  mlib_u32 *sa;                        /* aligned pointer to source data */
426  mlib_u8 *sp;                         /* pointer to source data */
427  mlib_u32 s0, s1, s2;                 /* source data */
428  mlib_u8 *dl;                         /* pointer to start of destination */
429  mlib_u8 *dend;                       /* pointer to end of destination */
430  mlib_d64 *dp;                        /* aligned pointer to destination */
431  mlib_d64 t0, t1, t2;                 /* destination data */
432  mlib_d64 t3, t4, t5;                 /* destination data */
433  mlib_d64 t6, t7, acc;                /* destination data */
434  mlib_s32 emask;                      /* edge mask */
435  mlib_s32 i, num;                     /* loop variable */
436
437  sa = (mlib_u32 *) (src - 2);
438  dl = dst;
439  dp = (mlib_d64 *) dl;
440  dend = dl + xsize - 1;
441
442  vis_alignaddr((void *)0, 7);
443
444  s0 = *sa++;
445
446  if (xsize >= 8) {
447
448    s1 = sa[0];
449    s2 = sa[1];
450    sa += 2;
451
452#pragma pipeloop(0)
453    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
454      t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF);
455      t6 = VIS_LD_U8_I(table2, s2 >> 24);
456      t5 = VIS_LD_U8_I(table1, s1 & 0xFF);
457      t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
458      t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF);
459      t2 = VIS_LD_U8_I(table2, s1 >> 24);
460      t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
461      t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
462      acc = vis_faligndata(t7, acc);
463      acc = vis_faligndata(t6, acc);
464      acc = vis_faligndata(t5, acc);
465      acc = vis_faligndata(t4, acc);
466      acc = vis_faligndata(t3, acc);
467      acc = vis_faligndata(t2, acc);
468      acc = vis_faligndata(t1, acc);
469      acc = vis_faligndata(t0, acc);
470      s0 = s2;
471      s1 = sa[0];
472      s2 = sa[1];
473      *dp++ = acc;
474    }
475
476    t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF);
477    t6 = VIS_LD_U8_I(table2, s2 >> 24);
478    t5 = VIS_LD_U8_I(table1, s1 & 0xFF);
479    t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
480    t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF);
481    t2 = VIS_LD_U8_I(table2, s1 >> 24);
482    t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
483    t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
484    acc = vis_faligndata(t7, acc);
485    acc = vis_faligndata(t6, acc);
486    acc = vis_faligndata(t5, acc);
487    acc = vis_faligndata(t4, acc);
488    acc = vis_faligndata(t3, acc);
489    acc = vis_faligndata(t2, acc);
490    acc = vis_faligndata(t1, acc);
491    acc = vis_faligndata(t0, acc);
492    *dp++ = acc;
493  }
494
495  sp = (mlib_u8 *) sa;
496  sp -= 2;
497
498  if ((mlib_addr) dp <= (mlib_addr) dend) {
499
500    num = (mlib_addr) dend - (mlib_addr) dp;
501    sp += num;
502    num++;
503
504    if ((num & 3) == 1) {
505      s0 = (mlib_s32) * sp;
506      sp--;
507
508      t0 = VIS_LD_U8_I(table0, s0);
509      acc = vis_faligndata(t0, acc);
510      num--;
511    }
512    else if ((num & 3) == 2) {
513      s0 = (mlib_s32) * sp;
514      sp--;
515
516      t0 = VIS_LD_U8_I(table1, s0);
517      acc = vis_faligndata(t0, acc);
518
519      s0 = (mlib_s32) * sp;
520      sp--;
521
522      t0 = VIS_LD_U8_I(table0, s0);
523      acc = vis_faligndata(t0, acc);
524      num -= 2;
525    }
526    else if ((num & 3) == 3) {
527      s0 = (mlib_s32) * sp;
528      sp--;
529
530      t0 = VIS_LD_U8_I(table2, s0);
531      acc = vis_faligndata(t0, acc);
532
533      s0 = (mlib_s32) * sp;
534      sp--;
535
536      t0 = VIS_LD_U8_I(table1, s0);
537      acc = vis_faligndata(t0, acc);
538
539      s0 = (mlib_s32) * sp;
540      sp--;
541
542      t0 = VIS_LD_U8_I(table0, s0);
543      acc = vis_faligndata(t0, acc);
544      num -= 3;
545    }
546
547    if (num != 0) {
548      s0 = (mlib_s32) * sp;
549      sp--;
550
551      t0 = VIS_LD_U8_I(table3, s0);
552      acc = vis_faligndata(t0, acc);
553
554      s0 = (mlib_s32) * sp;
555      sp--;
556
557      t0 = VIS_LD_U8_I(table2, s0);
558      acc = vis_faligndata(t0, acc);
559
560      s0 = (mlib_s32) * sp;
561      sp--;
562
563      t0 = VIS_LD_U8_I(table1, s0);
564      acc = vis_faligndata(t0, acc);
565
566      s0 = (mlib_s32) * sp;
567      sp--;
568
569      t0 = VIS_LD_U8_I(table0, s0);
570      acc = vis_faligndata(t0, acc);
571    }
572
573    emask = vis_edge8(dp, dend);
574    vis_pst_8(acc, dp, emask);
575  }
576}
577
578/***************************************************************/
579void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src,
580                                             mlib_u8       *dst,
581                                             mlib_s32      xsize,
582                                             const mlib_u8 *table0,
583                                             const mlib_u8 *table1,
584                                             const mlib_u8 *table2,
585                                             const mlib_u8 *table3)
586{
587  mlib_u32 *sa;                        /* aligned pointer to source data */
588  mlib_u8 *sp;                         /* pointer to source data */
589  mlib_u32 s0, s1, s2;                 /* source data */
590  mlib_u8 *dl;                         /* pointer to start of destination */
591  mlib_u8 *dend;                       /* pointer to end of destination */
592  mlib_d64 *dp;                        /* aligned pointer to destination */
593  mlib_d64 t0, t1, t2;                 /* destination data */
594  mlib_d64 t3, t4, t5;                 /* destination data */
595  mlib_d64 t6, t7, acc;                /* destination data */
596  mlib_s32 emask;                      /* edge mask */
597  mlib_s32 i, num;                     /* loop variable */
598
599  sa = (mlib_u32 *) (src - 3);
600  dl = dst;
601  dp = (mlib_d64 *) dl;
602  dend = dl + xsize - 1;
603
604  vis_alignaddr((void *)0, 7);
605
606  s0 = *sa++;
607
608  if (xsize >= 8) {
609
610    s1 = sa[0];
611    s2 = sa[1];
612    sa += 2;
613
614#pragma pipeloop(0)
615    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
616      t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF);
617      t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF);
618      t5 = VIS_LD_U8_I(table1, s2 >> 24);
619      t4 = VIS_LD_U8_I(table0, s1 & 0xFF);
620      t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF);
621      t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
622      t1 = VIS_LD_U8_I(table1, s1 >> 24);
623      t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
624      acc = vis_faligndata(t7, acc);
625      acc = vis_faligndata(t6, acc);
626      acc = vis_faligndata(t5, acc);
627      acc = vis_faligndata(t4, acc);
628      acc = vis_faligndata(t3, acc);
629      acc = vis_faligndata(t2, acc);
630      acc = vis_faligndata(t1, acc);
631      acc = vis_faligndata(t0, acc);
632      s0 = s2;
633      s1 = sa[0];
634      s2 = sa[1];
635      *dp++ = acc;
636    }
637
638    t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF);
639    t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF);
640    t5 = VIS_LD_U8_I(table1, s2 >> 24);
641    t4 = VIS_LD_U8_I(table0, s1 & 0xFF);
642    t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF);
643    t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
644    t1 = VIS_LD_U8_I(table1, s1 >> 24);
645    t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
646    acc = vis_faligndata(t7, acc);
647    acc = vis_faligndata(t6, acc);
648    acc = vis_faligndata(t5, acc);
649    acc = vis_faligndata(t4, acc);
650    acc = vis_faligndata(t3, acc);
651    acc = vis_faligndata(t2, acc);
652    acc = vis_faligndata(t1, acc);
653    acc = vis_faligndata(t0, acc);
654    *dp++ = acc;
655  }
656
657  sp = (mlib_u8 *) sa;
658  sp--;
659
660  if ((mlib_addr) dp <= (mlib_addr) dend) {
661
662    num = (mlib_addr) dend - (mlib_addr) dp;
663    sp += num;
664    num++;
665
666    if ((num & 3) == 1) {
667      s0 = (mlib_s32) * sp;
668      sp--;
669
670      t0 = VIS_LD_U8_I(table0, s0);
671      acc = vis_faligndata(t0, acc);
672      num--;
673    }
674    else if ((num & 3) == 2) {
675      s0 = (mlib_s32) * sp;
676      sp--;
677
678      t0 = VIS_LD_U8_I(table1, s0);
679      acc = vis_faligndata(t0, acc);
680
681      s0 = (mlib_s32) * sp;
682      sp--;
683
684      t0 = VIS_LD_U8_I(table0, s0);
685      acc = vis_faligndata(t0, acc);
686      num -= 2;
687    }
688    else if ((num & 3) == 3) {
689      s0 = (mlib_s32) * sp;
690      sp--;
691
692      t0 = VIS_LD_U8_I(table2, s0);
693      acc = vis_faligndata(t0, acc);
694
695      s0 = (mlib_s32) * sp;
696      sp--;
697
698      t0 = VIS_LD_U8_I(table1, s0);
699      acc = vis_faligndata(t0, acc);
700
701      s0 = (mlib_s32) * sp;
702      sp--;
703
704      t0 = VIS_LD_U8_I(table0, s0);
705      acc = vis_faligndata(t0, acc);
706      num -= 3;
707    }
708
709    if (num != 0) {
710      s0 = (mlib_s32) * sp;
711      sp--;
712
713      t0 = VIS_LD_U8_I(table3, s0);
714      acc = vis_faligndata(t0, acc);
715
716      s0 = (mlib_s32) * sp;
717      sp--;
718
719      t0 = VIS_LD_U8_I(table2, s0);
720      acc = vis_faligndata(t0, acc);
721
722      s0 = (mlib_s32) * sp;
723      sp--;
724
725      t0 = VIS_LD_U8_I(table1, s0);
726      acc = vis_faligndata(t0, acc);
727
728      s0 = (mlib_s32) * sp;
729      sp--;
730
731      t0 = VIS_LD_U8_I(table0, s0);
732      acc = vis_faligndata(t0, acc);
733    }
734
735    emask = vis_edge8(dp, dend);
736    vis_pst_8(acc, dp, emask);
737  }
738}
739
740/***************************************************************/
741void mlib_v_ImageLookUp_U8_U8_1(const mlib_u8 *src,
742                                mlib_s32      slb,
743                                mlib_u8       *dst,
744                                mlib_s32      dlb,
745                                mlib_s32      xsize,
746                                mlib_s32      ysize,
747                                const mlib_u8 **table)
748{
749  mlib_u8 *sl;
750  mlib_u8 *dl;
751  const mlib_u8 *tab = table[0];
752  mlib_s32 j, i;
753
754  sl = (void *)src;
755  dl = dst;
756
757  /* row loop */
758  for (j = 0; j < ysize; j++) {
759    mlib_u8 *sp = sl;
760    mlib_u8 *dp = dl;
761    mlib_s32 off, size = xsize;
762
763    off = (8 - ((mlib_addr) dp & 7)) & 7;
764
765    off = (off < size) ? off : size;
766
767    for (i = 0; i < off; i++) {
768      *dp++ = tab[(*sp++)];
769      size--;
770    }
771
772    if (size > 0) {
773
774      off = (mlib_addr) sp & 3;
775
776      if (off == 0) {
777        mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab, tab, tab,
778                                                tab);
779      }
780      else if (off == 1) {
781        mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab, tab, tab,
782                                                tab);
783      }
784      else if (off == 2) {
785        mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab, tab, tab,
786                                                tab);
787      }
788      else {
789        mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab, tab, tab,
790                                                tab);
791      }
792    }
793
794    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
795    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
796  }
797}
798
799/***************************************************************/
800void mlib_v_ImageLookUp_U8_U8_2(const mlib_u8 *src,
801                                mlib_s32      slb,
802                                mlib_u8       *dst,
803                                mlib_s32      dlb,
804                                mlib_s32      xsize,
805                                mlib_s32      ysize,
806                                const mlib_u8 **table)
807{
808  mlib_u8 *sl;
809  mlib_u8 *dl;
810  const mlib_u8 *tab;
811  mlib_s32 j, i;
812
813  sl = (void *)src;
814  dl = dst;
815
816  /* row loop */
817  for (j = 0; j < ysize; j++) {
818    mlib_u8 *sp = sl;
819    mlib_u8 *dp = dl;
820    mlib_s32 off, size = xsize * 2;
821    const mlib_u8 *tab0 = table[0];
822    const mlib_u8 *tab1 = table[1];
823
824    off = (8 - ((mlib_addr) dp & 7)) & 7;
825
826    off = (off < size) ? off : size;
827
828    for (i = 0; i < off - 1; i += 2) {
829      *dp++ = tab0[(*sp++)];
830      *dp++ = tab1[(*sp++)];
831      size -= 2;
832    }
833
834    if ((off & 1) != 0) {
835      *dp++ = tab0[(*sp++)];
836      size--;
837      tab = tab0;
838      tab0 = tab1;
839      tab1 = tab;
840    }
841
842    if (size > 0) {
843
844      off = (mlib_addr) sp & 3;
845
846      if (off == 0) {
847        mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0,
848                                                tab1);
849      }
850      else if (off == 1) {
851        mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0,
852                                                tab1);
853      }
854      else if (off == 2) {
855        mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0,
856                                                tab1);
857      }
858      else {
859        mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0,
860                                                tab1);
861      }
862    }
863
864    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
865    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
866  }
867}
868
869/***************************************************************/
870void mlib_v_ImageLookUp_U8_U8_4(const mlib_u8 *src,
871                                mlib_s32      slb,
872                                mlib_u8       *dst,
873                                mlib_s32      dlb,
874                                mlib_s32      xsize,
875                                mlib_s32      ysize,
876                                const mlib_u8 **table)
877{
878  mlib_u8 *sl;
879  mlib_u8 *dl;
880  const mlib_u8 *tab;
881  mlib_s32 j;
882
883  sl = (void *)src;
884  dl = dst;
885
886  /* row loop */
887  for (j = 0; j < ysize; j++) {
888    mlib_u8 *sp = sl;
889    mlib_u8 *dp = dl;
890    const mlib_u8 *tab0 = table[0];
891    const mlib_u8 *tab1 = table[1];
892    const mlib_u8 *tab2 = table[2];
893    const mlib_u8 *tab3 = table[3];
894    mlib_s32 off, size = xsize * 4;
895
896    off = (8 - ((mlib_addr) dp & 7)) & 7;
897
898    off = (off < size) ? off : size;
899
900    if (off >= 4) {
901      *dp++ = tab0[(*sp++)];
902      *dp++ = tab1[(*sp++)];
903      *dp++ = tab2[(*sp++)];
904      *dp++ = tab3[(*sp++)];
905      size -= 4;
906      off -= 4;
907    }
908
909    if (off == 1) {
910      *dp++ = tab0[(*sp++)];
911      tab = tab0;
912      tab0 = tab1;
913      tab1 = tab2;
914      tab2 = tab3;
915      tab3 = tab;
916      size--;
917    }
918    else if (off == 2) {
919      *dp++ = tab0[(*sp++)];
920      *dp++ = tab1[(*sp++)];
921      tab = tab0;
922      tab0 = tab2;
923      tab2 = tab;
924      tab = tab1;
925      tab1 = tab3;
926      tab3 = tab;
927      size -= 2;
928    }
929    else if (off == 3) {
930      *dp++ = tab0[(*sp++)];
931      *dp++ = tab1[(*sp++)];
932      *dp++ = tab2[(*sp++)];
933      tab = tab3;
934      tab3 = tab2;
935      tab2 = tab1;
936      tab1 = tab0;
937      tab0 = tab;
938      size -= 3;
939    }
940
941    if (size > 0) {
942
943      off = (mlib_addr) sp & 3;
944
945      if (off == 0) {
946        mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2,
947                                                tab3);
948      }
949      else if (off == 1) {
950        mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2,
951                                                tab3);
952      }
953      else if (off == 2) {
954        mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2,
955                                                tab3);
956      }
957      else {
958        mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2,
959                                                tab3);
960      }
961    }
962
963    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
964    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
965  }
966}
967
968/***************************************************************/
969void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
970                                           mlib_u8       *dst,
971                                           mlib_s32      xsize,
972                                           const mlib_u8 *table0,
973                                           const mlib_u8 *table1,
974                                           const mlib_u8 *table2)
975{
976  mlib_u32 *sa;                        /* aligned pointer to source data */
977  mlib_u8 *sp;                         /* pointer to source data */
978  mlib_u32 s0, s1;                     /* source data */
979  mlib_u8 *dl;                         /* pointer to start of destination */
980  mlib_u8 *dend;                       /* pointer to end of destination */
981  mlib_d64 *dp;                        /* aligned pointer to destination */
982  mlib_d64 t0, t1, t2;                 /* destination data */
983  mlib_d64 t3, t4, t5;                 /* destination data */
984  mlib_d64 t6, t7, acc;                /* destination data */
985  mlib_s32 emask;                      /* edge mask */
986  mlib_s32 i, num;                     /* loop variable */
987  const mlib_u8 *table;
988
989  sa = (mlib_u32 *) src;
990  dl = dst;
991  dp = (mlib_d64 *) dl;
992  dend = dl + xsize - 1;
993
994  vis_alignaddr((void *)0, 7);
995
996  if (xsize >= 8) {
997
998    s0 = sa[0];
999    s1 = sa[1];
1000    sa += 2;
1001
1002#pragma pipeloop(0)
1003    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1004      t7 = VIS_LD_U8_I(table1, s1 & 0xFF);
1005      t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1006      t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1007      t4 = VIS_LD_U8_I(table1, s1 >> 24);
1008      t3 = VIS_LD_U8_I(table0, s0 & 0xFF);
1009      t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
1010      t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
1011      t0 = VIS_LD_U8_I(table0, s0 >> 24);
1012      acc = vis_faligndata(t7, acc);
1013      acc = vis_faligndata(t6, acc);
1014      acc = vis_faligndata(t5, acc);
1015      acc = vis_faligndata(t4, acc);
1016      acc = vis_faligndata(t3, acc);
1017      acc = vis_faligndata(t2, acc);
1018      acc = vis_faligndata(t1, acc);
1019      acc = vis_faligndata(t0, acc);
1020      table = table0;
1021      table0 = table2;
1022      table2 = table1;
1023      table1 = table;
1024      s0 = sa[0];
1025      s1 = sa[1];
1026      *dp++ = acc;
1027    }
1028
1029    t7 = VIS_LD_U8_I(table1, s1 & 0xFF);
1030    t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1031    t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1032    t4 = VIS_LD_U8_I(table1, s1 >> 24);
1033    t3 = VIS_LD_U8_I(table0, s0 & 0xFF);
1034    t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
1035    t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
1036    t0 = VIS_LD_U8_I(table0, s0 >> 24);
1037    acc = vis_faligndata(t7, acc);
1038    acc = vis_faligndata(t6, acc);
1039    acc = vis_faligndata(t5, acc);
1040    acc = vis_faligndata(t4, acc);
1041    acc = vis_faligndata(t3, acc);
1042    acc = vis_faligndata(t2, acc);
1043    acc = vis_faligndata(t1, acc);
1044    acc = vis_faligndata(t0, acc);
1045    table = table0;
1046    table0 = table2;
1047    table2 = table1;
1048    table1 = table;
1049    *dp++ = acc;
1050  }
1051
1052  sp = (mlib_u8 *) sa;
1053
1054  if ((mlib_addr) dp <= (mlib_addr) dend) {
1055
1056    num = (mlib_addr) dend - (mlib_addr) dp;
1057    sp += num;
1058    num++;
1059    i = num - 3 * (num / 3);
1060
1061    if (i == 2) {
1062      s0 = (mlib_s32) * sp;
1063      sp--;
1064
1065      t0 = VIS_LD_U8_I(table1, s0);
1066      acc = vis_faligndata(t0, acc);
1067
1068      s0 = (mlib_s32) * sp;
1069      sp--;
1070
1071      t0 = VIS_LD_U8_I(table0, s0);
1072      acc = vis_faligndata(t0, acc);
1073      num -= 2;
1074    }
1075    else if (i == 1) {
1076      s0 = (mlib_s32) * sp;
1077      sp--;
1078
1079      t0 = VIS_LD_U8_I(table0, s0);
1080      acc = vis_faligndata(t0, acc);
1081      num--;
1082    }
1083
1084#pragma pipeloop(0)
1085    for (i = 0; i < num; i += 3) {
1086      s0 = (mlib_s32) * sp;
1087      sp--;
1088
1089      t0 = VIS_LD_U8_I(table2, s0);
1090      acc = vis_faligndata(t0, acc);
1091
1092      s0 = (mlib_s32) * sp;
1093      sp--;
1094
1095      t0 = VIS_LD_U8_I(table1, s0);
1096      acc = vis_faligndata(t0, acc);
1097
1098      s0 = (mlib_s32) * sp;
1099      sp--;
1100
1101      t0 = VIS_LD_U8_I(table0, s0);
1102      acc = vis_faligndata(t0, acc);
1103    }
1104
1105    emask = vis_edge8(dp, dend);
1106    vis_pst_8(acc, dp, emask);
1107  }
1108}
1109
1110/***************************************************************/
1111void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
1112                                           mlib_u8       *dst,
1113                                           mlib_s32      xsize,
1114                                           const mlib_u8 *table0,
1115                                           const mlib_u8 *table1,
1116                                           const mlib_u8 *table2)
1117{
1118  mlib_u32 *sa;                        /* aligned pointer to source data */
1119  mlib_u8 *sp;                         /* pointer to source data */
1120  mlib_u32 s0, s1, s2;                 /* source data */
1121  mlib_u8 *dl;                         /* pointer to start of destination */
1122  mlib_u8 *dend;                       /* pointer to end of destination */
1123  mlib_d64 *dp;                        /* aligned pointer to destination */
1124  mlib_d64 t0, t1, t2;                 /* destination data */
1125  mlib_d64 t3, t4, t5;                 /* destination data */
1126  mlib_d64 t6, t7, acc;                /* destination data */
1127  mlib_s32 emask;                      /* edge mask */
1128  mlib_s32 i, num;                     /* loop variable */
1129  const mlib_u8 *table;
1130
1131  sa = (mlib_u32 *) (src - 1);
1132  dl = dst;
1133  dp = (mlib_d64 *) dl;
1134  dend = dl + xsize - 1;
1135
1136  vis_alignaddr((void *)0, 7);
1137
1138  s0 = *sa++;
1139
1140  if (xsize >= 8) {
1141
1142    s1 = sa[0];
1143    s2 = sa[1];
1144    sa += 2;
1145
1146#pragma pipeloop(0)
1147    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1148      t7 = VIS_LD_U8_I(table1, s2 >> 24);
1149      t6 = VIS_LD_U8_I(table0, s1 & 0xFF);
1150      t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
1151      t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
1152      t3 = VIS_LD_U8_I(table0, s1 >> 24);
1153      t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
1154      t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
1155      t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
1156      acc = vis_faligndata(t7, acc);
1157      acc = vis_faligndata(t6, acc);
1158      acc = vis_faligndata(t5, acc);
1159      acc = vis_faligndata(t4, acc);
1160      acc = vis_faligndata(t3, acc);
1161      acc = vis_faligndata(t2, acc);
1162      acc = vis_faligndata(t1, acc);
1163      acc = vis_faligndata(t0, acc);
1164      table = table0;
1165      table0 = table2;
1166      table2 = table1;
1167      table1 = table;
1168      s0 = s2;
1169      s1 = sa[0];
1170      s2 = sa[1];
1171      *dp++ = acc;
1172    }
1173
1174    t7 = VIS_LD_U8_I(table1, s2 >> 24);
1175    t6 = VIS_LD_U8_I(table0, s1 & 0xFF);
1176    t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
1177    t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
1178    t3 = VIS_LD_U8_I(table0, s1 >> 24);
1179    t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
1180    t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
1181    t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
1182    acc = vis_faligndata(t7, acc);
1183    acc = vis_faligndata(t6, acc);
1184    acc = vis_faligndata(t5, acc);
1185    acc = vis_faligndata(t4, acc);
1186    acc = vis_faligndata(t3, acc);
1187    acc = vis_faligndata(t2, acc);
1188    acc = vis_faligndata(t1, acc);
1189    acc = vis_faligndata(t0, acc);
1190    table = table0;
1191    table0 = table2;
1192    table2 = table1;
1193    table1 = table;
1194    *dp++ = acc;
1195  }
1196
1197  sp = (mlib_u8 *) sa;
1198  sp -= 3;
1199
1200  if ((mlib_addr) dp <= (mlib_addr) dend) {
1201
1202    num = (mlib_addr) dend - (mlib_addr) dp;
1203    sp += num;
1204    num++;
1205    i = num - 3 * (num / 3);
1206
1207    if (i == 2) {
1208      s0 = (mlib_s32) * sp;
1209      sp--;
1210
1211      t0 = VIS_LD_U8_I(table1, s0);
1212      acc = vis_faligndata(t0, acc);
1213
1214      s0 = (mlib_s32) * sp;
1215      sp--;
1216
1217      t0 = VIS_LD_U8_I(table0, s0);
1218      acc = vis_faligndata(t0, acc);
1219      num -= 2;
1220    }
1221    else if (i == 1) {
1222      s0 = (mlib_s32) * sp;
1223      sp--;
1224
1225      t0 = VIS_LD_U8_I(table0, s0);
1226      acc = vis_faligndata(t0, acc);
1227      num--;
1228    }
1229
1230#pragma pipeloop(0)
1231    for (i = 0; i < num; i += 3) {
1232      s0 = (mlib_s32) * sp;
1233      sp--;
1234
1235      t0 = VIS_LD_U8_I(table2, s0);
1236      acc = vis_faligndata(t0, acc);
1237
1238      s0 = (mlib_s32) * sp;
1239      sp--;
1240
1241      t0 = VIS_LD_U8_I(table1, s0);
1242      acc = vis_faligndata(t0, acc);
1243
1244      s0 = (mlib_s32) * sp;
1245      sp--;
1246
1247      t0 = VIS_LD_U8_I(table0, s0);
1248      acc = vis_faligndata(t0, acc);
1249    }
1250
1251    emask = vis_edge8(dp, dend);
1252    vis_pst_8(acc, dp, emask);
1253  }
1254}
1255
1256/***************************************************************/
1257void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
1258                                           mlib_u8       *dst,
1259                                           mlib_s32      xsize,
1260                                           const mlib_u8 *table0,
1261                                           const mlib_u8 *table1,
1262                                           const mlib_u8 *table2)
1263{
1264  mlib_u32 *sa;                        /* aligned pointer to source data */
1265  mlib_u8 *sp;                         /* pointer to source data */
1266  mlib_u32 s0, s1, s2;                 /* source data */
1267  mlib_u8 *dl;                         /* pointer to start of destination */
1268  mlib_u8 *dend;                       /* pointer to end of destination */
1269  mlib_d64 *dp;                        /* aligned pointer to destination */
1270  mlib_d64 t0, t1, t2;                 /* destination data */
1271  mlib_d64 t3, t4, t5;                 /* destination data */
1272  mlib_d64 t6, t7, acc;                /* destination data */
1273  mlib_s32 emask;                      /* edge mask */
1274  mlib_s32 i, num;                     /* loop variable */
1275  const mlib_u8 *table;
1276
1277  sa = (mlib_u32 *) (src - 2);
1278  dl = dst;
1279  dp = (mlib_d64 *) dl;
1280  dend = dl + xsize - 1;
1281
1282  vis_alignaddr((void *)0, 7);
1283
1284  s0 = *sa++;
1285
1286  if (xsize >= 8) {
1287
1288    s1 = sa[0];
1289    s2 = sa[1];
1290    sa += 2;
1291
1292#pragma pipeloop(0)
1293    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1294      t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF);
1295      t6 = VIS_LD_U8_I(table0, s2 >> 24);
1296      t5 = VIS_LD_U8_I(table2, s1 & 0xFF);
1297      t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
1298      t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
1299      t2 = VIS_LD_U8_I(table2, s1 >> 24);
1300      t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
1301      t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
1302      acc = vis_faligndata(t7, acc);
1303      acc = vis_faligndata(t6, acc);
1304      acc = vis_faligndata(t5, acc);
1305      acc = vis_faligndata(t4, acc);
1306      acc = vis_faligndata(t3, acc);
1307      acc = vis_faligndata(t2, acc);
1308      acc = vis_faligndata(t1, acc);
1309      acc = vis_faligndata(t0, acc);
1310      table = table0;
1311      table0 = table2;
1312      table2 = table1;
1313      table1 = table;
1314      s0 = s2;
1315      s1 = sa[0];
1316      s2 = sa[1];
1317      *dp++ = acc;
1318    }
1319
1320    t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF);
1321    t6 = VIS_LD_U8_I(table0, s2 >> 24);
1322    t5 = VIS_LD_U8_I(table2, s1 & 0xFF);
1323    t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
1324    t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
1325    t2 = VIS_LD_U8_I(table2, s1 >> 24);
1326    t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
1327    t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
1328    acc = vis_faligndata(t7, acc);
1329    acc = vis_faligndata(t6, acc);
1330    acc = vis_faligndata(t5, acc);
1331    acc = vis_faligndata(t4, acc);
1332    acc = vis_faligndata(t3, acc);
1333    acc = vis_faligndata(t2, acc);
1334    acc = vis_faligndata(t1, acc);
1335    acc = vis_faligndata(t0, acc);
1336    table = table0;
1337    table0 = table2;
1338    table2 = table1;
1339    table1 = table;
1340    *dp++ = acc;
1341  }
1342
1343  sp = (mlib_u8 *) sa;
1344  sp -= 2;
1345
1346  if ((mlib_addr) dp <= (mlib_addr) dend) {
1347
1348    num = (mlib_addr) dend - (mlib_addr) dp;
1349    sp += num;
1350    num++;
1351    i = num - 3 * (num / 3);
1352
1353    if (i == 2) {
1354      s0 = (mlib_s32) * sp;
1355      sp--;
1356
1357      t0 = VIS_LD_U8_I(table1, s0);
1358      acc = vis_faligndata(t0, acc);
1359
1360      s0 = (mlib_s32) * sp;
1361      sp--;
1362
1363      t0 = VIS_LD_U8_I(table0, s0);
1364      acc = vis_faligndata(t0, acc);
1365      num -= 2;
1366    }
1367    else if (i == 1) {
1368      s0 = (mlib_s32) * sp;
1369      sp--;
1370
1371      t0 = VIS_LD_U8_I(table0, s0);
1372      acc = vis_faligndata(t0, acc);
1373      num--;
1374    }
1375
1376#pragma pipeloop(0)
1377    for (i = 0; i < num; i += 3) {
1378      s0 = (mlib_s32) * sp;
1379      sp--;
1380
1381      t0 = VIS_LD_U8_I(table2, s0);
1382      acc = vis_faligndata(t0, acc);
1383
1384      s0 = (mlib_s32) * sp;
1385      sp--;
1386
1387      t0 = VIS_LD_U8_I(table1, s0);
1388      acc = vis_faligndata(t0, acc);
1389
1390      s0 = (mlib_s32) * sp;
1391      sp--;
1392
1393      t0 = VIS_LD_U8_I(table0, s0);
1394      acc = vis_faligndata(t0, acc);
1395    }
1396
1397    emask = vis_edge8(dp, dend);
1398    vis_pst_8(acc, dp, emask);
1399  }
1400}
1401
1402/***************************************************************/
1403void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
1404                                           mlib_u8       *dst,
1405                                           mlib_s32      xsize,
1406                                           const mlib_u8 *table0,
1407                                           const mlib_u8 *table1,
1408                                           const mlib_u8 *table2)
1409{
1410  mlib_u32 *sa;                        /* aligned pointer to source data */
1411  mlib_u8 *sp;                         /* pointer to source data */
1412  mlib_u32 s0, s1, s2;                 /* source data */
1413  mlib_u8 *dl;                         /* pointer to start of destination */
1414  mlib_u8 *dend;                       /* pointer to end of destination */
1415  mlib_d64 *dp;                        /* aligned pointer to destination */
1416  mlib_d64 t0, t1, t2;                 /* destination data */
1417  mlib_d64 t3, t4, t5;                 /* destination data */
1418  mlib_d64 t6, t7, acc;                /* destination data */
1419  mlib_s32 emask;                      /* edge mask */
1420  mlib_s32 i, num;                     /* loop variable */
1421  const mlib_u8 *table;
1422
1423  sa = (mlib_u32 *) (src - 3);
1424  dl = dst;
1425  dp = (mlib_d64 *) dl;
1426  dend = dl + xsize - 1;
1427
1428  vis_alignaddr((void *)0, 7);
1429
1430  s0 = *sa++;
1431
1432  if (xsize >= 8) {
1433
1434    s1 = sa[0];
1435    s2 = sa[1];
1436    sa += 2;
1437
1438#pragma pipeloop(0)
1439    for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1440      t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF);
1441      t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF);
1442      t5 = VIS_LD_U8_I(table2, s2 >> 24);
1443      t4 = VIS_LD_U8_I(table1, s1 & 0xFF);
1444      t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1445      t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1446      t1 = VIS_LD_U8_I(table1, s1 >> 24);
1447      t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
1448      acc = vis_faligndata(t7, acc);
1449      acc = vis_faligndata(t6, acc);
1450      acc = vis_faligndata(t5, acc);
1451      acc = vis_faligndata(t4, acc);
1452      acc = vis_faligndata(t3, acc);
1453      acc = vis_faligndata(t2, acc);
1454      acc = vis_faligndata(t1, acc);
1455      acc = vis_faligndata(t0, acc);
1456      table = table0;
1457      table0 = table2;
1458      table2 = table1;
1459      table1 = table;
1460      s0 = s2;
1461      s1 = sa[0];
1462      s2 = sa[1];
1463      *dp++ = acc;
1464    }
1465
1466    t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF);
1467    t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF);
1468    t5 = VIS_LD_U8_I(table2, s2 >> 24);
1469    t4 = VIS_LD_U8_I(table1, s1 & 0xFF);
1470    t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1471    t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1472    t1 = VIS_LD_U8_I(table1, s1 >> 24);
1473    t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
1474    acc = vis_faligndata(t7, acc);
1475    acc = vis_faligndata(t6, acc);
1476    acc = vis_faligndata(t5, acc);
1477    acc = vis_faligndata(t4, acc);
1478    acc = vis_faligndata(t3, acc);
1479    acc = vis_faligndata(t2, acc);
1480    acc = vis_faligndata(t1, acc);
1481    acc = vis_faligndata(t0, acc);
1482    table = table0;
1483    table0 = table2;
1484    table2 = table1;
1485    table1 = table;
1486    *dp++ = acc;
1487  }
1488
1489  sp = (mlib_u8 *) sa;
1490  sp--;
1491
1492  if ((mlib_addr) dp <= (mlib_addr) dend) {
1493
1494    num = (mlib_addr) dend - (mlib_addr) dp;
1495    sp += num;
1496    num++;
1497    i = num - 3 * (num / 3);
1498
1499    if (i == 2) {
1500      s0 = (mlib_s32) * sp;
1501      sp--;
1502
1503      t0 = VIS_LD_U8_I(table1, s0);
1504      acc = vis_faligndata(t0, acc);
1505
1506      s0 = (mlib_s32) * sp;
1507      sp--;
1508
1509      t0 = VIS_LD_U8_I(table0, s0);
1510      acc = vis_faligndata(t0, acc);
1511      num -= 2;
1512    }
1513    else if (i == 1) {
1514      s0 = (mlib_s32) * sp;
1515      sp--;
1516
1517      t0 = VIS_LD_U8_I(table0, s0);
1518      acc = vis_faligndata(t0, acc);
1519      num--;
1520    }
1521
1522#pragma pipeloop(0)
1523    for (i = 0; i < num; i += 3) {
1524      s0 = (mlib_s32) * sp;
1525      sp--;
1526
1527      t0 = VIS_LD_U8_I(table2, s0);
1528      acc = vis_faligndata(t0, acc);
1529
1530      s0 = (mlib_s32) * sp;
1531      sp--;
1532
1533      t0 = VIS_LD_U8_I(table1, s0);
1534      acc = vis_faligndata(t0, acc);
1535
1536      s0 = (mlib_s32) * sp;
1537      sp--;
1538
1539      t0 = VIS_LD_U8_I(table0, s0);
1540      acc = vis_faligndata(t0, acc);
1541    }
1542
1543    emask = vis_edge8(dp, dend);
1544    vis_pst_8(acc, dp, emask);
1545  }
1546}
1547
1548/***************************************************************/
1549void mlib_v_ImageLookUp_U8_U8_3(const mlib_u8 *src,
1550                                mlib_s32      slb,
1551                                mlib_u8       *dst,
1552                                mlib_s32      dlb,
1553                                mlib_s32      xsize,
1554                                mlib_s32      ysize,
1555                                const mlib_u8 **table)
1556{
1557  mlib_u8 *sl;
1558  mlib_u8 *dl;
1559  const mlib_u8 *tab;
1560  mlib_s32 j, i;
1561
1562  sl = (void *)src;
1563  dl = dst;
1564
1565  /* row loop */
1566  for (j = 0; j < ysize; j++) {
1567    mlib_u8 *sp = sl;
1568    mlib_u8 *dp = dl;
1569    const mlib_u8 *tab0 = table[0];
1570    const mlib_u8 *tab1 = table[1];
1571    const mlib_u8 *tab2 = table[2];
1572    mlib_s32 off, size = xsize * 3;
1573
1574    off = (8 - ((mlib_addr) dp & 7)) & 7;
1575
1576    off = (off < size) ? off : size;
1577
1578    for (i = 0; i < off - 2; i += 3) {
1579      *dp++ = tab0[(*sp++)];
1580      *dp++ = tab1[(*sp++)];
1581      *dp++ = tab2[(*sp++)];
1582      size -= 3;
1583    }
1584
1585    off -= i;
1586
1587    if (off == 1) {
1588      *dp++ = tab0[(*sp++)];
1589      tab = tab0;
1590      tab0 = tab1;
1591      tab1 = tab2;
1592      tab2 = tab;
1593      size--;
1594    }
1595    else if (off == 2) {
1596      *dp++ = tab0[(*sp++)];
1597      *dp++ = tab1[(*sp++)];
1598      tab = tab2;
1599      tab2 = tab1;
1600      tab1 = tab0;
1601      tab0 = tab;
1602      size -= 2;
1603    }
1604
1605    if (size > 0) {
1606
1607      off = (mlib_addr) sp & 3;
1608
1609      if (off == 0) {
1610        mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
1611      }
1612      else if (off == 1) {
1613        mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
1614      }
1615      else if (off == 2) {
1616        mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
1617      }
1618      else {
1619        mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
1620      }
1621    }
1622
1623    sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1624    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
1625  }
1626}
1627
1628/***************************************************************/
1629