1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUp_S32_U8_124_D1(const mlib_s32 *src,
34                                             mlib_u8        *dst,
35                                             mlib_s32       xsize,
36                                             const mlib_u8  *table0,
37                                             const mlib_u8  *table1,
38                                             const mlib_u8  *table2,
39                                             const mlib_u8  *table3);
40
41static void mlib_v_ImageLookUp_S32_U8_3_D1(const mlib_s32 *src,
42                                           mlib_u8        *dst,
43                                           mlib_s32       xsize,
44                                           const mlib_u8  *table0,
45                                           const mlib_u8  *table1,
46                                           const mlib_u8  *table2);
47
48/***************************************************************/
49
/*
 * Thin wrapper around vis_ld_u8_i(): casts the table base to (void *) so
 * that const-qualified table pointers can be passed without a warning.
 * BASE is the table pointer, OFFSET the index added to it.
 */
#define VIS_LD_U8_I(BASE, OFFSET)  vis_ld_u8_i((void *)(BASE), (OFFSET))
51
52/***************************************************************/
53void mlib_v_ImageLookUp_S32_U8_124_D1(const mlib_s32 *src,
54                                      mlib_u8        *dst,
55                                      mlib_s32       xsize,
56                                      const mlib_u8  *table0,
57                                      const mlib_u8  *table1,
58                                      const mlib_u8  *table2,
59                                      const mlib_u8  *table3)
60{
61  mlib_s32 *sp;                        /* pointer to source data */
62  mlib_s32 s0, s1, s2, s3;             /* source data */
63  mlib_s32 s4, s5, s6, s7;             /* source data */
64  mlib_u8 *dl;                         /* pointer to start of destination */
65  mlib_u8 *dend;                       /* pointer to end of destination */
66  mlib_d64 *dp;                        /* aligned pointer to destination */
67  mlib_d64 t0, t1, t2;                 /* destination data */
68  mlib_d64 t3, t4, t5;                 /* destination data */
69  mlib_d64 t6, t7, acc;                /* destination data */
70  mlib_s32 emask;                      /* edge mask */
71  mlib_s32 i, num;                     /* loop variable */
72
73  dl = dst;
74  dp = (mlib_d64 *) dl;
75  dend = dl + xsize - 1;
76  sp = (void *)src;
77
78  vis_alignaddr((void *)0, 7);
79
80  if (xsize >= 8) {
81
82    s0 = sp[0];
83    s1 = sp[1];
84    s2 = sp[2];
85    s3 = sp[3];
86    s4 = sp[4];
87    s5 = sp[5];
88    s6 = sp[6];
89    s7 = sp[7];
90    sp += 8;
91
92#pragma pipeloop(0)
93    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
94      t7 = VIS_LD_U8_I(table3, s7);
95      t6 = VIS_LD_U8_I(table2, s6);
96      t5 = VIS_LD_U8_I(table1, s5);
97      t4 = VIS_LD_U8_I(table0, s4);
98      t3 = VIS_LD_U8_I(table3, s3);
99      t2 = VIS_LD_U8_I(table2, s2);
100      t1 = VIS_LD_U8_I(table1, s1);
101      t0 = VIS_LD_U8_I(table0, s0);
102      acc = vis_faligndata(t7, acc);
103      acc = vis_faligndata(t6, acc);
104      acc = vis_faligndata(t5, acc);
105      acc = vis_faligndata(t4, acc);
106      acc = vis_faligndata(t3, acc);
107      acc = vis_faligndata(t2, acc);
108      acc = vis_faligndata(t1, acc);
109      acc = vis_faligndata(t0, acc);
110      s0 = sp[0];
111      s1 = sp[1];
112      s2 = sp[2];
113      s3 = sp[3];
114      s4 = sp[4];
115      s5 = sp[5];
116      s6 = sp[6];
117      s7 = sp[7];
118      *dp++ = acc;
119    }
120
121    t7 = VIS_LD_U8_I(table3, s7);
122    t6 = VIS_LD_U8_I(table2, s6);
123    t5 = VIS_LD_U8_I(table1, s5);
124    t4 = VIS_LD_U8_I(table0, s4);
125    t3 = VIS_LD_U8_I(table3, s3);
126    t2 = VIS_LD_U8_I(table2, s2);
127    t1 = VIS_LD_U8_I(table1, s1);
128    t0 = VIS_LD_U8_I(table0, s0);
129    acc = vis_faligndata(t7, acc);
130    acc = vis_faligndata(t6, acc);
131    acc = vis_faligndata(t5, acc);
132    acc = vis_faligndata(t4, acc);
133    acc = vis_faligndata(t3, acc);
134    acc = vis_faligndata(t2, acc);
135    acc = vis_faligndata(t1, acc);
136    acc = vis_faligndata(t0, acc);
137    *dp++ = acc;
138  }
139
140  if ((mlib_addr) dp <= (mlib_addr) dend) {
141
142    num = (mlib_s32) ((mlib_addr) dend - (mlib_addr) dp);
143    sp += num;
144    num++;
145
146    if ((num & 3) == 1) {
147      s0 = *sp;
148      sp--;
149
150      t0 = VIS_LD_U8_I(table0, s0);
151      acc = vis_faligndata(t0, acc);
152      num--;
153    }
154    else if ((num & 3) == 2) {
155      s0 = *sp;
156      sp--;
157
158      t0 = VIS_LD_U8_I(table1, s0);
159      acc = vis_faligndata(t0, acc);
160
161      s0 = *sp;
162      sp--;
163
164      t0 = VIS_LD_U8_I(table0, s0);
165      acc = vis_faligndata(t0, acc);
166      num -= 2;
167    }
168    else if ((num & 3) == 3) {
169      s0 = *sp;
170      sp--;
171
172      t0 = VIS_LD_U8_I(table2, s0);
173      acc = vis_faligndata(t0, acc);
174
175      s0 = *sp;
176      sp--;
177
178      t0 = VIS_LD_U8_I(table1, s0);
179      acc = vis_faligndata(t0, acc);
180
181      s0 = *sp;
182      sp--;
183
184      t0 = VIS_LD_U8_I(table0, s0);
185      acc = vis_faligndata(t0, acc);
186      num -= 3;
187    }
188
189    if (num != 0) {
190      s0 = *sp;
191      sp--;
192
193      t0 = VIS_LD_U8_I(table3, s0);
194      acc = vis_faligndata(t0, acc);
195
196      s0 = *sp;
197      sp--;
198
199      t0 = VIS_LD_U8_I(table2, s0);
200      acc = vis_faligndata(t0, acc);
201
202      s0 = *sp;
203      sp--;
204
205      t0 = VIS_LD_U8_I(table1, s0);
206      acc = vis_faligndata(t0, acc);
207
208      s0 = *sp;
209
210      t0 = VIS_LD_U8_I(table0, s0);
211      acc = vis_faligndata(t0, acc);
212    }
213
214    emask = vis_edge8(dp, dend);
215    vis_pst_8(acc, dp, emask);
216  }
217}
218
219/***************************************************************/
220void mlib_v_ImageLookUp_S32_U8_1(const mlib_s32 *src,
221                                 mlib_s32       slb,
222                                 mlib_u8        *dst,
223                                 mlib_s32       dlb,
224                                 mlib_s32       xsize,
225                                 mlib_s32       ysize,
226                                 const mlib_u8  **table)
227{
228  mlib_s32 *sl;
229  mlib_u8 *dl;
230  const mlib_u8 *tab = &table[0][(mlib_u32) 2147483648u];
231  mlib_s32 j, i;
232
233  sl = (void *)src;
234  dl = dst;
235
236  /* row loop */
237  for (j = 0; j < ysize; j++) {
238    mlib_s32 *sp = sl;
239    mlib_u8 *dp = dl;
240    mlib_s32 off, size = xsize;
241
242    off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
243
244    off = (off < size) ? off : size;
245
246    for (i = 0; i < off; i++, sp++) {
247      *dp++ = tab[sp[0]];
248      size--;
249    }
250
251    if (size > 0) {
252      mlib_v_ImageLookUp_S32_U8_124_D1(sp, dp, size, tab, tab, tab, tab);
253    }
254
255    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
256    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
257  }
258}
259
260/***************************************************************/
261void mlib_v_ImageLookUp_S32_U8_2(const mlib_s32 *src,
262                                 mlib_s32       slb,
263                                 mlib_u8        *dst,
264                                 mlib_s32       dlb,
265                                 mlib_s32       xsize,
266                                 mlib_s32       ysize,
267                                 const mlib_u8  **table)
268{
269  mlib_s32 *sl;
270  mlib_u8 *dl;
271  const mlib_u8 *tab;
272  mlib_s32 j, i;
273
274  sl = (void *)src;
275  dl = dst;
276
277  /* row loop */
278  for (j = 0; j < ysize; j++) {
279    mlib_s32 *sp = sl;
280    mlib_u8 *dp = dl;
281    mlib_s32 off, size = xsize * 2;
282    const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
283    const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
284
285    off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
286
287    off = (off < size) ? off : size;
288
289    for (i = 0; i < off - 1; i += 2, sp += 2) {
290      *dp++ = tab0[sp[0]];
291      *dp++ = tab1[sp[1]];
292      size -= 2;
293    }
294
295    if ((off & 1) != 0) {
296      *dp++ = tab0[sp[0]];
297      size--;
298      sp++;
299      tab = tab0;
300      tab0 = tab1;
301      tab1 = tab;
302    }
303
304    if (size > 0) {
305      mlib_v_ImageLookUp_S32_U8_124_D1(sp, dp, size, tab0, tab1, tab0, tab1);
306    }
307
308    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
309    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
310  }
311}
312
313/***************************************************************/
314void mlib_v_ImageLookUp_S32_U8_4(const mlib_s32 *src,
315                                 mlib_s32       slb,
316                                 mlib_u8        *dst,
317                                 mlib_s32       dlb,
318                                 mlib_s32       xsize,
319                                 mlib_s32       ysize,
320                                 const mlib_u8  **table)
321{
322  mlib_s32 *sl;
323  mlib_u8 *dl;
324  const mlib_u8 *tab;
325  mlib_s32 j;
326
327  sl = (void *)src;
328  dl = dst;
329
330  /* row loop */
331  for (j = 0; j < ysize; j++) {
332    mlib_s32 *sp = sl;
333    mlib_u8 *dp = dl;
334    const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
335    const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
336    const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
337    const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
338    mlib_s32 off, size = xsize * 4;
339
340    off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
341
342    off = (off < size) ? off : size;
343
344    if (off >= 4) {
345      *dp++ = tab0[sp[0]];
346      *dp++ = tab1[sp[1]];
347      *dp++ = tab2[sp[2]];
348      *dp++ = tab3[sp[3]];
349      size -= 4;
350      off -= 4;
351      sp += 4;
352    }
353
354    if (off == 1) {
355      *dp++ = tab0[sp[0]];
356      tab = tab0;
357      tab0 = tab1;
358      tab1 = tab2;
359      tab2 = tab3;
360      tab3 = tab;
361      size--;
362      sp++;
363    }
364    else if (off == 2) {
365      *dp++ = tab0[sp[0]];
366      *dp++ = tab1[sp[1]];
367      tab = tab0;
368      tab0 = tab2;
369      tab2 = tab;
370      tab = tab1;
371      tab1 = tab3;
372      tab3 = tab;
373      size -= 2;
374      sp += 2;
375    }
376    else if (off == 3) {
377      *dp++ = tab0[sp[0]];
378      *dp++ = tab1[sp[1]];
379      *dp++ = tab2[sp[2]];
380      tab = tab3;
381      tab3 = tab2;
382      tab2 = tab1;
383      tab1 = tab0;
384      tab0 = tab;
385      size -= 3;
386      sp += 3;
387    }
388
389    if (size > 0) {
390      mlib_v_ImageLookUp_S32_U8_124_D1(sp, dp, size, tab0, tab1, tab2, tab3);
391    }
392
393    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
394    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
395  }
396}
397
398/***************************************************************/
399void mlib_v_ImageLookUp_S32_U8_3_D1(const mlib_s32 *src,
400                                    mlib_u8        *dst,
401                                    mlib_s32       xsize,
402                                    const mlib_u8  *table0,
403                                    const mlib_u8  *table1,
404                                    const mlib_u8  *table2)
405{
406  mlib_s32 *sp;                        /* pointer to source data */
407  mlib_s32 s0, s1, s2, s3;             /* source data */
408  mlib_s32 s4, s5, s6, s7;             /* source data */
409  mlib_u8 *dl;                         /* pointer to start of destination */
410  mlib_u8 *dend;                       /* pointer to end of destination */
411  mlib_d64 *dp;                        /* aligned pointer to destination */
412  mlib_d64 t0, t1, t2;                 /* destination data */
413  mlib_d64 t3, t4, t5;                 /* destination data */
414  mlib_d64 t6, t7, acc;                /* destination data */
415  mlib_s32 emask;                      /* edge mask */
416  mlib_s32 i, num;                     /* loop variable */
417  const mlib_u8 *table;
418
419  dl = dst;
420  sp = (void *)src;
421  dp = (mlib_d64 *) dl;
422  dend = dl + xsize - 1;
423
424  vis_alignaddr((void *)0, 7);
425
426  if (xsize >= 8) {
427
428    s0 = sp[0];
429    s1 = sp[1];
430    s2 = sp[2];
431    s3 = sp[3];
432    s4 = sp[4];
433    s5 = sp[5];
434    s6 = sp[6];
435    s7 = sp[7];
436    sp += 8;
437
438#pragma pipeloop(0)
439    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
440      t7 = VIS_LD_U8_I(table1, s7);
441      t6 = VIS_LD_U8_I(table0, s6);
442      t5 = VIS_LD_U8_I(table2, s5);
443      t4 = VIS_LD_U8_I(table1, s4);
444      t3 = VIS_LD_U8_I(table0, s3);
445      t2 = VIS_LD_U8_I(table2, s2);
446      t1 = VIS_LD_U8_I(table1, s1);
447      t0 = VIS_LD_U8_I(table0, s0);
448      acc = vis_faligndata(t7, acc);
449      acc = vis_faligndata(t6, acc);
450      acc = vis_faligndata(t5, acc);
451      acc = vis_faligndata(t4, acc);
452      acc = vis_faligndata(t3, acc);
453      acc = vis_faligndata(t2, acc);
454      acc = vis_faligndata(t1, acc);
455      acc = vis_faligndata(t0, acc);
456      table = table0;
457      table0 = table2;
458      table2 = table1;
459      table1 = table;
460      s0 = sp[0];
461      s1 = sp[1];
462      s2 = sp[2];
463      s3 = sp[3];
464      s4 = sp[4];
465      s5 = sp[5];
466      s6 = sp[6];
467      s7 = sp[7];
468      *dp++ = acc;
469    }
470
471    t7 = VIS_LD_U8_I(table1, s7);
472    t6 = VIS_LD_U8_I(table0, s6);
473    t5 = VIS_LD_U8_I(table2, s5);
474    t4 = VIS_LD_U8_I(table1, s4);
475    t3 = VIS_LD_U8_I(table0, s3);
476    t2 = VIS_LD_U8_I(table2, s2);
477    t1 = VIS_LD_U8_I(table1, s1);
478    t0 = VIS_LD_U8_I(table0, s0);
479    acc = vis_faligndata(t7, acc);
480    acc = vis_faligndata(t6, acc);
481    acc = vis_faligndata(t5, acc);
482    acc = vis_faligndata(t4, acc);
483    acc = vis_faligndata(t3, acc);
484    acc = vis_faligndata(t2, acc);
485    acc = vis_faligndata(t1, acc);
486    acc = vis_faligndata(t0, acc);
487    table = table0;
488    table0 = table2;
489    table2 = table1;
490    table1 = table;
491    *dp++ = acc;
492  }
493
494  if ((mlib_addr) dp <= (mlib_addr) dend) {
495
496    num = (mlib_s32) ((mlib_addr) dend - (mlib_addr) dp);
497    sp += num;
498    num++;
499    i = num - 3 * (num / 3);
500
501    if (i == 2) {
502      s0 = *sp;
503      sp--;
504
505      t0 = VIS_LD_U8_I(table1, s0);
506      acc = vis_faligndata(t0, acc);
507
508      s0 = *sp;
509      sp--;
510
511      t0 = VIS_LD_U8_I(table0, s0);
512      acc = vis_faligndata(t0, acc);
513      num -= 2;
514    }
515    else if (i == 1) {
516      s0 = *sp;
517      sp--;
518
519      t0 = VIS_LD_U8_I(table0, s0);
520      acc = vis_faligndata(t0, acc);
521      num--;
522    }
523
524#pragma pipeloop(0)
525    for (i = 0; i < num; i += 3) {
526      s0 = *sp;
527      sp--;
528
529      t0 = VIS_LD_U8_I(table2, s0);
530      acc = vis_faligndata(t0, acc);
531
532      s0 = *sp;
533      sp--;
534
535      t0 = VIS_LD_U8_I(table1, s0);
536      acc = vis_faligndata(t0, acc);
537
538      s0 = *sp;
539      sp--;
540
541      t0 = VIS_LD_U8_I(table0, s0);
542      acc = vis_faligndata(t0, acc);
543    }
544
545    emask = vis_edge8(dp, dend);
546    vis_pst_8(acc, dp, emask);
547  }
548}
549
550/***************************************************************/
551void mlib_v_ImageLookUp_S32_U8_3(const mlib_s32 *src,
552                                 mlib_s32       slb,
553                                 mlib_u8        *dst,
554                                 mlib_s32       dlb,
555                                 mlib_s32       xsize,
556                                 mlib_s32       ysize,
557                                 const mlib_u8  **table)
558{
559  mlib_s32 *sl;
560  mlib_u8 *dl;
561  const mlib_u8 *tab;
562  mlib_s32 j, i;
563
564  sl = (void *)src;
565  dl = dst;
566
567  /* row loop */
568  for (j = 0; j < ysize; j++) {
569    mlib_s32 *sp = sl;
570    mlib_u8 *dp = dl;
571    const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
572    const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
573    const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
574    mlib_s32 off, size = xsize * 3;
575
576    off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
577
578    off = (off < size) ? off : size;
579
580    for (i = 0; i < off - 2; i += 3, sp += 3) {
581      *dp++ = tab0[sp[0]];
582      *dp++ = tab1[sp[1]];
583      *dp++ = tab2[sp[2]];
584      size -= 3;
585    }
586
587    off -= i;
588
589    if (off == 1) {
590      *dp++ = tab0[sp[0]];
591      tab = tab0;
592      tab0 = tab1;
593      tab1 = tab2;
594      tab2 = tab;
595      size--;
596      sp++;
597    }
598    else if (off == 2) {
599      *dp++ = tab0[sp[0]];
600      *dp++ = tab1[sp[1]];
601      tab = tab2;
602      tab2 = tab1;
603      tab1 = tab0;
604      tab0 = tab;
605      size -= 2;
606      sp += 2;
607    }
608
609    if (size > 0) {
610      mlib_v_ImageLookUp_S32_U8_3_D1(sp, dp, size, tab0, tab1, tab2);
611    }
612
613    sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
614    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
615  }
616}
617
618/***************************************************************/
619