1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
/*
 * Forward declarations of the file-local row kernels defined further down.
 * Both translate a run of xsize S16 samples into U8 bytes through lookup
 * tables; they differ only in how many tables they cycle through.
 */

/* Row kernel cycling through four tables (sample k uses table(k mod 4)). */
static void mlib_v_ImageLookUp_S16_U8_124_D1(const mlib_s16 *src,
                                             mlib_u8        *dst,
                                             mlib_s32       xsize,
                                             const mlib_u8  *table0,
                                             const mlib_u8  *table1,
                                             const mlib_u8  *table2,
                                             const mlib_u8  *table3);

/* Row kernel cycling through three tables (sample k uses table(k mod 3)). */
static void mlib_v_ImageLookUp_S16_U8_3_D1(const mlib_s16 *src,
                                           mlib_u8        *dst,
                                           mlib_s32       xsize,
                                           const mlib_u8  *table0,
                                           const mlib_u8  *table1,
                                           const mlib_u8  *table2);
47
48/***************************************************************/
49
/*
 * Thin wrapper around vis_ld_u8_i(): X is cast to (void *), which lets the
 * const-qualified table pointers used in this file be passed without a
 * warning; Y is forwarded unchanged.
 * NOTE(review): presumably loads the byte at address X + Y -- confirm
 * against vis_proto.h.
 */
#define VIS_LD_U8_I(X, Y)       vis_ld_u8_i((void *)(X), (Y))
51
52/***************************************************************/
53void mlib_v_ImageLookUp_S16_U8_124_D1(const mlib_s16 *src,
54                                      mlib_u8        *dst,
55                                      mlib_s32       xsize,
56                                      const mlib_u8  *table0,
57                                      const mlib_u8  *table1,
58                                      const mlib_u8  *table2,
59                                      const mlib_u8  *table3)
60{
61  mlib_s16 *sp;                        /* pointer to source data */
62  mlib_s32 s0, s1, s2, s3;             /* source data */
63  mlib_s32 s4, s5, s6, s7;             /* source data */
64  mlib_u8 *dl;                         /* pointer to start of destination */
65  mlib_u8 *dend;                       /* pointer to end of destination */
66  mlib_d64 *dp;                        /* aligned pointer to destination */
67  mlib_d64 t0, t1, t2;                 /* destination data */
68  mlib_d64 t3, t4, t5;                 /* destination data */
69  mlib_d64 t6, t7, acc;                /* destination data */
70  mlib_s32 emask;                      /* edge mask */
71  mlib_s32 i, num;                     /* loop variable */
72
73  dl = dst;
74  dp = (mlib_d64 *) dl;
75  dend = dl + xsize - 1;
76  sp = (void *)src;
77
78  vis_alignaddr((void *)0, 7);
79
80  if (xsize >= 8) {
81
82    s0 = sp[0];
83    s1 = sp[1];
84    s2 = sp[2];
85    s3 = sp[3];
86    s4 = sp[4];
87    s5 = sp[5];
88    s6 = sp[6];
89    s7 = sp[7];
90    sp += 8;
91
92#pragma pipeloop(0)
93    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
94      t7 = VIS_LD_U8_I(table3, s7);
95      t6 = VIS_LD_U8_I(table2, s6);
96      t5 = VIS_LD_U8_I(table1, s5);
97      t4 = VIS_LD_U8_I(table0, s4);
98      t3 = VIS_LD_U8_I(table3, s3);
99      t2 = VIS_LD_U8_I(table2, s2);
100      t1 = VIS_LD_U8_I(table1, s1);
101      t0 = VIS_LD_U8_I(table0, s0);
102      acc = vis_faligndata(t7, acc);
103      acc = vis_faligndata(t6, acc);
104      acc = vis_faligndata(t5, acc);
105      acc = vis_faligndata(t4, acc);
106      acc = vis_faligndata(t3, acc);
107      acc = vis_faligndata(t2, acc);
108      acc = vis_faligndata(t1, acc);
109      acc = vis_faligndata(t0, acc);
110      s0 = sp[0];
111      s1 = sp[1];
112      s2 = sp[2];
113      s3 = sp[3];
114      s4 = sp[4];
115      s5 = sp[5];
116      s6 = sp[6];
117      s7 = sp[7];
118      *dp++ = acc;
119    }
120
121    t7 = VIS_LD_U8_I(table3, s7);
122    t6 = VIS_LD_U8_I(table2, s6);
123    t5 = VIS_LD_U8_I(table1, s5);
124    t4 = VIS_LD_U8_I(table0, s4);
125    t3 = VIS_LD_U8_I(table3, s3);
126    t2 = VIS_LD_U8_I(table2, s2);
127    t1 = VIS_LD_U8_I(table1, s1);
128    t0 = VIS_LD_U8_I(table0, s0);
129    acc = vis_faligndata(t7, acc);
130    acc = vis_faligndata(t6, acc);
131    acc = vis_faligndata(t5, acc);
132    acc = vis_faligndata(t4, acc);
133    acc = vis_faligndata(t3, acc);
134    acc = vis_faligndata(t2, acc);
135    acc = vis_faligndata(t1, acc);
136    acc = vis_faligndata(t0, acc);
137    *dp++ = acc;
138  }
139
140  if ((mlib_addr) dp <= (mlib_addr) dend) {
141
142    num = (mlib_addr) dend - (mlib_addr) dp;
143    sp += num;
144    num++;
145
146    if ((num & 3) == 1) {
147      s0 = (mlib_s32) * sp;
148      sp--;
149
150      t0 = VIS_LD_U8_I(table0, s0);
151      acc = vis_faligndata(t0, acc);
152      num--;
153    }
154    else if ((num & 3) == 2) {
155      s0 = (mlib_s32) * sp;
156      sp--;
157
158      t0 = VIS_LD_U8_I(table1, s0);
159      acc = vis_faligndata(t0, acc);
160
161      s0 = (mlib_s32) * sp;
162      sp--;
163
164      t0 = VIS_LD_U8_I(table0, s0);
165      acc = vis_faligndata(t0, acc);
166      num -= 2;
167    }
168    else if ((num & 3) == 3) {
169      s0 = (mlib_s32) * sp;
170      sp--;
171
172      t0 = VIS_LD_U8_I(table2, s0);
173      acc = vis_faligndata(t0, acc);
174
175      s0 = (mlib_s32) * sp;
176      sp--;
177
178      t0 = VIS_LD_U8_I(table1, s0);
179      acc = vis_faligndata(t0, acc);
180
181      s0 = (mlib_s32) * sp;
182      sp--;
183
184      t0 = VIS_LD_U8_I(table0, s0);
185      acc = vis_faligndata(t0, acc);
186      num -= 3;
187    }
188
189    if (num != 0) {
190      s0 = (mlib_s32) * sp;
191      sp--;
192
193      t0 = VIS_LD_U8_I(table3, s0);
194      acc = vis_faligndata(t0, acc);
195
196      s0 = (mlib_s32) * sp;
197      sp--;
198
199      t0 = VIS_LD_U8_I(table2, s0);
200      acc = vis_faligndata(t0, acc);
201
202      s0 = (mlib_s32) * sp;
203      sp--;
204
205      t0 = VIS_LD_U8_I(table1, s0);
206      acc = vis_faligndata(t0, acc);
207
208      s0 = (mlib_s32) * sp;
209      sp--;
210
211      t0 = VIS_LD_U8_I(table0, s0);
212      acc = vis_faligndata(t0, acc);
213    }
214
215    emask = vis_edge8(dp, dend);
216    vis_pst_8(acc, dp, emask);
217  }
218}
219
220/***************************************************************/
221void mlib_v_ImageLookUp_S16_U8_1(const mlib_s16 *src,
222                                 mlib_s32       slb,
223                                 mlib_u8        *dst,
224                                 mlib_s32       dlb,
225                                 mlib_s32       xsize,
226                                 mlib_s32       ysize,
227                                 const mlib_u8  **table)
228{
229  mlib_s16 *sl;
230  mlib_u8 *dl;
231  const mlib_u8 *tab = &table[0][32768];
232  mlib_s32 j, i;
233
234  sl = (void *)src;
235  dl = dst;
236
237  /* row loop */
238  for (j = 0; j < ysize; j++) {
239    mlib_s16 *sp = sl;
240    mlib_u8 *dp = dl;
241    mlib_s32 off, size = xsize;
242
243    off = (8 - ((mlib_addr) dp & 7)) & 7;
244
245    off = (off < size) ? off : size;
246
247    for (i = 0; i < off; i++, sp++) {
248      *dp++ = tab[sp[0]];
249      size--;
250    }
251
252    if (size > 0) {
253      mlib_v_ImageLookUp_S16_U8_124_D1(sp, dp, size, tab, tab, tab, tab);
254    }
255
256    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
257    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
258  }
259}
260
261/***************************************************************/
262void mlib_v_ImageLookUp_S16_U8_2(const mlib_s16 *src,
263                                 mlib_s32       slb,
264                                 mlib_u8        *dst,
265                                 mlib_s32       dlb,
266                                 mlib_s32       xsize,
267                                 mlib_s32       ysize,
268                                 const mlib_u8  **table)
269{
270  mlib_s16 *sl;
271  mlib_u8 *dl;
272  const mlib_u8 *tab;
273  mlib_s32 j, i;
274
275  sl = (void *)src;
276  dl = dst;
277
278  /* row loop */
279  for (j = 0; j < ysize; j++) {
280    mlib_s16 *sp = sl;
281    mlib_u8 *dp = dl;
282    mlib_s32 off, size = xsize * 2;
283    const mlib_u8 *tab0 = &table[0][32768];
284    const mlib_u8 *tab1 = &table[1][32768];
285
286    off = (8 - ((mlib_addr) dp & 7)) & 7;
287
288    off = (off < size) ? off : size;
289
290    for (i = 0; i < off - 1; i += 2, sp += 2) {
291      *dp++ = tab0[sp[0]];
292      *dp++ = tab1[sp[1]];
293      size -= 2;
294    }
295
296    if ((off & 1) != 0) {
297      *dp++ = tab0[sp[0]];
298      size--;
299      sp++;
300      tab = tab0;
301      tab0 = tab1;
302      tab1 = tab;
303    }
304
305    if (size > 0) {
306      mlib_v_ImageLookUp_S16_U8_124_D1(sp, dp, size, tab0, tab1, tab0, tab1);
307    }
308
309    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
310    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
311  }
312}
313
314/***************************************************************/
315void mlib_v_ImageLookUp_S16_U8_4(const mlib_s16 *src,
316                                 mlib_s32       slb,
317                                 mlib_u8        *dst,
318                                 mlib_s32       dlb,
319                                 mlib_s32       xsize,
320                                 mlib_s32       ysize,
321                                 const mlib_u8  **table)
322{
323  mlib_s16 *sl;
324  mlib_u8 *dl;
325  const mlib_u8 *tab;
326  mlib_s32 j;
327
328  sl = (void *)src;
329  dl = dst;
330
331  /* row loop */
332  for (j = 0; j < ysize; j++) {
333    mlib_s16 *sp = sl;
334    mlib_u8 *dp = dl;
335    const mlib_u8 *tab0 = &table[0][32768];
336    const mlib_u8 *tab1 = &table[1][32768];
337    const mlib_u8 *tab2 = &table[2][32768];
338    const mlib_u8 *tab3 = &table[3][32768];
339    mlib_s32 off, size = xsize * 4;
340
341    off = (8 - ((mlib_addr) dp & 7)) & 7;
342
343    off = (off < size) ? off : size;
344
345    if (off >= 4) {
346      *dp++ = tab0[sp[0]];
347      *dp++ = tab1[sp[1]];
348      *dp++ = tab2[sp[2]];
349      *dp++ = tab3[sp[3]];
350      size -= 4;
351      off -= 4;
352      sp += 4;
353    }
354
355    if (off == 1) {
356      *dp++ = tab0[sp[0]];
357      tab = tab0;
358      tab0 = tab1;
359      tab1 = tab2;
360      tab2 = tab3;
361      tab3 = tab;
362      size--;
363      sp++;
364    }
365    else if (off == 2) {
366      *dp++ = tab0[sp[0]];
367      *dp++ = tab1[sp[1]];
368      tab = tab0;
369      tab0 = tab2;
370      tab2 = tab;
371      tab = tab1;
372      tab1 = tab3;
373      tab3 = tab;
374      size -= 2;
375      sp += 2;
376    }
377    else if (off == 3) {
378      *dp++ = tab0[sp[0]];
379      *dp++ = tab1[sp[1]];
380      *dp++ = tab2[sp[2]];
381      tab = tab3;
382      tab3 = tab2;
383      tab2 = tab1;
384      tab1 = tab0;
385      tab0 = tab;
386      size -= 3;
387      sp += 3;
388    }
389
390    if (size > 0) {
391      mlib_v_ImageLookUp_S16_U8_124_D1(sp, dp, size, tab0, tab1, tab2, tab3);
392    }
393
394    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
395    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
396  }
397}
398
399/***************************************************************/
400void mlib_v_ImageLookUp_S16_U8_3_D1(const mlib_s16 *src,
401                                    mlib_u8        *dst,
402                                    mlib_s32       xsize,
403                                    const mlib_u8  *table0,
404                                    const mlib_u8  *table1,
405                                    const mlib_u8  *table2)
406{
407  mlib_s16 *sp;                        /* pointer to source data */
408  mlib_s32 s0, s1, s2, s3;             /* source data */
409  mlib_s32 s4, s5, s6, s7;             /* source data */
410  mlib_u8 *dl;                         /* pointer to start of destination */
411  mlib_u8 *dend;                       /* pointer to end of destination */
412  mlib_d64 *dp;                        /* aligned pointer to destination */
413  mlib_d64 t0, t1, t2;                 /* destination data */
414  mlib_d64 t3, t4, t5;                 /* destination data */
415  mlib_d64 t6, t7, acc;                /* destination data */
416  mlib_s32 emask;                      /* edge mask */
417  mlib_s32 i, num;                     /* loop variable */
418  const mlib_u8 *table;
419
420  dl = dst;
421  sp = (void *)src;
422  dp = (mlib_d64 *) dl;
423  dend = dl + xsize - 1;
424
425  vis_alignaddr((void *)0, 7);
426
427  if (xsize >= 8) {
428
429    s0 = sp[0];
430    s1 = sp[1];
431    s2 = sp[2];
432    s3 = sp[3];
433    s4 = sp[4];
434    s5 = sp[5];
435    s6 = sp[6];
436    s7 = sp[7];
437    sp += 8;
438
439#pragma pipeloop(0)
440    for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
441      t7 = VIS_LD_U8_I(table1, s7);
442      t6 = VIS_LD_U8_I(table0, s6);
443      t5 = VIS_LD_U8_I(table2, s5);
444      t4 = VIS_LD_U8_I(table1, s4);
445      t3 = VIS_LD_U8_I(table0, s3);
446      t2 = VIS_LD_U8_I(table2, s2);
447      t1 = VIS_LD_U8_I(table1, s1);
448      t0 = VIS_LD_U8_I(table0, s0);
449      acc = vis_faligndata(t7, acc);
450      acc = vis_faligndata(t6, acc);
451      acc = vis_faligndata(t5, acc);
452      acc = vis_faligndata(t4, acc);
453      acc = vis_faligndata(t3, acc);
454      acc = vis_faligndata(t2, acc);
455      acc = vis_faligndata(t1, acc);
456      acc = vis_faligndata(t0, acc);
457      table = table0;
458      table0 = table2;
459      table2 = table1;
460      table1 = table;
461      s0 = sp[0];
462      s1 = sp[1];
463      s2 = sp[2];
464      s3 = sp[3];
465      s4 = sp[4];
466      s5 = sp[5];
467      s6 = sp[6];
468      s7 = sp[7];
469      *dp++ = acc;
470    }
471
472    t7 = VIS_LD_U8_I(table1, s7);
473    t6 = VIS_LD_U8_I(table0, s6);
474    t5 = VIS_LD_U8_I(table2, s5);
475    t4 = VIS_LD_U8_I(table1, s4);
476    t3 = VIS_LD_U8_I(table0, s3);
477    t2 = VIS_LD_U8_I(table2, s2);
478    t1 = VIS_LD_U8_I(table1, s1);
479    t0 = VIS_LD_U8_I(table0, s0);
480    acc = vis_faligndata(t7, acc);
481    acc = vis_faligndata(t6, acc);
482    acc = vis_faligndata(t5, acc);
483    acc = vis_faligndata(t4, acc);
484    acc = vis_faligndata(t3, acc);
485    acc = vis_faligndata(t2, acc);
486    acc = vis_faligndata(t1, acc);
487    acc = vis_faligndata(t0, acc);
488    table = table0;
489    table0 = table2;
490    table2 = table1;
491    table1 = table;
492    *dp++ = acc;
493  }
494
495  if ((mlib_addr) dp <= (mlib_addr) dend) {
496
497    num = (mlib_addr) dend - (mlib_addr) dp;
498    sp += num;
499    num++;
500    i = num - 3 * (num / 3);
501
502    if (i == 2) {
503      s0 = (mlib_s32) * sp;
504      sp--;
505
506      t0 = VIS_LD_U8_I(table1, s0);
507      acc = vis_faligndata(t0, acc);
508
509      s0 = (mlib_s32) * sp;
510      sp--;
511
512      t0 = VIS_LD_U8_I(table0, s0);
513      acc = vis_faligndata(t0, acc);
514      num -= 2;
515    }
516    else if (i == 1) {
517      s0 = (mlib_s32) * sp;
518      sp--;
519
520      t0 = VIS_LD_U8_I(table0, s0);
521      acc = vis_faligndata(t0, acc);
522      num--;
523    }
524
525#pragma pipeloop(0)
526    for (i = 0; i < num; i += 3) {
527      s0 = (mlib_s32) * sp;
528      sp--;
529
530      t0 = VIS_LD_U8_I(table2, s0);
531      acc = vis_faligndata(t0, acc);
532
533      s0 = (mlib_s32) * sp;
534      sp--;
535
536      t0 = VIS_LD_U8_I(table1, s0);
537      acc = vis_faligndata(t0, acc);
538
539      s0 = (mlib_s32) * sp;
540      sp--;
541
542      t0 = VIS_LD_U8_I(table0, s0);
543      acc = vis_faligndata(t0, acc);
544    }
545
546    emask = vis_edge8(dp, dend);
547    vis_pst_8(acc, dp, emask);
548  }
549}
550
551/***************************************************************/
552void mlib_v_ImageLookUp_S16_U8_3(const mlib_s16 *src,
553                                 mlib_s32       slb,
554                                 mlib_u8        *dst,
555                                 mlib_s32       dlb,
556                                 mlib_s32       xsize,
557                                 mlib_s32       ysize,
558                                 const mlib_u8  **table)
559{
560  mlib_s16 *sl;
561  mlib_u8 *dl;
562  const mlib_u8 *tab;
563  mlib_s32 j, i;
564
565  sl = (void *)src;
566  dl = dst;
567
568  /* row loop */
569  for (j = 0; j < ysize; j++) {
570    mlib_s16 *sp = sl;
571    mlib_u8 *dp = dl;
572    const mlib_u8 *tab0 = &table[0][32768];
573    const mlib_u8 *tab1 = &table[1][32768];
574    const mlib_u8 *tab2 = &table[2][32768];
575    mlib_s32 off, size = xsize * 3;
576
577    off = (8 - ((mlib_addr) dp & 7)) & 7;
578
579    off = (off < size) ? off : size;
580
581    for (i = 0; i < off - 2; i += 3, sp += 3) {
582      *dp++ = tab0[sp[0]];
583      *dp++ = tab1[sp[1]];
584      *dp++ = tab2[sp[2]];
585      size -= 3;
586    }
587
588    off -= i;
589
590    if (off == 1) {
591      *dp++ = tab0[sp[0]];
592      tab = tab0;
593      tab0 = tab1;
594      tab1 = tab2;
595      tab2 = tab;
596      size--;
597      sp++;
598    }
599    else if (off == 2) {
600      *dp++ = tab0[sp[0]];
601      *dp++ = tab1[sp[1]];
602      tab = tab2;
603      tab2 = tab1;
604      tab1 = tab0;
605      tab0 = tab;
606      size -= 2;
607      sp += 2;
608    }
609
610    if (size > 0) {
611      mlib_v_ImageLookUp_S16_U8_3_D1(sp, dp, size, tab0, tab1, tab2);
612    }
613
614    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
615    dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
616  }
617}
618
619/***************************************************************/
620