1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUp_S16_S16_124_D1(const mlib_s16 *src,
34                                              mlib_s16       *dst,
35                                              mlib_s32       xsize,
36                                              const mlib_s16 *table0,
37                                              const mlib_s16 *table1,
38                                              const mlib_s16 *table2,
39                                              const mlib_s16 *table3);
40
41static void mlib_v_ImageLookUp_S16_S16_3_D1(const mlib_s16 *src,
42                                            mlib_s16       *dst,
43                                            mlib_s32       xsize,
44                                            const mlib_s16 *table0,
45                                            const mlib_s16 *table1,
46                                            const mlib_s16 *table2);
47
48/***************************************************************/
49
/* Forwards to vis_ld_u16_i(), casting the base address to void * first. */
#define VIS_LD_U16_I(base, offset)      vis_ld_u16_i((void *)(base), (offset))
51
52/***************************************************************/
53void mlib_v_ImageLookUp_S16_S16_124_D1(const mlib_s16 *src,
54                                       mlib_s16       *dst,
55                                       mlib_s32       xsize,
56                                       const mlib_s16 *table0,
57                                       const mlib_s16 *table1,
58                                       const mlib_s16 *table2,
59                                       const mlib_s16 *table3)
60{
61  mlib_s16 *sp;                        /* pointer to source data */
62  mlib_s32 s0, s1, s2, s3;             /* source data */
63  mlib_s16 *dl;                        /* pointer to start of destination */
64  mlib_s16 *dend;                      /* pointer to end of destination */
65  mlib_d64 *dp;                        /* aligned pointer to destination */
66  mlib_d64 t0, t1, t2;                 /* destination data */
67  mlib_d64 t3, acc0;                   /* destination data */
68  mlib_s32 emask;                      /* edge mask */
69  mlib_s32 i, num;                     /* loop variable */
70
71  dl = dst;
72  sp = (void *)src;
73  dp = (mlib_d64 *) dl;
74  dend = dl + xsize - 1;
75
76  vis_alignaddr((void *)0, 6);
77
78  i = 0;
79
80  if (xsize >= 4) {
81
82    s0 = sp[0];
83    s1 = sp[1];
84    s2 = sp[2];
85    s3 = sp[3];
86    sp += 4;
87
88#pragma pipeloop(0)
89    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
90      t3 = VIS_LD_U16_I(table3, 2 * s3);
91      t2 = VIS_LD_U16_I(table2, 2 * s2);
92      t1 = VIS_LD_U16_I(table1, 2 * s1);
93      t0 = VIS_LD_U16_I(table0, 2 * s0);
94      acc0 = vis_faligndata(t3, acc0);
95      acc0 = vis_faligndata(t2, acc0);
96      acc0 = vis_faligndata(t1, acc0);
97      acc0 = vis_faligndata(t0, acc0);
98      s0 = sp[0];
99      s1 = sp[1];
100      s2 = sp[2];
101      s3 = sp[3];
102      *dp++ = acc0;
103    }
104
105    t3 = VIS_LD_U16_I(table3, 2 * s3);
106    t2 = VIS_LD_U16_I(table2, 2 * s2);
107    t1 = VIS_LD_U16_I(table1, 2 * s1);
108    t0 = VIS_LD_U16_I(table0, 2 * s0);
109    acc0 = vis_faligndata(t3, acc0);
110    acc0 = vis_faligndata(t2, acc0);
111    acc0 = vis_faligndata(t1, acc0);
112    acc0 = vis_faligndata(t0, acc0);
113    *dp++ = acc0;
114  }
115
116  if ((mlib_addr) dp <= (mlib_addr) dend) {
117
118    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
119    sp += num;
120    num++;
121
122    if (num == 1) {
123      s0 = (mlib_s32) * sp;
124      sp--;
125
126      t0 = VIS_LD_U16_I(table0, 2 * s0);
127      acc0 = vis_faligndata(t0, acc0);
128    }
129    else if (num == 2) {
130      s0 = (mlib_s32) * sp;
131      sp--;
132
133      t0 = VIS_LD_U16_I(table1, 2 * s0);
134      acc0 = vis_faligndata(t0, acc0);
135
136      s0 = (mlib_s32) * sp;
137      sp--;
138
139      t0 = VIS_LD_U16_I(table0, 2 * s0);
140      acc0 = vis_faligndata(t0, acc0);
141    }
142    else if (num == 3) {
143      s0 = (mlib_s32) * sp;
144      sp--;
145
146      t0 = VIS_LD_U16_I(table2, 2 * s0);
147      acc0 = vis_faligndata(t0, acc0);
148
149      s0 = (mlib_s32) * sp;
150      sp--;
151
152      t0 = VIS_LD_U16_I(table1, 2 * s0);
153      acc0 = vis_faligndata(t0, acc0);
154
155      s0 = (mlib_s32) * sp;
156      sp--;
157
158      t0 = VIS_LD_U16_I(table0, 2 * s0);
159      acc0 = vis_faligndata(t0, acc0);
160    }
161
162    emask = vis_edge16(dp, dend);
163    vis_pst_16(acc0, dp, emask);
164  }
165}
166
167/***************************************************************/
168void mlib_v_ImageLookUp_S16_S16_1(const mlib_s16 *src,
169                                  mlib_s32       slb,
170                                  mlib_s16       *dst,
171                                  mlib_s32       dlb,
172                                  mlib_s32       xsize,
173                                  mlib_s32       ysize,
174                                  const mlib_s16 **table)
175{
176  mlib_s16 *sl;
177  mlib_s16 *dl;
178  const mlib_s16 *tab = &table[0][32768];
179  mlib_s32 j, i;
180
181  sl = (void *)src;
182  dl = dst;
183
184  /* row loop */
185  for (j = 0; j < ysize; j++) {
186    mlib_s16 *sp = sl;
187    mlib_s16 *dp = dl;
188    mlib_s32 off, size = xsize;
189
190    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
191
192    off = (off < size) ? off : size;
193
194    for (i = 0; i < off; i++, sp++) {
195      *dp++ = tab[sp[0]];
196      size--;
197    }
198
199    if (size > 0) {
200      mlib_v_ImageLookUp_S16_S16_124_D1(sp, dp, size, tab, tab, tab, tab);
201    }
202
203    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
204    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
205  }
206}
207
208/***************************************************************/
209void mlib_v_ImageLookUp_S16_S16_2(const mlib_s16 *src,
210                                  mlib_s32       slb,
211                                  mlib_s16       *dst,
212                                  mlib_s32       dlb,
213                                  mlib_s32       xsize,
214                                  mlib_s32       ysize,
215                                  const mlib_s16 **table)
216{
217  mlib_s16 *sl;
218  mlib_s16 *dl;
219  const mlib_s16 *tab;
220  mlib_s32 j, i;
221
222  sl = (void *)src;
223  dl = dst;
224
225  /* row loop */
226  for (j = 0; j < ysize; j++) {
227    mlib_s16 *sp = sl;
228    mlib_s16 *dp = dl;
229    mlib_s32 off, size = xsize * 2;
230    const mlib_s16 *tab0 = &table[0][32768];
231    const mlib_s16 *tab1 = &table[1][32768];
232
233    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
234
235    off = (off < size) ? off : size;
236
237    for (i = 0; i < off - 1; i += 2, sp += 2) {
238      *dp++ = tab0[sp[0]];
239      *dp++ = tab1[sp[1]];
240      size -= 2;
241    }
242
243    if ((off & 1) != 0) {
244      *dp++ = tab0[sp[0]];
245      size--;
246      sp++;
247      tab = tab0;
248      tab0 = tab1;
249      tab1 = tab;
250    }
251
252    if (size > 0) {
253      mlib_v_ImageLookUp_S16_S16_124_D1(sp, dp, size, tab0, tab1, tab0, tab1);
254    }
255
256    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
257    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
258  }
259}
260
261/***************************************************************/
262void mlib_v_ImageLookUp_S16_S16_4(const mlib_s16 *src,
263                                  mlib_s32       slb,
264                                  mlib_s16       *dst,
265                                  mlib_s32       dlb,
266                                  mlib_s32       xsize,
267                                  mlib_s32       ysize,
268                                  const mlib_s16 **table)
269{
270  mlib_s16 *sl;
271  mlib_s16 *dl;
272  const mlib_s16 *tab;
273  mlib_s32 j;
274
275  sl = (void *)src;
276  dl = dst;
277
278  /* row loop */
279  for (j = 0; j < ysize; j++) {
280    mlib_s16 *sp = sl;
281    mlib_s16 *dp = dl;
282    const mlib_s16 *tab0 = &table[0][32768];
283    const mlib_s16 *tab1 = &table[1][32768];
284    const mlib_s16 *tab2 = &table[2][32768];
285    const mlib_s16 *tab3 = &table[3][32768];
286    mlib_s32 off, size = xsize * 4;
287
288    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
289
290    off = (off < size) ? off : size;
291
292    if (off == 1) {
293      *dp++ = tab0[sp[0]];
294      tab = tab0;
295      tab0 = tab1;
296      tab1 = tab2;
297      tab2 = tab3;
298      tab3 = tab;
299      size--;
300      sp++;
301    }
302    else if (off == 2) {
303      *dp++ = tab0[sp[0]];
304      *dp++ = tab1[sp[1]];
305      tab = tab0;
306      tab0 = tab2;
307      tab2 = tab;
308      tab = tab1;
309      tab1 = tab3;
310      tab3 = tab;
311      size -= 2;
312      sp += 2;
313    }
314    else if (off == 3) {
315      *dp++ = tab0[sp[0]];
316      *dp++ = tab1[sp[1]];
317      *dp++ = tab2[sp[2]];
318      tab = tab3;
319      tab3 = tab2;
320      tab2 = tab1;
321      tab1 = tab0;
322      tab0 = tab;
323      size -= 3;
324      sp += 3;
325    }
326
327    if (size > 0) {
328      mlib_v_ImageLookUp_S16_S16_124_D1(sp, dp, size, tab0, tab1, tab2, tab3);
329    }
330
331    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
332    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
333  }
334}
335
336/***************************************************************/
337void mlib_v_ImageLookUp_S16_S16_3_D1(const mlib_s16 *src,
338                                     mlib_s16       *dst,
339                                     mlib_s32       xsize,
340                                     const mlib_s16 *table0,
341                                     const mlib_s16 *table1,
342                                     const mlib_s16 *table2)
343{
344  mlib_s16 *sp;                        /* pointer to source data */
345  mlib_s32 s0, s1, s2, s3;             /* source data */
346  mlib_s16 *dl;                        /* pointer to start of destination */
347  mlib_s16 *dend;                      /* pointer to end of destination */
348  mlib_d64 *dp;                        /* aligned pointer to destination */
349  mlib_d64 t0, t1, t2, t3;             /* destination data */
350  mlib_d64 acc0;                       /* destination data */
351  mlib_s32 emask;                      /* edge mask */
352  mlib_s32 i, num;                     /* loop variable */
353  const mlib_s16 *table;
354
355  dl = dst;
356  sp = (void *)src;
357  dp = (mlib_d64 *) dl;
358  dend = dl + xsize - 1;
359
360  vis_alignaddr((void *)0, 6);
361
362  i = 0;
363
364  if (xsize >= 4) {
365
366    s0 = sp[0];
367    s1 = sp[1];
368    s2 = sp[2];
369    s3 = sp[3];
370    sp += 4;
371
372#pragma pipeloop(0)
373    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
374      t3 = VIS_LD_U16_I(table0, 2 * s3);
375      t2 = VIS_LD_U16_I(table2, 2 * s2);
376      t1 = VIS_LD_U16_I(table1, 2 * s1);
377      t0 = VIS_LD_U16_I(table0, 2 * s0);
378      acc0 = vis_faligndata(t3, acc0);
379      acc0 = vis_faligndata(t2, acc0);
380      acc0 = vis_faligndata(t1, acc0);
381      acc0 = vis_faligndata(t0, acc0);
382      s0 = sp[0];
383      s1 = sp[1];
384      s2 = sp[2];
385      s3 = sp[3];
386      *dp++ = acc0;
387      table = table0;
388      table0 = table1;
389      table1 = table2;
390      table2 = table;
391    }
392
393    t3 = VIS_LD_U16_I(table0, 2 * s3);
394    t2 = VIS_LD_U16_I(table2, 2 * s2);
395    t1 = VIS_LD_U16_I(table1, 2 * s1);
396    t0 = VIS_LD_U16_I(table0, 2 * s0);
397    acc0 = vis_faligndata(t3, acc0);
398    acc0 = vis_faligndata(t2, acc0);
399    acc0 = vis_faligndata(t1, acc0);
400    acc0 = vis_faligndata(t0, acc0);
401    *dp++ = acc0;
402    table = table0;
403    table0 = table1;
404    table1 = table2;
405    table2 = table;
406    i += 4;
407  }
408
409  if ((mlib_addr) dp <= (mlib_addr) dend) {
410
411    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
412    sp += num;
413    num++;
414
415    if (num == 1) {
416      s0 = (mlib_s32) * sp;
417      sp--;
418
419      t0 = VIS_LD_U16_I(table0, 2 * s0);
420      acc0 = vis_faligndata(t0, acc0);
421    }
422    else if (num == 2) {
423      s0 = (mlib_s32) * sp;
424      sp--;
425
426      t0 = VIS_LD_U16_I(table1, 2 * s0);
427      acc0 = vis_faligndata(t0, acc0);
428
429      s0 = (mlib_s32) * sp;
430      sp--;
431
432      t0 = VIS_LD_U16_I(table0, 2 * s0);
433      acc0 = vis_faligndata(t0, acc0);
434    }
435    else if (num == 3) {
436      s0 = (mlib_s32) * sp;
437      sp--;
438
439      t0 = VIS_LD_U16_I(table2, 2 * s0);
440      acc0 = vis_faligndata(t0, acc0);
441
442      s0 = (mlib_s32) * sp;
443      sp--;
444
445      t0 = VIS_LD_U16_I(table1, 2 * s0);
446      acc0 = vis_faligndata(t0, acc0);
447
448      s0 = (mlib_s32) * sp;
449      sp--;
450
451      t0 = VIS_LD_U16_I(table0, 2 * s0);
452      acc0 = vis_faligndata(t0, acc0);
453    }
454
455    emask = vis_edge16(dp, dend);
456    vis_pst_16(acc0, dp, emask);
457  }
458}
459
460/***************************************************************/
461void mlib_v_ImageLookUp_S16_S16_3(const mlib_s16 *src,
462                                  mlib_s32       slb,
463                                  mlib_s16       *dst,
464                                  mlib_s32       dlb,
465                                  mlib_s32       xsize,
466                                  mlib_s32       ysize,
467                                  const mlib_s16 **table)
468{
469  mlib_s16 *sl;
470  mlib_s16 *dl;
471  const mlib_s16 *tab;
472  mlib_s32 j, i;
473
474  sl = (void *)src;
475  dl = dst;
476
477  /* row loop */
478  for (j = 0; j < ysize; j++) {
479    mlib_s16 *sp = sl;
480    mlib_s16 *dp = dl;
481    const mlib_s16 *tab0 = &table[0][32768];
482    const mlib_s16 *tab1 = &table[1][32768];
483    const mlib_s16 *tab2 = &table[2][32768];
484    mlib_s32 off, size = xsize * 3;
485
486    off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
487
488    off = (off < size) ? off : size;
489
490    for (i = 0; i < off - 2; i += 3, sp += 3) {
491      *dp++ = tab0[sp[0]];
492      *dp++ = tab1[sp[1]];
493      *dp++ = tab2[sp[2]];
494      size -= 3;
495    }
496
497    off -= i;
498
499    if (off == 1) {
500      *dp++ = tab0[sp[0]];
501      tab = tab0;
502      tab0 = tab1;
503      tab1 = tab2;
504      tab2 = tab;
505      size--;
506      sp++;
507    }
508    else if (off == 2) {
509      *dp++ = tab0[sp[0]];
510      *dp++ = tab1[sp[1]];
511      tab = tab2;
512      tab2 = tab1;
513      tab1 = tab0;
514      tab0 = tab;
515      size -= 2;
516      sp += 2;
517    }
518
519    if (size > 0) {
520      mlib_v_ImageLookUp_S16_S16_3_D1(sp, dp, size, tab0, tab1, tab2);
521    }
522
523    sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
524    dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
525  }
526}
527
528/***************************************************************/
529