1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#ifndef __MLIB_V_IMAGELOGIC_H
27#define __MLIB_V_IMAGELOGIC_H
28
29
30#include <vis_proto.h>
31#include <mlib_ImageCheck.h>
32#include <mlib_ImageLogic_proto.h>
33#include <mlib_v_ImageLogic_proto.h>
34
35#ifdef __cplusplus
36extern "C" {
37#endif /* __cplusplus */
38
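/*
 * Shared implementation of the VIS-accelerated two-source image logical
 * operations.  The per-word operation is supplied through the VIS_LOGIC
 * macro, which is not defined in this header and is expected to be defined
 * before the header is included (presumably by the including source file).
 */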
42
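/*
 * The compile-time guard below, which would restrict this header to
 * VIS 1.0, is currently disabled (commented out):
 *
#if defined ( VIS )
#if VIS >= 0x200
#error This include file can be used with VIS 1.0 only
#endif
#endif
*/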
50
51static void mlib_v_alligned_dst_src1(mlib_u8  *dp,
52                                     mlib_u8  *sp1,
53                                     mlib_u8  *sp2,
54                                     mlib_s32 amount);
55
56static void mlib_v_alligned_dst_src2(mlib_u8  *dp,
57                                     mlib_u8  *sp1,
58                                     mlib_u8  *sp2,
59                                     mlib_s32 amount);
60
61static void mlib_v_alligned_src1_src2(mlib_u8  *dp,
62                                      mlib_u8  *sp1,
63                                      mlib_u8  *sp2,
64                                      mlib_s32 amount);
65
66static void mlib_v_notalligned(mlib_u8  *dp,
67                               mlib_u8  *sp1,
68                               mlib_u8  *sp2,
69                               mlib_s32 amount);
70
71/***************************************************************/
72
/*
 * VALIDATE() -- common prologue of the image logical functions.
 *
 * It both declares locals and executes statements, so it must be expanded
 * right after the local declarations of a function whose image arguments
 * are named dst, src1 and src2.  It
 *   - declares the pixel/line pointers, geometry and stride variables;
 *   - applies the MLIB_IMAGE_SIZE/TYPE/CHAN_EQUAL checks to (dst, src1) and
 *     (dst, src2).  NOTE(review): those macros are defined elsewhere and
 *     presumably return a failure status on mismatch -- confirm;
 *   - loads the data pointers, height, width, strides, channel count and
 *     type from the images;
 *   - converts `width` from samples to bytes for multi-byte sample types.
 *
 * MLIB_USHORT is scaled like MLIB_SHORT (two bytes per sample).  Without
 * that, only the first half of every row of an unsigned-short image would
 * be processed.
 */
#define VALIDATE()                                                      \
  mlib_u8  *sp1, *sl1; /* pixel and line pointers of source 1 */        \
  mlib_u8  *sp2, *sl2; /* pixel and line pointers of source 2 */        \
  mlib_u8  *dp,  *dl;  /* pixel and line pointers of destination */     \
  mlib_s32 width, height, channels, type;                               \
  mlib_s32 stride1;  /* line stride of src1, in bytes */                \
  mlib_s32 stride2;  /* line stride of src2, in bytes */                \
  mlib_s32 strided;  /* line stride of dst, in bytes */                 \
                                                                        \
  MLIB_IMAGE_SIZE_EQUAL(dst,src1);                                      \
  MLIB_IMAGE_TYPE_EQUAL(dst,src1);                                      \
  MLIB_IMAGE_CHAN_EQUAL(dst,src1);                                      \
                                                                        \
  MLIB_IMAGE_SIZE_EQUAL(dst,src2);                                      \
  MLIB_IMAGE_TYPE_EQUAL(dst,src2);                                      \
  MLIB_IMAGE_CHAN_EQUAL(dst,src2);                                      \
                                                                        \
  dp  = (mlib_u8*) mlib_ImageGetData(dst);                              \
  sp1 = (mlib_u8*) mlib_ImageGetData(src1);                             \
  sp2 = (mlib_u8*) mlib_ImageGetData(src2);                             \
  height = mlib_ImageGetHeight(dst);                                    \
  width  = mlib_ImageGetWidth(dst);                                     \
  stride1 = mlib_ImageGetStride(src1);                                  \
  stride2 = mlib_ImageGetStride(src2);                                  \
  strided  = mlib_ImageGetStride(dst);                                  \
  channels    = mlib_ImageGetChannels(dst);                             \
  type = mlib_ImageGetType(dst);                                        \
                                                                        \
  /* express the row width in bytes rather than samples */              \
  if (type == MLIB_SHORT || type == MLIB_USHORT) {                      \
    width *= 2;                                                         \
  } else if (type == MLIB_INT) {                                        \
    width *= 4;                                                         \
  }
106
107/***************************************************************/
108
109static mlib_status mlib_v_ImageLogic(mlib_image *dst,
110                                     mlib_image *src1,
111                                     mlib_image *src2)
112{
113  mlib_s32 i, j;
114  mlib_s32 offdst, offsrc1, offsrc2 , mask, emask;
115  mlib_s32 amount;
116  mlib_d64 *dpp, *spp2 , *spp1;
117  mlib_d64 dd, sd10, sd20;
118  mlib_u8* dend;
119
120  VALIDATE();
121
122  amount = width * channels;
123
124  if (stride1 == amount && stride2 == amount && strided == amount) {
125
126    amount *= height;
127    offdst = ((mlib_addr)dp) & 7;
128    offsrc1 = (( mlib_addr)sp1) & 7;
129    offsrc2 = (( mlib_addr)sp2) & 7 ;
130    mask = ((offsrc1 ^ offsrc2) << 8) |
131           ((offdst ^ offsrc2) << 4)   | (offdst ^ offsrc1);
132
133    if (mask == 0) { /* offdst = offsrc1 = offsrc2 */
134
135/* prepare the destination addresses */
136      dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
137      i = (mlib_u8*)dpp - dp;
138
139/* prepare the source addresses */
140      spp1 = (mlib_d64 *) vis_alignaddr(sp1, 0);
141      spp2 = (mlib_d64 *) vis_alignaddr(sp2, 0);
142
143      dend  = dp + amount - 1;
144/* generate edge mask for the start point */
145      emask = vis_edge8(dp, dend);
146
147      if (emask != 0xff) {
148        sd10 = *spp1++; sd20 = *spp2++;
149        dd = VIS_LOGIC(sd20, sd10);
150        vis_pst_8(dd, dpp++, emask);
151        i += 8;
152      }
153
154#pragma pipeloop(0)
155      for ( ; i <= amount - 8; i += 8) {
156        sd10 = *spp1++; sd20 = *spp2++;
157        *dpp++ = VIS_LOGIC(sd20, sd10);
158      }
159
160      if (i < amount)  {
161        emask = vis_edge8(dpp, dend);
162        sd10 = *spp1++; sd20 = *spp2++;
163        dd = VIS_LOGIC(sd20, sd10);
164        vis_pst_8(dd, dpp, emask);
165      }
166
167    } else if ((mask & 0xF) == 0) { /* offdst = offsrc1 != offsrc2 */
168
169      mlib_v_alligned_dst_src1(dp, sp1, sp2, amount);
170
171    } else if ((mask & 0xF0) == 0) { /* offdst = offsrc2 != offsrc1 */
172
173      mlib_v_alligned_dst_src2(dp, sp1, sp2, amount);
174
175    } else if ((mask & 0xF00) == 0) { /* offsrc1 = offsrc2 != offdst */
176
177      mlib_v_alligned_src1_src2(dp, sp1, sp2, amount);
178
179    } else {                       /* offdst != offsrc1 != offsrc2 */
180
181      mlib_v_notalligned(dp, sp1, sp2, amount);
182    }
183  }
184  else {
185
186    sl1 = sp1 ;
187    sl2 = sp2 ;
188    dl = dp ;
189
190    offdst = ((mlib_addr)dp) & 7;
191    offsrc1 = (( mlib_addr)sp1) & 7;
192    offsrc2 = (( mlib_addr)sp2) & 7 ;
193
194    if ((offdst == offsrc1) && (offdst == offsrc2) &&
195        ((strided & 7) == (stride1 & 7)) &&
196        ((strided & 7) == (stride2 & 7))) {
197
198      for (j = 0; j < height; j ++ ) {
199
200/* prepare the destination addresses */
201        dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
202        i = (mlib_u8*)dpp - dp;
203
204/* prepare the source addresses */
205        spp1 = (mlib_d64 *) vis_alignaddr(sp1, 0);
206        spp2 = (mlib_d64 *) vis_alignaddr(sp2, 0);
207
208        dend  = dp + amount - 1;
209/* generate edge mask for the start point */
210        emask = vis_edge8(dp, dend);
211
212        if (emask != 0xff) {
213          sd10 = *spp1++; sd20 = *spp2++;
214          dd = VIS_LOGIC(sd20, sd10);
215          vis_pst_8(dd, dpp++, emask);
216          i += 8;
217        }
218
219#pragma pipeloop(0)
220        for ( ; i <= amount - 8; i += 8) {
221          sd10 = *spp1++; sd20 = *spp2++;
222          *dpp++ = VIS_LOGIC(sd20, sd10);
223        }
224
225        if (i < amount)  {
226          emask = vis_edge8(dpp, dend);
227          sd10 = *spp1++; sd20 = *spp2++;
228          dd = VIS_LOGIC(sd20, sd10);
229          vis_pst_8(dd, dpp, emask);
230        }
231
232        sp1 = sl1 += stride1 ;
233        sp2 = sl2 += stride2 ;
234        dp = dl += strided ;
235      }
236
237   } else if ((offdst == offsrc1) &&
238             ((strided & 7) == (stride1 & 7))) {
239
240      for (j = 0; j < height; j ++ ) {
241        mlib_v_alligned_dst_src1(dp, sp1, sp2, amount);
242
243        sp1 = sl1 += stride1 ;
244        sp2 = sl2 += stride2 ;
245        dp = dl += strided ;
246      }
247
248   } else if ((offdst == offsrc2) &&
249             ((strided & 7) == (stride2 & 7))) {
250
251      for (j = 0; j < height; j ++ ) {
252        mlib_v_alligned_dst_src2(dp, sp1, sp2, amount);
253
254        sp1 = sl1 += stride1 ;
255        sp2 = sl2 += stride2 ;
256        dp = dl += strided ;
257      }
258
259   } else if ((offsrc1 == offsrc2) &&
260             ((stride1 & 7) == (stride2 & 7))) {
261
262      for (j = 0; j < height; j ++ ) {
263        mlib_v_alligned_src1_src2(dp, sp1, sp2, amount);
264
265        sp1 = sl1 += stride1 ;
266        sp2 = sl2 += stride2 ;
267        dp = dl += strided ;
268      }
269
270   } else {
271
272      for (j = 0; j < height; j ++ ) {
273        mlib_v_notalligned(dp, sp1, sp2, amount);
274
275        sp1 = sl1 += stride1 ;
276        sp2 = sl2 += stride2 ;
277        dp = dl += strided ;
278      }
279    }
280  }
281
282  return MLIB_SUCCESS;
283}
284
285/***************************************************************/
286
287static void mlib_v_alligned_dst_src1(mlib_u8  *dp,
288                                     mlib_u8  *sp1,
289                                     mlib_u8  *sp2,
290                                     mlib_s32 amount)
291{
292  mlib_s32 i;
293  mlib_s32 emask;
294  mlib_d64 *dpp, *spp2 , *spp1;
295  mlib_d64 dd, sd10, sd20, sd21;
296  mlib_u8* dend;
297
298/* prepare the destination addresses */
299  dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
300  i = (mlib_u8*)dpp - dp;
301
302/* prepare the source addresses */
303  spp1 = (mlib_d64 *) vis_alignaddr(sp1, 0);
304  spp2 = (mlib_d64 *) vis_alignaddr(sp2, i);
305
306  dend  = dp + amount - 1;
307/* generate edge mask for the start point */
308  emask = vis_edge8(dp, dend);
309
310  sd20 = spp2[0];
311
312  if (emask != 0xff) {
313    sd10 = *spp1++; sd21 = spp2[1];
314    sd20 = vis_faligndata(sd20, sd21);
315    dd = VIS_LOGIC(sd20, sd10);
316    vis_pst_8(dd, dpp++, emask);
317    sd20 = sd21; spp2++;
318    i += 8;
319  }
320
321#pragma pipeloop(0)
322  for ( ; i <= amount - 8; i += 8) {
323    sd10 = *spp1++; sd21 = spp2[1];
324    sd20 = vis_faligndata(sd20, sd21);
325    *dpp++ = VIS_LOGIC(sd20, sd10);
326    sd20 = sd21; spp2++;
327  }
328
329  if (i < amount)  {
330    emask = vis_edge8(dpp, dend);
331    sd10 = *spp1++;
332    sd20 = vis_faligndata(sd20, spp2[1]);
333    dd = VIS_LOGIC(sd20, sd10);
334    vis_pst_8(dd, dpp, emask);
335  }
336}
337
338/***************************************************************/
339
340static void mlib_v_alligned_dst_src2(mlib_u8  *dp,
341                                     mlib_u8  *sp1,
342                                     mlib_u8  *sp2,
343                                     mlib_s32 amount)
344{
345  mlib_s32 i;
346  mlib_s32 emask;
347  mlib_d64 *dpp, *spp2 , *spp1;
348  mlib_d64 dd, sd10, sd11, sd20;
349  mlib_u8* dend;
350
351/* prepare the destination addresses */
352  dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
353  i = (mlib_u8*)dpp - dp;
354
355/* prepare the source addresses */
356  spp2 = (mlib_d64 *) vis_alignaddr(sp2, 0);
357  spp1 = (mlib_d64 *) vis_alignaddr(sp1, i);
358
359  dend  = dp + amount - 1;
360/* generate edge mask for the start point */
361  emask = vis_edge8(dp, dend);
362
363  sd10 = spp1[0];
364
365  if (emask != 0xff) {
366    sd20 = *spp2++; sd11 = spp1[1];
367    sd10 = vis_faligndata(sd10, sd11);
368    dd = VIS_LOGIC(sd20, sd10);
369    vis_pst_8(dd, dpp++, emask);
370    sd10 = sd11; spp1++;
371    i += 8;
372  }
373
374#pragma pipeloop(0)
375  for ( ; i <= amount - 8; i += 8) {
376    sd20 = *spp2++; sd11 = spp1[1];
377    sd10 = vis_faligndata(sd10, sd11);
378    *dpp++ = VIS_LOGIC(sd20, sd10);
379    sd10 = sd11; spp1++;
380  }
381
382  if (i < amount)  {
383    emask = vis_edge8(dpp, dend);
384    sd20 = *spp2++;
385    sd10 = vis_faligndata(sd10, spp1[1]);
386    dd = VIS_LOGIC(sd20, sd10);
387    vis_pst_8(dd, dpp, emask);
388  }
389}
390
391/***************************************************************/
392
393static void mlib_v_alligned_src1_src2(mlib_u8  *dp,
394                                      mlib_u8  *sp1,
395                                      mlib_u8  *sp2,
396                                      mlib_s32 amount)
397{
398  mlib_s32 i;
399  mlib_s32 emask;
400  mlib_d64 *dpp, *spp2 , *spp1;
401  mlib_d64 dd, sd10, dd0, sd20, dd1;
402  mlib_u8* dend;
403
404/* prepare the source addresses */
405  dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
406  i = (mlib_u8*)dpp - dp;
407
408/* prepare the destination addresses */
409  spp1 = (mlib_d64 *) vis_alignaddr(sp1, i);
410  spp2 = (mlib_d64 *) vis_alignaddr(sp2, i);
411
412  dend  = dp + amount - 1;
413/* generate edge mask for the start point */
414  emask = vis_edge8(dp, dend);
415
416  sd10 = *spp1++; sd20 = *spp2++;
417  dd0 = VIS_LOGIC(sd20, sd10);
418
419  if (emask != 0xff) {
420    sd10 = *spp1++; sd20 = *spp2++;
421    dd1 = VIS_LOGIC(sd20, sd10);
422    dd = vis_faligndata(dd0, dd1);
423    vis_pst_8(dd, dpp++, emask);
424    dd0 = dd1;
425    i += 8;
426  }
427
428#pragma pipeloop(0)
429  for ( ; i <= amount - 8; i += 8) {
430    sd10 = *spp1++; sd20 = *spp2++;
431    dd1 = VIS_LOGIC(sd20, sd10);
432    *dpp++ = vis_faligndata(dd0, dd1);
433    dd0 = dd1;
434  }
435
436  if (i < amount)  {
437    emask = vis_edge8(dpp, dend);
438    sd10 = *spp1++; sd20 = *spp2++;
439    dd1 = VIS_LOGIC(sd20, sd10);
440    dd = vis_faligndata(dd0, dd1);
441    vis_pst_8(dd, dpp, emask);
442  }
443}
444
445/***************************************************************/
446
447static void mlib_v_notalligned(mlib_u8  *dp,
448                               mlib_u8  *sp1,
449                               mlib_u8  *sp2,
450                               mlib_s32 amount)
451{
452  mlib_s32 i, k;
453  mlib_s32 emask;
454  mlib_d64 *dpp, *spp2 , *spp1, *tmp_ptr ;
455  mlib_d64 dd, sd10, sd11, sd20, sd21;
456  mlib_u8* dend;
457
458/* prepare the destination addresses */
459  dpp = (mlib_d64 *) vis_alignaddr(dp, 0);
460  i = (mlib_u8*)dpp - dp;
461
462  dend  = dp + amount - 1;
463/* generate edge mask for the start point */
464  emask = vis_edge8(dp, dend);
465
466  if (emask != 0xff) {
467    spp1 = (mlib_d64 *) vis_alignaddr(sp1, i);
468    sd10 = vis_faligndata(spp1[0], spp1[1]);
469    spp2 = (mlib_d64 *) vis_alignaddr(sp2, i);
470    sd20 = vis_faligndata(spp2[0], spp2[1]);
471    dd = VIS_LOGIC(sd20, sd10);
472    vis_pst_8(dd, dpp++, emask);
473    i += 8;
474  }
475
476/* copy src1 to dst */
477  spp1 = (mlib_d64 *) vis_alignaddr(sp1, i);
478  sd11 = spp1[0];
479  tmp_ptr = dpp;
480
481#pragma pipeloop(0)
482  for (k = i; k <= (amount - 8); k += 8) {
483    sd10 = sd11; sd11 = spp1[1];
484    *tmp_ptr++ = vis_faligndata(sd10, sd11);
485    spp1++;
486  }
487
488  sd11 = vis_faligndata(sd11, spp1[1]);
489
490  spp2 = (mlib_d64 *) vis_alignaddr(sp2, i);
491  sd20 = spp2[0];
492  tmp_ptr = dpp;
493
494#pragma pipeloop(0)
495  for ( ; i <= amount - 8; i += 8) {
496    sd10 = *tmp_ptr++; sd21 = spp2[1];
497    sd20 = vis_faligndata(sd20, sd21);
498    *dpp++ = VIS_LOGIC(sd20, sd10);
499    sd20 = sd21; spp2++;
500  }
501
502  if (i < amount)  {
503    emask = vis_edge8(dpp, dend);
504    sd20 = vis_faligndata(sd20, spp2[1]);
505    dd = VIS_LOGIC(sd20, sd11);
506    vis_pst_8(dd, dpp, emask);
507  }
508}
509
510/***************************************************************/
511
512#ifdef __cplusplus
513}
514#endif /* __cplusplus */
515#endif /* __MLIB_V_IMAGELOGIC_H */
516