1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28/*
29 * FILENAME: mlib_v_ImageChannelExtract_43.c
30 *
31 * FUNCTIONS
32 *      mlib_v_ImageChannelExtract_U8_43L_D1
33 *      mlib_v_ImageChannelExtract_S16_43L_D1
34 *
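 * SYNOPSIS
 *      void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
 *                                                mlib_u8       *dst,
 *                                                mlib_s32      dsize);
 *      void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
 *                                                 mlib_s16       *dst,
 *                                                 mlib_s32       dsize);
 *
 * ARGUMENT
 *      src    pointer to source image data
 *      dst    pointer to destination image data
 *      slb    source image line stride in bytes
 *      dlb    destination image line stride in bytes
 *      dsize  image data size in pixels
 *      xsize  image width in pixels
 *      ysize  image height in lines
 *      cmask  channel mask
 *
 *      The two functions listed above take only src, dst and dsize.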
47 * DESCRIPTION
48 *      extract the right or left 3 channels of a 4-channel image to
49 *      a 3-channel image -- VIS version low level functions.
50 *
51 *      ABGR => BGR   (43R), or  RGBA => RGB  (43L)
52 *
53 * NOTE
54 *      These functions are separated from mlib_v_ImageChannelExtract.c
55 *      for loop unrolling and structure clarity.
56 */
57
58#include "vis_proto.h"
59#include "mlib_image.h"
60#include "mlib_v_ImageChannelExtract.h"
61
62/***************************************************************/
/*
 * EXTRACT_U8_43L
 *
 * Packs eight 4-byte pixels held in sd0..sd3 (two pixels per register)
 * into 24 bytes in dd0, dd1 and dd2, keeping the first three bytes of
 * each pixel and dropping the fourth.
 *
 * The macro works directly on locals of the enclosing function: it reads
 * sd0..sd3, rewrites dd0..dd2 and uses sda as scratch.  sda is loaded with
 * a source register whose two 32-bit halves are exchanged, so that the
 * second pixel of the pair comes first.
 *
 * Each destination register is rebuilt only by shifting new bytes in with
 * vis_faligndata; as the lane comments show, whatever the register held on
 * entry has been shifted out by the end of the macro.
 *
 * The statements are order dependent: every vis_alignaddr call selects the
 * byte offset used by the vis_faligndata calls that follow it.  The last
 * offset selected here is 3, so a caller has to issue its own
 * vis_alignaddr before using vis_faligndata afterwards.
 *
 * In the lane comments one two-character group is one byte and "--" marks
 * a byte that does not hold output data yet.
 */
#define EXTRACT_U8_43L        /* shift left */                  \
                                                                \
  vis_alignaddr((void *)0, 3);                                  \
  dd0 = vis_faligndata(dd0, sd0);    /* ----------r0g0b0 */     \
  sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0));      \
  dd0 = vis_faligndata(dd0, sda);    /* ----r0g0b0r1g1b1 */     \
                                                                \
  vis_alignaddr((void *)0, 2);                                  \
  dd0 = vis_faligndata(dd0, sd1);    /* r0g0b0r1g1b1r2g2 */     \
                                                                \
  vis_alignaddr((void *)0, 3);                                  \
  dd1 = vis_faligndata(dd1, sd1);    /* ----------r2g2b2 */     \
  sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1));      \
  dd1 = vis_faligndata(dd1, sda);    /* ----r2g2b2r3g3b3 */     \
  dd1 = vis_faligndata(dd1, sd2);    /* g2b2r3g3b3r4g4b4 */     \
                                                                \
  sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2));      \
  vis_alignaddr((void *)0, 1);                                  \
  dd1 = vis_faligndata(dd1, sda);    /* b2r3g3b3r4g4b4r5 */     \
                                                                \
  vis_alignaddr((void *)0, 3);                                  \
  dd2 = vis_faligndata(dd2, sda);    /* ----------r5g5b5 */     \
                                                                \
  dd2 = vis_faligndata(dd2, sd3);    /* ----r5g5b5r6g6b6 */     \
  sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3));      \
  dd2 = vis_faligndata(dd2, sda);           /* g5b5r6g6b6r7g7b7 */
89
90/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L
 *
 * Fetches the next 32 source bytes and converts them with EXTRACT_U8_43L.
 *
 * Steps, all performed on locals of the enclosing function:
 *   - select soff as the byte offset for the vis_faligndata calls below;
 *   - reuse the previous s4 as s0 and load sp[1]..sp[4] into s1..s4
 *     (s4 must therefore already hold sp[0] before the first use);
 *   - combine neighbouring words into sd0..sd3;
 *   - advance sp by four 8-byte words;
 *   - save the current dd2 in dd2old, then run EXTRACT_U8_43L, which
 *     rewrites dd0..dd2.
 *
 * sp[4] is loaded although sp only advances by four words: it is the
 * second operand of sd3 and is reused as s0 by the next expansion.
 */
#define LOAD_EXTRACT_U8_43L                                             \
                                                                        \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  s2 = sp[2];                                                           \
  s3 = sp[3];                                                           \
  s4 = sp[4];                                                           \
  sd0 = vis_faligndata(s0, s1);  /* 8 bytes taken at offset soff */     \
  sd1 = vis_faligndata(s1, s2);                                         \
  sd2 = vis_faligndata(s2, s3);                                         \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
                                                                        \
/*  vis_alignaddr((void *)0, 1); */    /* for _old only */              \
  dd2old = dd2;                  /* keep the previous group's tail */   \
  EXTRACT_U8_43L
108
109/***************************************************************/
110/*
111 * Either source or destination data are not 8-byte aligned.
112 * And ssize is multiple of 8.
113 */
114
115void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
116                                          mlib_u8       *dst,
117                                          mlib_s32      dsize)
118{
119  mlib_u8 *sa, *da;
120  mlib_u8 *dend, *dend2;                              /* end points in dst */
121  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
122  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
123  mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
124  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
125  mlib_d64 dd0, dd1, dd2;                             /* dst data */
126  mlib_d64 dd2old;                                    /* the last datum of the last step */
127  mlib_d64 sda;
128  mlib_s32 soff;                                      /* offset of address in src */
129  mlib_s32 doff;                                      /* offset of address in dst */
130  mlib_s32 emask;                                     /* edge mask */
131  mlib_s32 i, n;
132
133  sa = (void *)src;
134  da = dst;
135
136  /* prepare the source address */
137  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
138  soff = ((mlib_addr) sa & 7);
139
140  /* prepare the destination addresses */
141  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
142  dend = da + dsize * 3 - 1;
143  dend2 = dend - 23;
144  doff = 8 - ((mlib_addr) da & 7);
145
146  /* generate edge mask for the start point */
147  emask = vis_edge8(da, dend);
148
149  /* load 32 byte, convert, store 24 bytes */
150  s4 = sp[0];                               /* initial value */
151  LOAD_EXTRACT_U8_43L;
152
153  if (dsize >= 8) {
154    if (doff == 8) {
155      vis_pst_8(dd0, dp++, emask);
156      *dp++ = dd1;
157      *dp++ = dd2;
158    }
159    else {
160      vis_alignaddr((void *)doff, 0);
161      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
162      *dp++ = vis_faligndata(dd0, dd1);
163      *dp++ = vis_faligndata(dd1, dd2);
164    }
165  }
166  else {                                    /* for very small size */
167    if (doff == 8) {
168      vis_pst_8(dd0, dp++, emask);
169      if ((mlib_addr) dp <= (mlib_addr) dend) {
170        emask = vis_edge8(dp, dend);
171        vis_pst_8(dd1, dp++, emask);
172        if ((mlib_addr) dp <= (mlib_addr) dend) {
173          emask = vis_edge8(dp, dend);
174          vis_pst_8(dd2, dp++, emask);
175        }
176      }
177    }
178    else {
179      vis_alignaddr((void *)doff, 0);
180      vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
181      if ((mlib_addr) dp <= (mlib_addr) dend) {
182        emask = vis_edge8(dp, dend);
183        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
184        if ((mlib_addr) dp <= (mlib_addr) dend) {
185          emask = vis_edge8(dp, dend);
186          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
187          if ((mlib_addr) dp <= (mlib_addr) dend) {
188            emask = vis_edge8(dp, dend);
189            vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
190          }
191        }
192      }
193    }
194  }
195
196  /* no edge handling is needed in the loop */
197  if (doff == 8) {
198    if ((mlib_addr) dp <= (mlib_addr) dend2) {
199      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
200#pragma pipeloop(0)
201      for (i = 0; i < n; i++) {
202        LOAD_EXTRACT_U8_43L;
203        *dp++ = dd0;
204        *dp++ = dd1;
205        *dp++ = dd2;
206      }
207    }
208  }
209  else {
210    if ((mlib_addr) dp <= (mlib_addr) dend2) {
211      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
212#pragma pipeloop(0)
213      for (i = 0; i < n; i++) {
214        LOAD_EXTRACT_U8_43L;
215        vis_alignaddr((void *)doff, 0);
216        *dp++ = vis_faligndata(dd2old, dd0);
217        *dp++ = vis_faligndata(dd0, dd1);
218        *dp++ = vis_faligndata(dd1, dd2);
219      }
220    }
221  }
222
223  if ((mlib_addr) dp <= (mlib_addr) dend) {
224    LOAD_EXTRACT_U8_43L;
225    emask = vis_edge8(dp, dend);
226    if (doff == 8) {
227      vis_pst_8(dd0, dp++, emask);
228      if ((mlib_addr) dp <= (mlib_addr) dend) {
229        emask = vis_edge8(dp, dend);
230        vis_pst_8(dd1, dp++, emask);
231        if ((mlib_addr) dp <= (mlib_addr) dend) {
232          emask = vis_edge8(dp, dend);
233          vis_pst_8(dd2, dp++, emask);
234        }
235      }
236    }
237    else {
238      vis_alignaddr((void *)doff, 0);
239      vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
240      if ((mlib_addr) dp <= (mlib_addr) dend) {
241        emask = vis_edge8(dp, dend);
242        vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
243        if ((mlib_addr) dp <= (mlib_addr) dend) {
244          emask = vis_edge8(dp, dend);
245          vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
246        }
247      }
248    }
249  }
250}
251
252/***************************************************************/
/*
 * EXTRACT_S16_43L
 *
 * Packs four 4-channel 16-bit pixels held in sd0..sd3 (one pixel per
 * register) into 24 bytes in dd0, dd1 and dd2, keeping the first three
 * 16-bit channels of each pixel and dropping the fourth.
 *
 * The macro works directly on locals of the enclosing function: it reads
 * sd0..sd3 and rewrites dd0..dd2.  Each destination register is rebuilt
 * only by shifting new data in with vis_faligndata; as the lane comments
 * show, whatever the register held on entry has been shifted out by the
 * end of the macro.
 *
 * The statements are order dependent: every vis_alignaddr call selects the
 * byte offset used by the vis_faligndata calls that follow it.  The last
 * offset selected here is 6, so a caller has to issue its own
 * vis_alignaddr before using vis_faligndata afterwards.
 *
 * In the lane comments one two-character group is one 16-bit channel and
 * "--" marks a channel slot that does not hold output data yet.
 */
#define EXTRACT_S16_43L              /* shift left */           \
  vis_alignaddr((void *)0, 6);                                  \
  dd0 = vis_faligndata(dd0, sd0);    /* --r0g0b0 */             \
  vis_alignaddr((void *)0, 2);                                  \
  dd0 = vis_faligndata(dd0, sd1);    /* r0g0b0r1 */             \
                                                                \
  vis_alignaddr((void *)0, 6);                                  \
  dd1 = vis_faligndata(dd1, sd1);    /* --r1g1b1 */             \
  vis_alignaddr((void *)0, 4);                                  \
  dd1 = vis_faligndata(dd1, sd2);    /* g1b1r2g2 */             \
                                                                \
  vis_alignaddr((void *)0, 6);                                  \
  dd2 = vis_faligndata(dd2, sd2);    /* --r2g2b2 */             \
  dd2 = vis_faligndata(dd2, sd3);           /* b2r3g3b3 */
267
268/***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L
 *
 * Fetches the next 32 source bytes and converts them with EXTRACT_S16_43L.
 *
 * Steps, all performed on locals of the enclosing function:
 *   - select soff as the byte offset for the vis_faligndata calls below;
 *   - reuse the previous s4 as s0 and load sp[1]..sp[4] into s1..s4
 *     (s4 must therefore already hold sp[0] before the first use);
 *   - combine neighbouring words into sd0..sd3;
 *   - advance sp by four 8-byte words;
 *   - save the current dd2 in dd2old, then run EXTRACT_S16_43L, which
 *     rewrites dd0..dd2.
 *
 * sp[4] is loaded although sp only advances by four words: it is the
 * second operand of sd3 and is reused as s0 by the next expansion.
 */
#define LOAD_EXTRACT_S16_43L                                    \
                                                                \
  vis_alignaddr((void *)soff, 0);                               \
  s0 = s4;                                                      \
  s1 = sp[1];                                                   \
  s2 = sp[2];                                                   \
  s3 = sp[3];                                                   \
  s4 = sp[4];                                                   \
  sd0 = vis_faligndata(s0, s1);  /* 8 bytes at offset soff */   \
  sd1 = vis_faligndata(s1, s2);                                 \
  sd2 = vis_faligndata(s2, s3);                                 \
  sd3 = vis_faligndata(s3, s4);                                 \
  sp += 4;                                                      \
  dd2old = dd2;                  /* keep previous tail */       \
  EXTRACT_S16_43L
284
285/***************************************************************/
286/*
287 * Either source or destination data are not 8-byte aligned.
288 * And size is in pixels.
289 */
290
291void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
292                                           mlib_s16       *dst,
293                                           mlib_s32       dsize)
294{
295  mlib_s16 *sa, *da;                                  /* pointer for pixel */
296  mlib_s16 *dend, *dend2;                             /* end points in dst */
297  mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
298  mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
299  mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
300  mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
301  mlib_d64 dd0, dd1, dd2;                             /* dst data */
302  mlib_d64 dd2old;                                    /* the last datum of the last step */
303  mlib_s32 soff;                                      /* offset of address in src */
304  mlib_s32 doff;                                      /* offset of address in dst */
305  mlib_s32 emask;                                     /* edge mask */
306  mlib_s32 i, n;
307
308  sa = (void *)src;
309  da = dst;
310
311  /* prepare the source address */
312  sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
313  soff = ((mlib_addr) sa & 7);
314
315  /* prepare the destination addresses */
316  dp = (mlib_d64 *) ((mlib_addr) da & (~7));
317  dend = da + dsize * 3 - 1;
318  dend2 = dend - 11;
319  doff = 8 - ((mlib_addr) da & 7);
320
321  /* generate edge mask for the start point */
322  emask = vis_edge16(da, dend);
323
324  /* load 32 byte, convert, store 24 bytes */
325  s4 = sp[0];                               /* initial value */
326  LOAD_EXTRACT_S16_43L;
327
328  if (dsize >= 4) {
329    if (doff == 8) {
330      vis_pst_16(dd0, dp++, emask);
331      *dp++ = dd1;
332      *dp++ = dd2;
333    }
334    else {
335      vis_alignaddr((void *)doff, 0);
336      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
337      *dp++ = vis_faligndata(dd0, dd1);
338      *dp++ = vis_faligndata(dd1, dd2);
339    }
340  }
341  else {                                    /* for very small size */
342    if (doff == 8) {
343      vis_pst_16(dd0, dp++, emask);
344      if ((mlib_addr) dp <= (mlib_addr) dend) {
345        emask = vis_edge16(dp, dend);
346        vis_pst_16(dd1, dp++, emask);
347        if ((mlib_addr) dp <= (mlib_addr) dend) {
348          emask = vis_edge16(dp, dend);
349          vis_pst_16(dd2, dp++, emask);
350        }
351      }
352    }
353    else {
354      vis_alignaddr((void *)doff, 0);
355      vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
356      if ((mlib_addr) dp <= (mlib_addr) dend) {
357        emask = vis_edge16(dp, dend);
358        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
359        if ((mlib_addr) dp <= (mlib_addr) dend) {
360          emask = vis_edge16(dp, dend);
361          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
362        }
363      }
364    }
365  }
366
367  /* no edge handling is needed in the loop */
368  if (doff == 8) {
369    if ((mlib_addr) dp <= (mlib_addr) dend2) {
370      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
371#pragma pipeloop(0)
372      for (i = 0; i < n; i++) {
373        LOAD_EXTRACT_S16_43L;
374        *dp++ = dd0;
375        *dp++ = dd1;
376        *dp++ = dd2;
377      }
378    }
379  }
380  else {
381    if ((mlib_addr) dp <= (mlib_addr) dend2) {
382      n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
383#pragma pipeloop(0)
384      for (i = 0; i < n; i++) {
385        LOAD_EXTRACT_S16_43L;
386        vis_alignaddr((void *)doff, 0);
387        *dp++ = vis_faligndata(dd2old, dd0);
388        *dp++ = vis_faligndata(dd0, dd1);
389        *dp++ = vis_faligndata(dd1, dd2);
390      }
391    }
392  }
393
394  if ((mlib_addr) dp <= (mlib_addr) dend) {
395    LOAD_EXTRACT_S16_43L;
396    emask = vis_edge16(dp, dend);
397    if (doff == 8) {
398      vis_pst_16(dd0, dp++, emask);
399      if ((mlib_addr) dp <= (mlib_addr) dend) {
400        emask = vis_edge16(dp, dend);
401        vis_pst_16(dd1, dp++, emask);
402        if ((mlib_addr) dp <= (mlib_addr) dend) {
403          emask = vis_edge16(dp, dend);
404          vis_pst_16(dd2, dp++, emask);
405        }
406      }
407    }
408    else {
409      vis_alignaddr((void *)doff, 0);
410      vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
411      if ((mlib_addr) dp <= (mlib_addr) dend) {
412        emask = vis_edge16(dp, dend);
413        vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
414        if ((mlib_addr) dp <= (mlib_addr) dend) {
415          emask = vis_edge16(dp, dend);
416          vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
417        }
418      }
419    }
420  }
421}
422
423/***************************************************************/
424