mlib_v_ImageAffine_BL_U16.c revision 14535:6968aa3bf5ac
1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28/*
29 *      The functions step along the lines from xLeft to xRight and apply
30 *      the bilinear filtering.
31 *
32 */
33
34#include "vis_proto.h"
35#include "mlib_image.h"
36#include "mlib_ImageCopy.h"
37#include "mlib_ImageAffine.h"
38#include "mlib_v_ImageFilters.h"
39#include "mlib_v_ImageChannelExtract.h"
40#include "mlib_v_ImageAffine_BL_S16.h"
41
42/*#define MLIB_VIS2*/
43
44/***************************************************************/
/* Element type name for this file. It is not referenced directly below;
 * presumably it is consumed by shared macros from mlib_ImageAffine.h
 * (TODO confirm). */
45#define DTYPE mlib_s16
46
/* Builds the routine name mlib_ImageAffine_u16_<CHAN>_bl by token pasting,
 * e.g. FUN_NAME(2ch_na) -> mlib_ImageAffine_u16_2ch_na_bl. */
47#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bl
48
49/***************************************************************/
/* Forward declarations of the "_na" variants defined later in this file.
 * The 2ch and 4ch entry points dispatch to them when their alignment test
 * on the source/destination pointers and strides fails. */
50mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param);
51mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param);
52
53/***************************************************************/
/* XORs x in place with the local mask_8000 (0x8000 replicated in every
 * 16-bit lane, see DECLAREVAR), i.e. toggles bit 15 of each lane.
 * Presumably this converts between the unsigned sample range and the signed
 * range used by the 16-bit VIS arithmetic (TODO confirm).
 * Expands to an assignment: x must be an lvalue and mask_8000 must be in scope. */
54#define XOR_8000(x) x = vis_fxor(x, mask_8000)
55
56/***************************************************************/
/* MLIB_WRITE_BMASK(bmask): in MLIB_VIS2 builds forwards to
 * vis_write_bmask(bmask, 0); in all other builds it expands to nothing.
 * (MLIB_VIS2 is commented out near the top of this file.) */
57#ifdef MLIB_VIS2
58#define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0)
59#else
60#define MLIB_WRITE_BMASK(bmask)
61#endif /* MLIB_VIS2 */
62
63/***************************************************************/
/*
 * DECLAREVAR(): local declarations shared by every routine in this file.
 * It first expands DECLAREVAR0() (defined outside this file), which
 * presumably supplies j, X, Y, dX, dY, yStart, yFinish, lineAddr, dstData
 * and dstYStride, since the routines below use those names without
 * declaring them (TODO confirm). It then declares:
 *   warp_tbl, srcYStride  - read from param;
 *   dl, i, size           - destination row pointer, loop index, row length;
 *   mask_8000, mask_7fff  - 0x8000 / 0x7FFF replicated in each 16-bit lane;
 *   dx64, dy64            - per-iteration increments of the fractions;
 *   deltax, deltay        - per-lane fractional offsets;
 *   delta1_x, delta1_y    - complements (mask_7fff - delta), set in BL_SUM;
 *   s0..s3, d0..d3, dd    - source samples, temporaries and the result.
 * The last declaration has no trailing ';' - the call site supplies it.
 */
64#undef  DECLAREVAR
65#define DECLAREVAR()                                            \
66  DECLAREVAR0();                                                \
67  mlib_s32  *warp_tbl   = param -> warp_tbl;                    \
68  mlib_s32  srcYStride = param -> srcYStride;                   \
69  mlib_u8   *dl;                                                \
70  mlib_s32  i, size;                                            \
71  mlib_d64  mask_8000 = vis_to_double_dup(0x80008000);          \
72  mlib_d64  mask_7fff = vis_to_double_dup(0x7FFF7FFF);          \
73  mlib_d64  dx64, dy64, deltax, deltay, delta1_x, delta1_y;     \
74  mlib_d64  s0, s1, s2, s3;                                     \
75  mlib_d64  d0, d1, d2, d3, dd
76
77/***************************************************************/
78
79/* arguments (x, y) are swapped to prevent overflow */
/* FMUL_16x16(x, y): per-lane 16x16 multiply built from the two partial
 * products vis_fmul8sux16 and vis_fmul8ulx16, added with vis_fpadd16.
 * Note that the intrinsics receive the operands in (y, x) order.
 * Presumably each lane ends up with the upper 16 bits of the product, as
 * described in the VIS manual (TODO confirm). Both arguments are expanded
 * twice, so they must be free of side effects. */
80#define FMUL_16x16(x, y)                        \
81  vis_fpadd16(vis_fmul8sux16(y, x),             \
82              vis_fmul8ulx16(y, x))
83
84/***************************************************************/
/* Number of mlib_d64 elements in the on-stack row buffer used by the 2ch_na,
 * 3ch and 4ch_na routines; rows needing more are staged in a mlib_malloc'ed
 * buffer instead. */
85#define BUF_SIZE  512
86
87/***************************************************************/
/* DOUBLE_4U16(x0, x1, x2, x3): packs four coordinates into the four 16-bit
 * lanes of one mlib_d64 via vis_to_double(hi32, lo32). Each lane receives
 * (x & 0xFFFE) >> 1, i.e. bits 1..15 of the argument as a 15-bit value with
 * the lane's top bit clear. x0/x1 go to the first 32-bit word (x0 in its
 * upper half), x2/x3 to the second. */
88#define DOUBLE_4U16(x0, x1, x2, x3)                                 \
89  vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)), \
90                ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1)))
91
92/***************************************************************/
/*
 * BL_SUM(): bilinear blend on four 16-bit lanes at once.
 *
 * Uses the caller's locals by name (see DECLAREVAR):
 *   in : s0, s1  - samples at the current position and its right neighbour;
 *        s2, s3  - the same two positions one source row further
 *                  (as arranged by the LOAD_* macros / callers);
 *        deltax, deltay - per-lane fractional offsets;
 *        dx64, dy64     - amounts added to the fractions per invocation.
 *   out: dd - blended result; s0..s3 are modified; deltax/deltay advanced.
 *
 * Sequence:
 *   1. XOR_8000 on all four sample vectors.
 *   2. delta1_x/delta1_y = mask_7fff - deltax/deltay (complementary weights).
 *   3. d0 = s0*delta1_x + s1*deltax, doubled, then weighted by delta1_y.
 *   4. d2 = s2*delta1_x + s3*deltax, doubled, then weighted by deltay.
 *   5. dd = d0 + d2, doubled, then XOR_8000 again.
 *      (The doublings presumably compensate for the 15-bit weight scale -
 *      TODO confirm.)
 *   6. deltax/deltay += dx64/dy64, masked with mask_7fff so each lane
 *      stays within 15 bits.
 * Expands to several statements; the final one has no trailing ';'.
 */
93#define BL_SUM()                                                \
94  XOR_8000(s0);                                                 \
95  XOR_8000(s1);                                                 \
96  XOR_8000(s2);                                                 \
97  XOR_8000(s3);                                                 \
98                                                                \
99  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
100  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
101                                                                \
102  d0 = FMUL_16x16(s0, delta1_x);                                \
103  d1 = FMUL_16x16(s1, deltax);                                  \
104  d0 = vis_fpadd16(d0, d1);                                     \
105  d0 = vis_fpadd16(d0, d0);                                     \
106  d0 = FMUL_16x16(d0, delta1_y);                                \
107                                                                \
108  d2 = FMUL_16x16(s2, delta1_x);                                \
109  d3 = FMUL_16x16(s3, deltax);                                  \
110  d2 = vis_fpadd16(d2, d3);                                     \
111  d2 = vis_fpadd16(d2, d2);                                     \
112  d2 = FMUL_16x16(d2, deltay);                                  \
113                                                                \
114  dd = vis_fpadd16(d0, d2);                                     \
115  dd = vis_fpadd16(dd, dd);                                     \
116  XOR_8000(dd);                                                 \
117                                                                \
118  deltax = vis_fpadd16(deltax, dx64);                           \
119  deltay = vis_fpadd16(deltay, dy64);                           \
120  deltax = vis_fand(deltax, mask_7fff);                         \
121  deltay = vis_fand(deltay, mask_7fff)
122
123/***************************************************************/
/*
 * BL_SUM_3CH(): variant of BL_SUM used by the 3-channel routine.
 *
 * Differences from BL_SUM:
 *   - the vertical blend is done first: d0 = s0*delta1_y + s2*deltay and
 *     d1 = s1*delta1_y + s3*deltay (each doubled), after which d0 is
 *     weighted by delta1_x and d1 by deltax;
 *   - before the final add, d0 is passed through
 *     vis_alignaddr((void*)0, 2) + vis_faligndata(d0, d0). Presumably this
 *     rotates d0 by one 16-bit lane so that it lines up with d1, because the
 *     3ch caller loads its data starting 2 bytes before the pixel
 *     (TODO confirm against the VIS manual).
 * Side effect: the vis_alignaddr call changes the alignment state used by
 * later vis_faligndata calls; the 3ch loop calls vis_alignaddr again at the
 * start of every iteration.
 * Inputs/outputs are the same caller locals as for BL_SUM.
 */
124#define BL_SUM_3CH()                                            \
125  XOR_8000(s0);                                                 \
126  XOR_8000(s1);                                                 \
127  XOR_8000(s2);                                                 \
128  XOR_8000(s3);                                                 \
129                                                                \
130  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
131  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
132                                                                \
133  d0 = FMUL_16x16(s0, delta1_y);                                \
134  d2 = FMUL_16x16(s2, deltay);                                  \
135  d0 = vis_fpadd16(d0, d2);                                     \
136  d0 = vis_fpadd16(d0, d0);                                     \
137  d0 = FMUL_16x16(d0, delta1_x);                                \
138                                                                \
139  d1 = FMUL_16x16(s1, delta1_y);                                \
140  d3 = FMUL_16x16(s3, deltay);                                  \
141  d1 = vis_fpadd16(d1, d3);                                     \
142  d1 = vis_fpadd16(d1, d1);                                     \
143  d1 = FMUL_16x16(d1, deltax);                                  \
144                                                                \
145  vis_alignaddr((void*)0, 2);                                   \
146  d0 = vis_faligndata(d0, d0);                                  \
147  dd = vis_fpadd16(d0, d1);                                     \
148  dd = vis_fpadd16(dd, dd);                                     \
149  XOR_8000(dd);                                                 \
150                                                                \
151  deltax = vis_fpadd16(deltax, dx64);                           \
152  deltay = vis_fpadd16(deltay, dy64);                           \
153  deltax = vis_fand(deltax, mask_7fff);                         \
154  deltay = vis_fand(deltay, mask_7fff)
155
156/***************************************************************/
/* LD_U16(sp, ind): loads one 16-bit sample via vis_ld_u16 from sp + ind.
 * In this file sp is always a mlib_u8 pointer, so ind is a byte offset.
 * The arguments are not parenthesised in the expansion. */
157#define LD_U16(sp, ind) vis_ld_u16(sp + ind)
158
159/***************************************************************/
/*
 * LOAD_1CH(): gathers the 2x2 neighbourhoods of four single-channel pixels
 * (byte pointers sp0..sp3) into s0..s3:
 *   s0 <- sample at sp,                 s1 <- sample at sp + 2 bytes,
 *   s2 <- sample at sp + srcYStride,    s3 <- sample at sp + srcYStride + 2.
 *
 * Non-VIS2 variant: each vis_faligndata(LD_U16(...), prev) step shifts one
 * more 16-bit sample into the vector. It depends on the alignment offset 6
 * that the 1ch routine sets with vis_alignaddr((void*)0, 6). Samples are
 * inserted in the order sp3, sp2, sp1, sp0, so presumably the lanes end up
 * ordered sp0, sp1, sp2, sp3 (TODO confirm against the VIS manual).
 * mask_7fff only serves as the initial filler value.
 *
 * VIS2 variant: combines the samples with vis_bshuffle; it needs the byte
 * mask written by the caller (0x45CD67EF) and the temporaries t0..t3 that
 * the caller declares under MLIB_VIS2.
 */
160#ifndef MLIB_VIS2
161
162#define LOAD_1CH()                                              \
163  s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff);               \
164  s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff);               \
165  s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff);      \
166  s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff);  \
167                                                                \
168  s0 = vis_faligndata(LD_U16(sp2, 0), s0);                      \
169  s1 = vis_faligndata(LD_U16(sp2, 2), s1);                      \
170  s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2);             \
171  s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3);         \
172                                                                \
173  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
174  s1 = vis_faligndata(LD_U16(sp1, 2), s1);                      \
175  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
176  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3);         \
177                                                                \
178  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
179  s1 = vis_faligndata(LD_U16(sp0, 2), s1);                      \
180  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
181  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3)
182
183#else
184
185#define LOAD_1CH()                                                             \
186  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0));                           \
187  s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2));                           \
188  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride));         \
189  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2)); \
190                                                                               \
191  t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0));                           \
192  t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2));                           \
193  t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride));         \
194  t3 = vis_bshuffle(LD_U16(sp1, srcYStride + 2), LD_U16(sp3, srcYStride + 2)); \
195                                                                               \
196  s0 = vis_bshuffle(s0, t0);                                                   \
197  s1 = vis_bshuffle(s1, t1);                                                   \
198  s2 = vis_bshuffle(s2, t2);                                                   \
199  s3 = vis_bshuffle(s3, t3)
200
201#endif /* MLIB_VIS2 */
202
203/***************************************************************/
/* GET_POINTER(sp), 1-channel version: sets sp to the byte address of the
 * source sample for the current (X, Y): the row pointer is fetched from
 * lineAddr at byte offset PTR_SHIFT(Y) and 2 bytes per integer X position
 * (X >> MLIB_SHIFT) are added. Then X and Y are advanced by dX and dY.
 * Expands to three statements, so it must only be used where a statement
 * sequence is valid (e.g. inside braces). */
204#define GET_POINTER(sp)                                                       \
205  sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \
206  X += dX;                                                                    \
207  Y += dY
208
209/***************************************************************/
/* PREPARE_DELTAS, version in effect for the 1-channel routine. It is not
 * expanded explicitly in this file; presumably NEW_LINE (defined elsewhere)
 * uses it once per row (TODO confirm). When a warp table is present, dX and
 * dY are reloaded from warp_tbl[2*j] and warp_tbl[2*j + 1], and dx64/dy64
 * are rebuilt with the low 16 bits of (dX << 1) / (dY << 1) replicated in
 * every 16-bit lane - the same formula the 1ch routine uses before its row
 * loop. */
210#undef  PREPARE_DELTAS
211#define PREPARE_DELTAS                                                             \
212  if (warp_tbl != NULL) {                                                          \
213    dX = warp_tbl[2*j    ];                                                        \
214    dY = warp_tbl[2*j + 1];                                                        \
215    dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \
216    dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \
217  }
218
219/***************************************************************/
220mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
221{
222  DECLAREVAR();
223  mlib_s32 off;
224  mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3;
225#ifdef MLIB_VIS2
226  mlib_d64 t0, t1, t2, t3;
227  vis_write_bmask(0x45CD67EF, 0);
228#else
229  vis_alignaddr((void*)0, 6);
230#endif /* MLIB_VIS2 */
231
232  dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF));
233  dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF));
234
235  for (j = yStart; j <= yFinish; j++) {
236    mlib_u8  *sp0, *sp1, *sp2, *sp3;
237    mlib_d64 *dp, dmask;
238
239    NEW_LINE(1);
240
241    off = (mlib_s32)dl & 7;
242    dp = (mlib_d64*)(dl - off);
243    off >>= 1;
244
245    x0 = X - off*dX; y0 = Y - off*dY;
246    x1 = x0 + dX;    y1 = y0 + dY;
247    x2 = x1 + dX;    y2 = y1 + dY;
248    x3 = x2 + dX;    y3 = y2 + dY;
249
250    deltax = DOUBLE_4U16(x0, x1, x2, x3);
251    deltay = DOUBLE_4U16(y0, y1, y2, y3);
252
253    if (off) {
254      mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1)));
255
256      off = 4 - off;
257      GET_POINTER(sp3);
258      sp0 = sp1 = sp2 = sp3;
259
260      if (off > 1 && size > 1) {
261        GET_POINTER(sp3);
262      }
263
264      if (off > 2) {
265        sp2 = sp3;
266
267        if (size > 2) {
268          GET_POINTER(sp3);
269        }
270      }
271
272      LOAD_1CH();
273      BL_SUM();
274
275      dmask = ((mlib_d64*)mlib_dmask_arr)[emask];
276      *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0]));
277
278      size -= off;
279
280      if (size < 0) size = 0;
281    }
282
283#pragma pipeloop(0)
284    for (i = 0; i < size/4; i++) {
285      GET_POINTER(sp0);
286      GET_POINTER(sp1);
287      GET_POINTER(sp2);
288      GET_POINTER(sp3);
289
290      LOAD_1CH();
291      BL_SUM();
292
293      dp[i] = dd;
294    }
295
296    off = size & 3;
297
298    if (off) {
299      GET_POINTER(sp0);
300      sp1 = sp2 = sp3 = sp0;
301
302      if (off > 1) {
303        GET_POINTER(sp1);
304      }
305
306      if (off > 2) {
307        GET_POINTER(sp2);
308      }
309
310      LOAD_1CH();
311      BL_SUM();
312
313      dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F];
314      dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i]));
315    }
316  }
317
318  return MLIB_SUCCESS;
319}
320
321/***************************************************************/
/* GET_POINTER(sp), aligned 2-channel version: sp is a mlib_f32 pointer, so
 * one element covers one 2-channel 16-bit pixel (4 bytes). The row pointer
 * is fetched from lineAddr at byte offset PTR_SHIFT(Y) and the integer X
 * position (X >> MLIB_SHIFT) is added in mlib_f32 units; X and Y are then
 * advanced by dX and dY. Expands to three statements. */
322#undef  GET_POINTER
323#define GET_POINTER(sp)                                                      \
324  sp = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \
325  X += dX;                                                                   \
326  Y += dY
327
328/***************************************************************/
/* LOAD_2CH(): builds s0..s3 for two 2-channel pixels addressed by the
 * mlib_f32 pointers sp0 and sp1. Each vis_freg_pair joins the 32-bit value
 * from sp0 with the one from sp1:
 *   s0 - the pixels themselves,        s1 - their right neighbours ([1]),
 *   s2 - same column, next source row, s3 - right neighbours on the next row.
 * srcYStride is used as an index here, so it must already be expressed in
 * mlib_f32 units (the 2ch routine does srcYStride >>= 2 beforehand). */
329#define LOAD_2CH()                                              \
330  s0 = vis_freg_pair(sp0[0], sp1[0]);                           \
331  s1 = vis_freg_pair(sp0[1], sp1[1]);                           \
332  s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]);         \
333  s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1])
334
335/***************************************************************/
/* PREPARE_DELTAS, version in effect for the 2-channel routines (2ch and
 * 2ch_na). Not expanded explicitly in this file; presumably used by NEW_LINE
 * (TODO confirm). With a warp table, dX/dY are reloaded for row j and
 * dx64/dy64 are rebuilt with the low 16 bits of dX / dY replicated in every
 * 16-bit lane - two pixels per BL_SUM, so the step is 2*dX/2 = dX. */
336#undef  PREPARE_DELTAS
337#define PREPARE_DELTAS                                               \
338  if (warp_tbl != NULL) {                                            \
339    dX = warp_tbl[2*j    ];                                          \
340    dY = warp_tbl[2*j + 1];                                          \
341    dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); \
342    dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); \
343  }
344
345/***************************************************************/
/*
 * Bilinear affine transform for 2-channel unsigned 16-bit images
 * (fast path for 4-byte aligned data).
 *
 * param  - affine job descriptor, unpacked by DECLAREVAR().
 * return - MLIB_SUCCESS, or whatever the 2ch_na variant returns when the
 *          alignment test fails and the call is forwarded to it.
 *
 * Two pixels (four 16-bit lanes) are produced per BL_SUM.
 */
346mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
347{
348  DECLAREVAR();
349  mlib_s32 off;
350  mlib_s32 x0, x1, y0, y1;
351
  /* Any of the low two bits set in the first row pointer, the destination
   * pointer or either stride -> use the unaligned variant. Only the low
   * bits of the pointers matter for this test. */
352  if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) {
353    return FUN_NAME(2ch_na)(param);
354  }
355
  /* Convert the source stride from bytes to mlib_f32 elements, as
   * LOAD_2CH indexes mlib_f32 pointers with it. */
356  srcYStride >>= 2;
357
358  dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
359  dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));
360
361  for (j = yStart; j <= yFinish; j++) {
362    mlib_f32 *sp0, *sp1;
363    mlib_d64 *dp;
364
    /* Defined elsewhere; presumably sets dl, size, X and Y for row j
     * (TODO confirm). */
365    NEW_LINE(2);
366
    /* dp is the 8-byte aligned word containing dl; off is dl's byte offset
     * inside it. */
367    off = (mlib_s32)dl & 7;
368    dp = (mlib_d64*)(dl - off);
369
    /* When the row starts in the second half of a word, shift the
     * fractions back by one pixel so the second lane pair matches (X, Y). */
370    if (off) {
371      x0 = X - dX; y0 = Y - dY;
372      x1 = X;      y1 = Y;
373    } else {
374      x0 = X;      y0 = Y;
375      x1 = X + dX; y1 = Y + dY;
376    }
377
    /* Both channels of a pixel share the same fraction. */
378    deltax = DOUBLE_4U16(x0, x0, x1, x1);
379    deltay = DOUBLE_4U16(y0, y0, y1, y1);
380
    /* Head: a single leading pixel; only the low half of the result is
     * stored, into the second mlib_f32 of the aligned word. */
381    if (off) {
382      GET_POINTER(sp1);
383      sp0 = sp1;
384      LOAD_2CH();
385
386      BL_SUM();
387
388      ((mlib_f32*)dp)[1] = vis_read_lo(dd);
389      dp++;
390      size--;
391    }
392
    /* Body: two pixels per iteration, stored as one full word. */
393#pragma pipeloop(0)
394    for (i = 0; i < size/2; i++) {
395      GET_POINTER(sp0);
396      GET_POINTER(sp1);
397      LOAD_2CH();
398
399      BL_SUM();
400
401      *dp++ = dd;
402    }
403
    /* Tail: one remaining pixel; only the high half of the result is
     * stored. */
404    if (size & 1) {
405      GET_POINTER(sp0);
406      sp1 = sp0;
407      LOAD_2CH();
408
409      BL_SUM();
410
411      ((mlib_f32*)dp)[0] = vis_read_hi(dd);
412    }
413  }
414
415  return MLIB_SUCCESS;
416}
417
418/***************************************************************/
/* GET_POINTER(sp), unaligned 2-channel version: sp is a byte pointer; the
 * row pointer is fetched from lineAddr at byte offset PTR_SHIFT(Y) and
 * 4 bytes per integer X position (X >> MLIB_SHIFT) are added. X and Y are
 * then advanced by dX and dY. Expands to three statements. */
419#undef  GET_POINTER
420#define GET_POINTER(sp)                                                       \
421  sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \
422  X += dX;                                                                    \
423  Y += dY
424
425/***************************************************************/
/*
 * LOAD_2CH_NA(): builds s0..s3 for two 2-channel pixels addressed by the
 * byte pointers sp0 and sp1, using 16-bit loads only (no alignment
 * assumption on the source). Byte offsets used per pointer:
 *   s0: 0, 2                            (both channels of the pixel)
 *   s1: 4, 6                            (right neighbour)
 *   s2: srcYStride, srcYStride + 2      (next source row)
 *   s3: srcYStride + 4, srcYStride + 6  (right neighbour on the next row)
 * srcYStride is therefore in bytes here.
 *
 * Non-VIS2 variant: samples are shifted in with vis_faligndata in the order
 * sp1+2, sp1+0, sp0+2, sp0+0, relying on the alignment offset 6 set by the
 * caller; presumably the lanes end up as sp0.c0, sp0.c1, sp1.c0, sp1.c1
 * (TODO confirm against the VIS manual).
 * VIS2 variant: uses vis_bshuffle with the caller-written byte mask and the
 * caller-declared temporaries t0..t3.
 */
426#ifndef MLIB_VIS2
427
428#define LOAD_2CH_NA()                                           \
429  s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff);               \
430  s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff);               \
431  s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff);  \
432  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff);  \
433                                                                \
434  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
435  s1 = vis_faligndata(LD_U16(sp1, 4), s1);                      \
436  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
437  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3);         \
438                                                                \
439  s0 = vis_faligndata(LD_U16(sp0, 2), s0);                      \
440  s1 = vis_faligndata(LD_U16(sp0, 6), s1);                      \
441  s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2);         \
442  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3);         \
443                                                                \
444  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
445  s1 = vis_faligndata(LD_U16(sp0, 4), s1);                      \
446  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
447  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3)
448
449#else
450
451#define LOAD_2CH_NA()                                                          \
452  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0));                           \
453  s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4));                           \
454  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride));         \
455  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4)); \
456                                                                               \
457  t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2));                           \
458  t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6));                           \
459  t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2)); \
460  t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6)); \
461                                                                               \
462  s0 = vis_bshuffle(s0, t0);                                                   \
463  s1 = vis_bshuffle(s1, t1);                                                   \
464  s2 = vis_bshuffle(s2, t2);                                                   \
465  s3 = vis_bshuffle(s3, t3)
466
467#endif /* MLIB_VIS2 */
468
469/***************************************************************/
/*
 * Bilinear affine transform for 2-channel unsigned 16-bit images without
 * alignment assumptions ("_na"); the 2ch routine forwards here when its
 * alignment test fails.
 *
 * param  - affine job descriptor, unpacked by DECLAREVAR(); max_xsize is
 *          additionally read to size the staging buffer.
 * return - MLIB_SUCCESS, or MLIB_FAILURE if the staging buffer cannot be
 *          allocated.
 *
 * Each row is computed into an aligned staging buffer (two pixels per
 * mlib_d64) and then copied to the destination with mlib_ImageCopy_na.
 */
470mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param)
471{
472  DECLAREVAR();
473  mlib_s32 max_xsize = param -> max_xsize, bsize;
474  mlib_s32 x0, x1, y0, y1;
475  mlib_d64 buff[BUF_SIZE], *pbuff = buff;
476#ifdef MLIB_VIS2
477  mlib_d64 t0, t1, t2, t3;
478#endif /* MLIB_VIS2 */
479
  /* Two pixels per mlib_d64, rounded up. */
480  bsize = (max_xsize + 1)/2;
481
  /* Use the stack buffer when it is large enough, otherwise the heap. */
482  if (bsize > BUF_SIZE) {
483    pbuff = mlib_malloc(bsize*sizeof(mlib_d64));
484
485    if (pbuff == NULL) return MLIB_FAILURE;
486  }
487
488  MLIB_WRITE_BMASK(0x45CD67EF);
489
490  dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
491  dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));
492
493  for (j = yStart; j <= yFinish; j++) {
494    mlib_u8 *sp0, *sp1;
495
    /* Set the alignment offset needed by LOAD_2CH_NA on every row;
     * presumably because mlib_ImageCopy_na below may change it
     * (TODO confirm). */
496#ifndef MLIB_VIS2
497    vis_alignaddr((void*)0, 6);
498#endif /* MLIB_VIS2 */
499
    /* Defined elsewhere; presumably sets dl, size, X and Y for row j
     * (TODO confirm). */
500    NEW_LINE(2);
501
502    x0 = X;      y0 = Y;
503    x1 = X + dX; y1 = Y + dY;
504
    /* Both channels of a pixel share the same fraction. */
505    deltax = DOUBLE_4U16(x0, x0, x1, x1);
506    deltay = DOUBLE_4U16(y0, y0, y1, y1);
507
    /* Body: two pixels per iteration into the staging buffer. */
508#pragma pipeloop(0)
509    for (i = 0; i < size/2; i++) {
510      GET_POINTER(sp0);
511      GET_POINTER(sp1);
512      LOAD_2CH_NA();
513
514      BL_SUM();
515
516      pbuff[i] = dd;
517    }
518
    /* Odd pixel count: compute one more word with both halves taken from
     * the same source pixel; only its first half is copied out below. */
519    if (size & 1) {
520      GET_POINTER(sp0);
521      sp1 = sp0;
522      LOAD_2CH_NA();
523
524      BL_SUM();
525
526      pbuff[i] = dd;
527    }
528
    /* 4 bytes per 2-channel pixel. */
529    mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size);
530  }
531
532  if (pbuff != buff) {
533    mlib_free(pbuff);
534  }
535
536  return MLIB_SUCCESS;
537}
538
539/***************************************************************/
/* PREPARE_DELTAS, version in effect for the 3ch, 4ch and 4ch_na routines.
 * Not expanded explicitly in this file; presumably used by NEW_LINE
 * (TODO confirm). With a warp table, dX/dY are reloaded for row j, rounded
 * towards zero to an even value, and dx64/dy64 are rebuilt with the low
 * 16 bits of dX >> 1 / dY >> 1 replicated in every 16-bit lane - one pixel
 * per blend, so the step in the >>1-scaled lanes is dX/2. */
540#undef  PREPARE_DELTAS
541#define PREPARE_DELTAS                                                             \
542  if (warp_tbl != NULL) {                                                          \
543    dX = warp_tbl[2*j    ];                                                        \
544    dY = warp_tbl[2*j + 1];                                                        \
545    dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */                       \
546    dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */                       \
547    dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \
548    dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \
549  }
550
551/***************************************************************/
/*
 * Bilinear affine transform for 3-channel unsigned 16-bit images.
 *
 * param  - affine job descriptor, unpacked by DECLAREVAR(); max_xsize is
 *          additionally read to size the staging buffer.
 * return - MLIB_SUCCESS, or MLIB_FAILURE if the staging buffer cannot be
 *          allocated.
 *
 * One pixel is computed per iteration into a 4-lane mlib_d64 slot of a
 * staging buffer; the row is then written to the destination with
 * mlib_v_ImageChannelExtract_S16_43L_D1, which presumably keeps three of
 * the four lanes of every slot (TODO confirm).
 */
552mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
553{
554  DECLAREVAR();
555  mlib_s32 max_xsize = param -> max_xsize;
556  mlib_d64 buff[BUF_SIZE], *pbuff = buff;
557
  /* One mlib_d64 per pixel; fall back to the heap for long rows. */
558  if (max_xsize > BUF_SIZE) {
559    pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
560
561    if (pbuff == NULL) return MLIB_FAILURE;
562  }
563
  /* dX/dY are made even so that the coordinate steps below stay
   * consistent with the halved increments dx64/dy64. */
564  dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
565  dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
566  dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
567  dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
568
569  for (j = yStart; j <= yFinish; j++) {
570    mlib_u8  *sp;
571    mlib_d64 *sp0, *sp1;
572
    /* Defined elsewhere; presumably sets dl, size, X and Y for row j
     * (TODO confirm). */
573    NEW_LINE(3);
574
    /* All lanes belong to the same pixel and share one fraction. */
575    deltax = DOUBLE_4U16(X, X, X, X);
576    deltay = DOUBLE_4U16(Y, Y, Y, Y);
577
578#pragma pipeloop(0)
579    for (i = 0; i < size; i++) {
      /* 6 bytes per pixel; the address is deliberately backed up by
       * 2 bytes (one sample), which BL_SUM_3CH compensates for with its
       * own realignment of d0. */
580      sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2;
581
      /* Unaligned 16-byte fetch from the current row: s0 = first 8 bytes
       * at sp, s1 = next 8 bytes. Three aligned words are read. */
582      vis_alignaddr(sp, 0);
583      sp0 = AL_ADDR(sp, 0);
584      s0 = vis_faligndata(sp0[0], sp0[1]);
585      s1 = vis_faligndata(sp0[1], sp0[2]);
586
      /* Same for the next source row (srcYStride is in bytes here). */
587      vis_alignaddr(sp, srcYStride);
588      sp1 = AL_ADDR(sp, srcYStride);
589      s2 = vis_faligndata(sp1[0], sp1[1]);
590      s3 = vis_faligndata(sp1[1], sp1[2]);
591
592      BL_SUM_3CH();
593
594      pbuff[i] = dd;
595      X += dX;
596      Y += dY;
597    }
598
599    mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size);
600  }
601
602  if (pbuff != buff) {
603    mlib_free(pbuff);
604  }
605
606  return MLIB_SUCCESS;
607}
608
609/***************************************************************/
/*
 * Bilinear affine transform for 4-channel unsigned 16-bit images
 * (fast path for 8-byte aligned data).
 *
 * param  - affine job descriptor, unpacked by DECLAREVAR().
 * return - MLIB_SUCCESS, or whatever the 4ch_na variant returns when the
 *          alignment test fails and the call is forwarded to it.
 *
 * One pixel (four 16-bit lanes = one mlib_d64) is produced per iteration
 * and stored straight into the destination row.
 */
610mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
611{
612  DECLAREVAR();
613
  /* Any of the low three bits set in the first row pointer, the
   * destination pointer or either stride -> use the unaligned variant. */
614  if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) {
615    return FUN_NAME(4ch_na)(param);
616  }
617
  /* Convert the source stride from bytes to mlib_d64 elements, as it is
   * used to index a mlib_d64 pointer below. */
618  srcYStride >>= 3;
619
620  dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
621  dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
622  dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
623  dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
624
625  for (j = yStart; j <= yFinish; j++) {
626    mlib_d64 *sp;
627
    /* Defined elsewhere; presumably sets dl, size, X and Y for row j
     * (TODO confirm). */
628    NEW_LINE(4);
629
    /* All four channels of the pixel share one fraction. */
630    deltax = DOUBLE_4U16(X, X, X, X);
631    deltay = DOUBLE_4U16(Y, Y, Y, Y);
632
633#pragma pipeloop(0)
634    for (i = 0; i < size; i++) {
      /* One mlib_d64 per pixel: pixel, right neighbour, and the same two
       * positions on the next source row. */
635      sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
636      s0 = sp[0];
637      s1 = sp[1];
638      s2 = sp[srcYStride];
639      s3 = sp[srcYStride + 1];
640
641      BL_SUM();
642
643      ((mlib_d64*)dl)[i] = dd;
644      X += dX;
645      Y += dY;
646    }
647  }
648
649  return MLIB_SUCCESS;
650}
651
652/***************************************************************/
/*
 * Bilinear affine transform for 4-channel unsigned 16-bit images without
 * alignment assumptions ("_na"); the 4ch routine forwards here when its
 * alignment test fails.
 *
 * param  - affine job descriptor, unpacked by DECLAREVAR(); max_xsize is
 *          additionally read to size the staging buffer.
 * return - MLIB_SUCCESS, or MLIB_FAILURE if the staging buffer cannot be
 *          allocated.
 *
 * One pixel per iteration is computed into an aligned staging buffer; the
 * row is then copied to the destination with mlib_ImageCopy_na.
 */
653mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param)
654{
655  DECLAREVAR();
656  mlib_s32 max_xsize = param -> max_xsize;
657  mlib_d64 buff[BUF_SIZE], *pbuff = buff;
658
  /* One mlib_d64 per pixel; fall back to the heap for long rows. */
659  if (max_xsize > BUF_SIZE) {
660    pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
661
662    if (pbuff == NULL) return MLIB_FAILURE;
663  }
664
665  dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
666  dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
667  dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
668  dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
669
670  for (j = yStart; j <= yFinish; j++) {
671    mlib_u8  *sp;
672    mlib_d64 *sp0, *sp1;
673
    /* Defined elsewhere; presumably sets dl, size, X and Y for row j
     * (TODO confirm). */
674    NEW_LINE(4);
675
    /* All four channels of the pixel share one fraction. */
676    deltax = DOUBLE_4U16(X, X, X, X);
677    deltay = DOUBLE_4U16(Y, Y, Y, Y);
678
679#pragma pipeloop(0)
680    for (i = 0; i < size; i++) {
      /* 8 bytes per pixel, byte-addressed because the row may be
       * unaligned. */
681      sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT);
682
      /* Unaligned fetch of the pixel (s0) and its right neighbour (s1);
       * three aligned words are read for the two results. */
683      vis_alignaddr(sp, 0);
684      sp0 = AL_ADDR(sp, 0);
685      s0 = vis_faligndata(sp0[0], sp0[1]);
686      s1 = vis_faligndata(sp0[1], sp0[2]);
687
      /* Same two positions on the next source row (srcYStride in bytes). */
688      vis_alignaddr(sp, srcYStride);
689      sp1 = AL_ADDR(sp, srcYStride);
690      s2 = vis_faligndata(sp1[0], sp1[1]);
691      s3 = vis_faligndata(sp1[1], sp1[2]);
692
693      BL_SUM();
694
695      pbuff[i] = dd;
696      X += dX;
697      Y += dY;
698    }
699
    /* 8 bytes per 4-channel pixel. */
700    mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size);
701  }
702
703  if (pbuff != buff) {
704    mlib_free(pbuff);
705  }
706
707  return MLIB_SUCCESS;
708}
709
710/***************************************************************/
711