1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
/*
 * FUNCTION
 *      Image affine transformation with Bicubic filtering
 * SYNOPSIS
 *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
 *                                                       mlib_s32 *rightEdges,
 *                                                       mlib_s32 *xStarts,
 *                                                       mlib_s32 *yStarts,
 *                                                       mlib_s32 *sides,
 *                                                       mlib_u8  *dstData,
 *                                                       mlib_u8  **lineAddr,
 *                                                       mlib_s32 dstYStride,
 *                                                       mlib_s32 is_affine,
 *                                                       mlib_s32 srcYStride,
 *                                                       mlib_filter filter)
 *
 * ARGUMENTS
 *      leftEdges  array[dstHeight] of xLeft coordinates
 *      rightEdges array[dstHeight] of xRight coordinates
 *      xStarts    array[dstHeight] of xStart * 65536 coordinates
 *      yStarts    array[dstHeight] of yStart * 65536 coordinates
 *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
 *                 sides[2] is dx * 65536, sides[3] is dy * 65536
 *      dstData    pointer to the first pixel on (yStart - 1) line
 *      lineAddr   array[srcHeight] of pointers to the first pixel on
 *                 the corresponding lines
 *      dstYStride stride of destination image
 *      is_affine  indicator (Affine - GridWarp)
 *      srcYStride stride of source image
 *      filter     type of resampling filter
 *
 * NOTE
 *      The functions defined in this file (mlib_ImageAffine_u16_?ch_bc)
 *      take a single argument, mlib_affine_param *param, rather than the
 *      parameter list shown in the SYNOPSIS. The quantities listed under
 *      ARGUMENTS are presumably carried by that structure; see
 *      mlib_ImageAffine.h.
 *
 * DESCRIPTION
 *      The functions step along the lines from xLeft to xRight and apply
 *      the bicubic filtering.
 *
 */
63
64#include "mlib_ImageAffine.h"
65
/* Pixel type processed by every function in this file. */
#define DTYPE  mlib_u16

/*
 * Builds the name of the per-channel-count entry point, e.g.
 * FUN_NAME(1ch) expands to mlib_ImageAffine_u16_1ch_bc.
 */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bc

/*
 * Presumably log2 of the number of entries in a filter coefficient table
 * (used through FILTER_SHIFT / FILTER_MASK, which are defined outside this
 * file) -- confirm against mlib_ImageAffine.h.
 */
#define FILTER_BITS   9
71
72/***************************************************************/
73#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
74
75/***************************************************************/
/*
 * In this branch a table entry is read as four mlib_f32 coefficients
 * (16 bytes); FILTER_ELEM_BITS is presumably log2 of that entry size and
 * feeds FILTER_SHIFT / FILTER_MASK defined outside this file -- confirm
 * against mlib_ImageAffine.h.
 */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/***************************************************************/
/*
 * SAT_U16(DST): store the filter result into DST as an unsigned 16-bit
 * sample.
 *
 * The macro reads a variable named val0 from the enclosing scope. val0 is
 * treated as a value scaled by 65536: the stored sample is its upper 16
 * bits, with values outside the mlib_u32 range saturated.
 *
 * Both variants are wrapped in do { } while (0) so that "SAT_U16(x);" is
 * exactly one statement and can be used as the body of an unbraced
 * if / else without capturing the else.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/*
 * Variant that relies on the platform clamping out-of-range
 * floating-point to integer conversions: the value is biased into the
 * signed 32-bit range, converted, shifted, and the bias is undone by
 * flipping bit 15.
 */
#define SAT_U16(DST)                                                    \
  do {                                                                  \
    (DST) = ((mlib_s32)(val0 - (mlib_d64)0x7FFF8000) >> 16) ^ 0x8000;   \
  } while (0)

#else

/* Portable variant with explicit range checks. */
#define SAT_U16(DST)                                            \
  do {                                                          \
    if (val0 >= MLIB_U32_MAX)                                   \
      (DST) = MLIB_U16_MAX;                                     \
    else if (val0 <= MLIB_U32_MIN)                              \
      (DST) = MLIB_U16_MIN;                                     \
    else                                                        \
      (DST) = ((mlib_u32)val0) >> 16;                           \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
96
97/***************************************************************/
98mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
99{
100  DECLAREVAR_BC();
101  DTYPE *dstLineEnd;
102  const mlib_f32 *mlib_filters_table;
103
104  if (filter == MLIB_BICUBIC) {
105    mlib_filters_table = mlib_filters_s16f_bc;
106  }
107  else {
108    mlib_filters_table = mlib_filters_s16f_bc2;
109  }
110
111  for (j = yStart; j <= yFinish; j++) {
112    mlib_d64 xf0, xf1, xf2, xf3;
113    mlib_d64 yf0, yf1, yf2, yf3;
114    mlib_d64 c0, c1, c2, c3, val0;
115    mlib_s32 filterpos;
116    mlib_f32 *fptr;
117    mlib_s32 s0, s1, s2, s3;
118    mlib_s32 s4, s5, s6, s7;
119
120    CLIP(1);
121    dstLineEnd = (DTYPE *) dstData + xRight;
122
123    filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
124    fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
125
126    xf0 = fptr[0];
127    xf1 = fptr[1];
128    xf2 = fptr[2];
129    xf3 = fptr[3];
130
131    filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
132    fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
133
134    yf0 = fptr[0];
135    yf1 = fptr[1];
136    yf2 = fptr[2];
137    yf3 = fptr[3];
138
139    xSrc = (X >> MLIB_SHIFT) - 1;
140    ySrc = (Y >> MLIB_SHIFT) - 1;
141
142    srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
143    s0 = srcPixelPtr[0];
144    s1 = srcPixelPtr[1];
145    s2 = srcPixelPtr[2];
146    s3 = srcPixelPtr[3];
147
148    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
149    s4 = srcPixelPtr[0];
150    s5 = srcPixelPtr[1];
151    s6 = srcPixelPtr[2];
152    s7 = srcPixelPtr[3];
153
154    for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
155
156      X += dX;
157      Y += dY;
158
159      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
160      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
161      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
162      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
163            srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
164      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
165      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
166            srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
167
168      filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
169      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
170
171      xf0 = fptr[0];
172      xf1 = fptr[1];
173      xf2 = fptr[2];
174      xf3 = fptr[3];
175
176      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
177
178      filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
179      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
180
181      yf0 = fptr[0];
182      yf1 = fptr[1];
183      yf2 = fptr[2];
184      yf3 = fptr[3];
185
186      SAT_U16(dstPixelPtr[0]);
187
188      xSrc = (X >> MLIB_SHIFT) - 1;
189      ySrc = (Y >> MLIB_SHIFT) - 1;
190
191      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
192      s0 = srcPixelPtr[0];
193      s1 = srcPixelPtr[1];
194      s2 = srcPixelPtr[2];
195      s3 = srcPixelPtr[3];
196
197      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
198      s4 = srcPixelPtr[0];
199      s5 = srcPixelPtr[1];
200      s6 = srcPixelPtr[2];
201      s7 = srcPixelPtr[3];
202    }
203
204    c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
205    c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
206    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
207    c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
208          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
209    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
210    c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
211          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
212
213    val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
214    SAT_U16(dstPixelPtr[0]);
215  }
216
217  return MLIB_SUCCESS;
218}
219
220/***************************************************************/
221mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
222{
223  DECLAREVAR_BC();
224  DTYPE *dstLineEnd;
225  const mlib_f32 *mlib_filters_table;
226
227  if (filter == MLIB_BICUBIC) {
228    mlib_filters_table = mlib_filters_s16f_bc;
229  }
230  else {
231    mlib_filters_table = mlib_filters_s16f_bc2;
232  }
233
234  for (j = yStart; j <= yFinish; j++) {
235    mlib_d64 xf0, xf1, xf2, xf3;
236    mlib_d64 yf0, yf1, yf2, yf3;
237    mlib_d64 c0, c1, c2, c3, val0;
238    mlib_s32 filterpos, k;
239    mlib_f32 *fptr;
240    mlib_s32 s0, s1, s2, s3;
241    mlib_s32 s4, s5, s6, s7;
242
243    CLIP(2);
244    dstLineEnd = (DTYPE *) dstData + 2 * xRight;
245
246    for (k = 0; k < 2; k++) {
247      mlib_s32 X1 = X;
248      mlib_s32 Y1 = Y;
249      DTYPE *dPtr = dstPixelPtr + k;
250
251      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
252      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
253
254      xf0 = fptr[0];
255      xf1 = fptr[1];
256      xf2 = fptr[2];
257      xf3 = fptr[3];
258
259      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
260      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
261
262      yf0 = fptr[0];
263      yf1 = fptr[1];
264      yf2 = fptr[2];
265      yf3 = fptr[3];
266
267      xSrc = (X1 >> MLIB_SHIFT) - 1;
268      ySrc = (Y1 >> MLIB_SHIFT) - 1;
269
270      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
271      s0 = srcPixelPtr[0];
272      s1 = srcPixelPtr[2];
273      s2 = srcPixelPtr[4];
274      s3 = srcPixelPtr[6];
275
276      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
277      s4 = srcPixelPtr[0];
278      s5 = srcPixelPtr[2];
279      s6 = srcPixelPtr[4];
280      s7 = srcPixelPtr[6];
281
282      for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
283
284        X1 += dX;
285        Y1 += dY;
286
287        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
288        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
289        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
290        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
291              srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
292        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
293        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
294              srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
295
296        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
297        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
298
299        xf0 = fptr[0];
300        xf1 = fptr[1];
301        xf2 = fptr[2];
302        xf3 = fptr[3];
303
304        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
305
306        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
307        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
308
309        yf0 = fptr[0];
310        yf1 = fptr[1];
311        yf2 = fptr[2];
312        yf3 = fptr[3];
313
314        SAT_U16(dPtr[0]);
315
316        xSrc = (X1 >> MLIB_SHIFT) - 1;
317        ySrc = (Y1 >> MLIB_SHIFT) - 1;
318
319        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
320        s0 = srcPixelPtr[0];
321        s1 = srcPixelPtr[2];
322        s2 = srcPixelPtr[4];
323        s3 = srcPixelPtr[6];
324
325        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
326        s4 = srcPixelPtr[0];
327        s5 = srcPixelPtr[2];
328        s6 = srcPixelPtr[4];
329        s7 = srcPixelPtr[6];
330      }
331
332      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
333      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
334      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
335      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
336            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
337      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
338      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
339            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
340
341      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
342      SAT_U16(dPtr[0]);
343    }
344  }
345
346  return MLIB_SUCCESS;
347}
348
349/***************************************************************/
350mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
351{
352  DECLAREVAR_BC();
353  DTYPE *dstLineEnd;
354  const mlib_f32 *mlib_filters_table;
355
356  if (filter == MLIB_BICUBIC) {
357    mlib_filters_table = mlib_filters_s16f_bc;
358  }
359  else {
360    mlib_filters_table = mlib_filters_s16f_bc2;
361  }
362
363  for (j = yStart; j <= yFinish; j++) {
364    mlib_d64 xf0, xf1, xf2, xf3;
365    mlib_d64 yf0, yf1, yf2, yf3;
366    mlib_d64 c0, c1, c2, c3, val0;
367    mlib_s32 filterpos, k;
368    mlib_f32 *fptr;
369    mlib_s32 s0, s1, s2, s3;
370    mlib_s32 s4, s5, s6, s7;
371
372    CLIP(3);
373    dstLineEnd = (DTYPE *) dstData + 3 * xRight;
374
375    for (k = 0; k < 3; k++) {
376      mlib_s32 X1 = X;
377      mlib_s32 Y1 = Y;
378      DTYPE *dPtr = dstPixelPtr + k;
379
380      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
381      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
382
383      xf0 = fptr[0];
384      xf1 = fptr[1];
385      xf2 = fptr[2];
386      xf3 = fptr[3];
387
388      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
389      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
390
391      yf0 = fptr[0];
392      yf1 = fptr[1];
393      yf2 = fptr[2];
394      yf3 = fptr[3];
395
396      xSrc = (X1 >> MLIB_SHIFT) - 1;
397      ySrc = (Y1 >> MLIB_SHIFT) - 1;
398
399      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
400      s0 = srcPixelPtr[0];
401      s1 = srcPixelPtr[3];
402      s2 = srcPixelPtr[6];
403      s3 = srcPixelPtr[9];
404
405      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
406      s4 = srcPixelPtr[0];
407      s5 = srcPixelPtr[3];
408      s6 = srcPixelPtr[6];
409      s7 = srcPixelPtr[9];
410
411      for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
412
413        X1 += dX;
414        Y1 += dY;
415
416        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
417        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
418        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
419        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
420              srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
421        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
422        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
423              srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
424
425        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
426        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
427
428        xf0 = fptr[0];
429        xf1 = fptr[1];
430        xf2 = fptr[2];
431        xf3 = fptr[3];
432
433        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
434
435        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
436        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
437
438        yf0 = fptr[0];
439        yf1 = fptr[1];
440        yf2 = fptr[2];
441        yf3 = fptr[3];
442
443        SAT_U16(dPtr[0]);
444
445        xSrc = (X1 >> MLIB_SHIFT) - 1;
446        ySrc = (Y1 >> MLIB_SHIFT) - 1;
447
448        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
449        s0 = srcPixelPtr[0];
450        s1 = srcPixelPtr[3];
451        s2 = srcPixelPtr[6];
452        s3 = srcPixelPtr[9];
453
454        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
455        s4 = srcPixelPtr[0];
456        s5 = srcPixelPtr[3];
457        s6 = srcPixelPtr[6];
458        s7 = srcPixelPtr[9];
459      }
460
461      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
462      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
463      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
464      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
465            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
466      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
467      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
468            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
469
470      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
471      SAT_U16(dPtr[0]);
472    }
473  }
474
475  return MLIB_SUCCESS;
476}
477
478/***************************************************************/
479mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
480{
481  DECLAREVAR_BC();
482  DTYPE *dstLineEnd;
483  const mlib_f32 *mlib_filters_table;
484
485  if (filter == MLIB_BICUBIC) {
486    mlib_filters_table = mlib_filters_s16f_bc;
487  }
488  else {
489    mlib_filters_table = mlib_filters_s16f_bc2;
490  }
491
492  for (j = yStart; j <= yFinish; j++) {
493    mlib_d64 xf0, xf1, xf2, xf3;
494    mlib_d64 yf0, yf1, yf2, yf3;
495    mlib_d64 c0, c1, c2, c3, val0;
496    mlib_s32 filterpos, k;
497    mlib_f32 *fptr;
498    mlib_s32 s0, s1, s2, s3;
499    mlib_s32 s4, s5, s6, s7;
500
501    CLIP(4);
502    dstLineEnd = (DTYPE *) dstData + 4 * xRight;
503
504    for (k = 0; k < 4; k++) {
505      mlib_s32 X1 = X;
506      mlib_s32 Y1 = Y;
507      DTYPE *dPtr = dstPixelPtr + k;
508
509      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
510      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
511
512      xf0 = fptr[0];
513      xf1 = fptr[1];
514      xf2 = fptr[2];
515      xf3 = fptr[3];
516
517      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
518      fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
519
520      yf0 = fptr[0];
521      yf1 = fptr[1];
522      yf2 = fptr[2];
523      yf3 = fptr[3];
524
525      xSrc = (X1 >> MLIB_SHIFT) - 1;
526      ySrc = (Y1 >> MLIB_SHIFT) - 1;
527
528      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
529      s0 = srcPixelPtr[0];
530      s1 = srcPixelPtr[4];
531      s2 = srcPixelPtr[8];
532      s3 = srcPixelPtr[12];
533
534      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
535      s4 = srcPixelPtr[0];
536      s5 = srcPixelPtr[4];
537      s6 = srcPixelPtr[8];
538      s7 = srcPixelPtr[12];
539
540      for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
541
542        X1 += dX;
543        Y1 += dY;
544
545        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
546        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
547        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
548        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
549              srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
550        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
551        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
552              srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
553
554        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
555        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
556
557        xf0 = fptr[0];
558        xf1 = fptr[1];
559        xf2 = fptr[2];
560        xf3 = fptr[3];
561
562        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
563
564        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
565        fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
566
567        yf0 = fptr[0];
568        yf1 = fptr[1];
569        yf2 = fptr[2];
570        yf3 = fptr[3];
571
572        SAT_U16(dPtr[0]);
573
574        xSrc = (X1 >> MLIB_SHIFT) - 1;
575        ySrc = (Y1 >> MLIB_SHIFT) - 1;
576
577        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
578        s0 = srcPixelPtr[0];
579        s1 = srcPixelPtr[4];
580        s2 = srcPixelPtr[8];
581        s3 = srcPixelPtr[12];
582
583        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
584        s4 = srcPixelPtr[0];
585        s5 = srcPixelPtr[4];
586        s6 = srcPixelPtr[8];
587        s7 = srcPixelPtr[12];
588      }
589
590      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
591      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
592      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
593      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
594            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
595      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
596      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
597            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
598
599      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
600      SAT_U16(dPtr[0]);
601    }
602  }
603
604  return MLIB_SUCCESS;
605}
606
607#else       /* for x86, using integer multiplies is faster */
608
/*
 * Fixed-point scaling of the two filter passes in the integer branch:
 * the horizontal sums are shifted right by SHIFT_X (no rounding term is
 * added, ROUND_X is 0), the vertical sum is rounded with ROUND_Y and
 * shifted right by SHIFT_Y.
 */
#define SHIFT_X  15
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

#define SHIFT_Y  14
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/*
 * S32_TO_U16_SAT(DST): store the integer filter result into DST, clamped
 * to [MLIB_U16_MIN, MLIB_U16_MAX].
 *
 * The macro reads a variable named val0 from the enclosing scope. It is
 * wrapped in do { } while (0) so that "S32_TO_U16_SAT(x);" is exactly one
 * statement and can be used as the body of an unbraced if / else without
 * capturing the else.
 */
#define S32_TO_U16_SAT(DST)                                     \
  do {                                                          \
    if (val0 >= MLIB_U16_MAX)                                   \
      (DST) = MLIB_U16_MAX;                                     \
    else if (val0 <= MLIB_U16_MIN)                              \
      (DST) = MLIB_U16_MIN;                                     \
    else                                                        \
      (DST) = (mlib_u16)val0;                                   \
  } while (0)
622
623/***************************************************************/
624mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
625{
626  DECLAREVAR_BC();
627  DTYPE *dstLineEnd;
628  const mlib_s16 *mlib_filters_table;
629
630  if (filter == MLIB_BICUBIC) {
631    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
632  }
633  else {
634    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
635  }
636
637  for (j = yStart; j <= yFinish; j++) {
638    mlib_s32 xf0, xf1, xf2, xf3;
639    mlib_s32 yf0, yf1, yf2, yf3;
640    mlib_s32 c0, c1, c2, c3, val0;
641    mlib_s32 filterpos;
642    mlib_s16 *fptr;
643    mlib_s32 s0, s1, s2, s3;
644    mlib_s32 s4, s5, s6, s7;
645
646    CLIP(1);
647    dstLineEnd = (DTYPE *) dstData + xRight;
648
649    filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
650    fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
651
652    xf0 = fptr[0] >> 1;
653    xf1 = fptr[1] >> 1;
654    xf2 = fptr[2] >> 1;
655    xf3 = fptr[3] >> 1;
656
657    filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
658    fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
659
660    yf0 = fptr[0];
661    yf1 = fptr[1];
662    yf2 = fptr[2];
663    yf3 = fptr[3];
664
665    xSrc = (X >> MLIB_SHIFT) - 1;
666    ySrc = (Y >> MLIB_SHIFT) - 1;
667
668    srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
669    s0 = srcPixelPtr[0];
670    s1 = srcPixelPtr[1];
671    s2 = srcPixelPtr[2];
672    s3 = srcPixelPtr[3];
673
674    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
675    s4 = srcPixelPtr[0];
676    s5 = srcPixelPtr[1];
677    s6 = srcPixelPtr[2];
678    s7 = srcPixelPtr[3];
679
680    for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
681
682      X += dX;
683      Y += dY;
684
685      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
686      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
687      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
688      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
689            srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
690      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
691      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
692            srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
693
694      filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
695      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
696
697      xf0 = fptr[0] >> 1;
698      xf1 = fptr[1] >> 1;
699      xf2 = fptr[2] >> 1;
700      xf3 = fptr[3] >> 1;
701
702      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
703
704      filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
705      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
706
707      yf0 = fptr[0];
708      yf1 = fptr[1];
709      yf2 = fptr[2];
710      yf3 = fptr[3];
711
712      S32_TO_U16_SAT(dstPixelPtr[0]);
713
714      xSrc = (X >> MLIB_SHIFT) - 1;
715      ySrc = (Y >> MLIB_SHIFT) - 1;
716
717      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
718      s0 = srcPixelPtr[0];
719      s1 = srcPixelPtr[1];
720      s2 = srcPixelPtr[2];
721      s3 = srcPixelPtr[3];
722
723      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
724      s4 = srcPixelPtr[0];
725      s5 = srcPixelPtr[1];
726      s6 = srcPixelPtr[2];
727      s7 = srcPixelPtr[3];
728    }
729
730    c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
731    c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
732    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
733    c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
734          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
735    srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
736    c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
737          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
738
739    val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
740    S32_TO_U16_SAT(dstPixelPtr[0]);
741  }
742
743  return MLIB_SUCCESS;
744}
745
746/***************************************************************/
747mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
748{
749  DECLAREVAR_BC();
750  DTYPE *dstLineEnd;
751  const mlib_s16 *mlib_filters_table;
752
753  if (filter == MLIB_BICUBIC) {
754    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
755  }
756  else {
757    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
758  }
759
760  for (j = yStart; j <= yFinish; j++) {
761    mlib_s32 xf0, xf1, xf2, xf3;
762    mlib_s32 yf0, yf1, yf2, yf3;
763    mlib_s32 c0, c1, c2, c3, val0;
764    mlib_s32 filterpos, k;
765    mlib_s16 *fptr;
766    mlib_s32 s0, s1, s2, s3;
767    mlib_s32 s4, s5, s6, s7;
768
769    CLIP(2);
770    dstLineEnd = (DTYPE *) dstData + 2 * xRight;
771
772    for (k = 0; k < 2; k++) {
773      mlib_s32 X1 = X;
774      mlib_s32 Y1 = Y;
775      DTYPE *dPtr = dstPixelPtr + k;
776
777      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
778      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
779
780      xf0 = fptr[0] >> 1;
781      xf1 = fptr[1] >> 1;
782      xf2 = fptr[2] >> 1;
783      xf3 = fptr[3] >> 1;
784
785      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
786      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
787
788      yf0 = fptr[0];
789      yf1 = fptr[1];
790      yf2 = fptr[2];
791      yf3 = fptr[3];
792
793      xSrc = (X1 >> MLIB_SHIFT) - 1;
794      ySrc = (Y1 >> MLIB_SHIFT) - 1;
795
796      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
797      s0 = srcPixelPtr[0];
798      s1 = srcPixelPtr[2];
799      s2 = srcPixelPtr[4];
800      s3 = srcPixelPtr[6];
801
802      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
803      s4 = srcPixelPtr[0];
804      s5 = srcPixelPtr[2];
805      s6 = srcPixelPtr[4];
806      s7 = srcPixelPtr[6];
807
808      for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
809
810        X1 += dX;
811        Y1 += dY;
812
813        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
814        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
815        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
816        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
817              srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
818        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
819        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
820              srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
821
822        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
823        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
824
825        xf0 = fptr[0] >> 1;
826        xf1 = fptr[1] >> 1;
827        xf2 = fptr[2] >> 1;
828        xf3 = fptr[3] >> 1;
829
830        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
831
832        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
833        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
834
835        yf0 = fptr[0];
836        yf1 = fptr[1];
837        yf2 = fptr[2];
838        yf3 = fptr[3];
839
840        S32_TO_U16_SAT(dPtr[0]);
841
842        xSrc = (X1 >> MLIB_SHIFT) - 1;
843        ySrc = (Y1 >> MLIB_SHIFT) - 1;
844
845        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
846        s0 = srcPixelPtr[0];
847        s1 = srcPixelPtr[2];
848        s2 = srcPixelPtr[4];
849        s3 = srcPixelPtr[6];
850
851        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
852        s4 = srcPixelPtr[0];
853        s5 = srcPixelPtr[2];
854        s6 = srcPixelPtr[4];
855        s7 = srcPixelPtr[6];
856      }
857
858      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
859      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
860      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
861      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
862            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
863      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
864      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
865            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
866
867      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
868      S32_TO_U16_SAT(dPtr[0]);
869    }
870  }
871
872  return MLIB_SUCCESS;
873}
874
875/***************************************************************/
876mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
877{
878  DECLAREVAR_BC();
879  DTYPE *dstLineEnd;
880  const mlib_s16 *mlib_filters_table;
881
882  if (filter == MLIB_BICUBIC) {
883    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
884  }
885  else {
886    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
887  }
888
889  for (j = yStart; j <= yFinish; j++) {
890    mlib_s32 xf0, xf1, xf2, xf3;
891    mlib_s32 yf0, yf1, yf2, yf3;
892    mlib_s32 c0, c1, c2, c3, val0;
893    mlib_s32 filterpos, k;
894    mlib_s16 *fptr;
895    mlib_s32 s0, s1, s2, s3;
896    mlib_s32 s4, s5, s6, s7;
897
898    CLIP(3);
899    dstLineEnd = (DTYPE *) dstData + 3 * xRight;
900
901    for (k = 0; k < 3; k++) {
902      mlib_s32 X1 = X;
903      mlib_s32 Y1 = Y;
904      DTYPE *dPtr = dstPixelPtr + k;
905
906      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
907      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
908
909      xf0 = fptr[0] >> 1;
910      xf1 = fptr[1] >> 1;
911      xf2 = fptr[2] >> 1;
912      xf3 = fptr[3] >> 1;
913
914      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
915      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
916
917      yf0 = fptr[0];
918      yf1 = fptr[1];
919      yf2 = fptr[2];
920      yf3 = fptr[3];
921
922      xSrc = (X1 >> MLIB_SHIFT) - 1;
923      ySrc = (Y1 >> MLIB_SHIFT) - 1;
924
925      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
926      s0 = srcPixelPtr[0];
927      s1 = srcPixelPtr[3];
928      s2 = srcPixelPtr[6];
929      s3 = srcPixelPtr[9];
930
931      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
932      s4 = srcPixelPtr[0];
933      s5 = srcPixelPtr[3];
934      s6 = srcPixelPtr[6];
935      s7 = srcPixelPtr[9];
936
937      for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
938
939        X1 += dX;
940        Y1 += dY;
941
942        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
943        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
944        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
945        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
946              srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
947        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
948        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
949              srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
950
951        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
952        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
953
954        xf0 = fptr[0] >> 1;
955        xf1 = fptr[1] >> 1;
956        xf2 = fptr[2] >> 1;
957        xf3 = fptr[3] >> 1;
958
959        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
960
961        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
962        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
963
964        yf0 = fptr[0];
965        yf1 = fptr[1];
966        yf2 = fptr[2];
967        yf3 = fptr[3];
968
969        S32_TO_U16_SAT(dPtr[0]);
970
971        xSrc = (X1 >> MLIB_SHIFT) - 1;
972        ySrc = (Y1 >> MLIB_SHIFT) - 1;
973
974        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
975        s0 = srcPixelPtr[0];
976        s1 = srcPixelPtr[3];
977        s2 = srcPixelPtr[6];
978        s3 = srcPixelPtr[9];
979
980        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
981        s4 = srcPixelPtr[0];
982        s5 = srcPixelPtr[3];
983        s6 = srcPixelPtr[6];
984        s7 = srcPixelPtr[9];
985      }
986
987      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
988      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
989      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
990      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
991            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
992      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
993      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
994            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
995
996      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
997      S32_TO_U16_SAT(dPtr[0]);
998    }
999  }
1000
1001  return MLIB_SUCCESS;
1002}
1003
1004/***************************************************************/
1005mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
1006{
1007  DECLAREVAR_BC();
1008  DTYPE *dstLineEnd;
1009  const mlib_s16 *mlib_filters_table;
1010
1011  if (filter == MLIB_BICUBIC) {
1012    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
1013  }
1014  else {
1015    mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
1016  }
1017
1018  for (j = yStart; j <= yFinish; j++) {
1019    mlib_s32 xf0, xf1, xf2, xf3;
1020    mlib_s32 yf0, yf1, yf2, yf3;
1021    mlib_s32 c0, c1, c2, c3, val0;
1022    mlib_s32 filterpos, k;
1023    mlib_s16 *fptr;
1024    mlib_s32 s0, s1, s2, s3;
1025    mlib_s32 s4, s5, s6, s7;
1026
1027    CLIP(4);
1028    dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1029
1030    for (k = 0; k < 4; k++) {
1031      mlib_s32 X1 = X;
1032      mlib_s32 Y1 = Y;
1033      DTYPE *dPtr = dstPixelPtr + k;
1034
1035      filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1036      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1037
1038      xf0 = fptr[0] >> 1;
1039      xf1 = fptr[1] >> 1;
1040      xf2 = fptr[2] >> 1;
1041      xf3 = fptr[3] >> 1;
1042
1043      filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1044      fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1045
1046      yf0 = fptr[0];
1047      yf1 = fptr[1];
1048      yf2 = fptr[2];
1049      yf3 = fptr[3];
1050
1051      xSrc = (X1 >> MLIB_SHIFT) - 1;
1052      ySrc = (Y1 >> MLIB_SHIFT) - 1;
1053
1054      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1055      s0 = srcPixelPtr[0];
1056      s1 = srcPixelPtr[4];
1057      s2 = srcPixelPtr[8];
1058      s3 = srcPixelPtr[12];
1059
1060      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1061      s4 = srcPixelPtr[0];
1062      s5 = srcPixelPtr[4];
1063      s6 = srcPixelPtr[8];
1064      s7 = srcPixelPtr[12];
1065
1066      for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1067
1068        X1 += dX;
1069        Y1 += dY;
1070
1071        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1072        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1073        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1074        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1075              srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1076        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1077        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1078              srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1079
1080        filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1081        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1082
1083        xf0 = fptr[0] >> 1;
1084        xf1 = fptr[1] >> 1;
1085        xf2 = fptr[2] >> 1;
1086        xf3 = fptr[3] >> 1;
1087
1088        val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1089
1090        filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1091        fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1092
1093        yf0 = fptr[0];
1094        yf1 = fptr[1];
1095        yf2 = fptr[2];
1096        yf3 = fptr[3];
1097
1098        S32_TO_U16_SAT(dPtr[0]);
1099
1100        xSrc = (X1 >> MLIB_SHIFT) - 1;
1101        ySrc = (Y1 >> MLIB_SHIFT) - 1;
1102
1103        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1104        s0 = srcPixelPtr[0];
1105        s1 = srcPixelPtr[4];
1106        s2 = srcPixelPtr[8];
1107        s3 = srcPixelPtr[12];
1108
1109        srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1110        s4 = srcPixelPtr[0];
1111        s5 = srcPixelPtr[4];
1112        s6 = srcPixelPtr[8];
1113        s7 = srcPixelPtr[12];
1114      }
1115
1116      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1117      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1118      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1119      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1120            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1121      srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1122      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1123            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1124
1125      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1126      S32_TO_U16_SAT(dPtr[0]);
1127    }
1128  }
1129
1130  return MLIB_SUCCESS;
1131}
1132
1133#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1134
1135/***************************************************************/
1136