1/*
2 * Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <stdlib.h>
27#include "jni_util.h"
28#include "math.h"
29
30#include "GraphicsPrimitiveMgr.h"
31#include "Region.h"
32
33#include "sun_java2d_loops_TransformHelper.h"
34#include "java_awt_image_AffineTransformOp.h"
35
/*
 * The stub functions replace the bilinear and bicubic interpolation
 * functions with NOP versions so that the performance of the helper
 * functions that fetch the data can be more directly tested.  They
 * are not compiled or enabled by default.  Change the following
 * #undef to a #define to build the stub functions.
 *
 * When compiled, they are enabled by the environment variable TXSTUB.
 * When compiled, there is also code to disable the VIS versions and
 * use the C versions in this file in their place by defining the TXNOVIS
 * environment variable.
 * Both environment variables are examined only once, on the first call
 * to the Transform primitive in this file.
 */
#undef MAKE_STUBS

/*
 * The number of IntArgbPre samples to store in the temporary buffer.
 * The Transform primitive processes at most LINE_SIZE destination pixels
 * per pass for nearest neighbor, LINE_SIZE/4 for bilinear and LINE_SIZE/16
 * for bicubic interpolation.
 */
#define LINE_SIZE       2048

/*
 * The number of scanlines whose edge coordinates fit in the stack
 * allocated edge buffer (which holds 2 + MAXEDGES*2 jints).  When no
 * edge array is supplied by the caller and more scanlines than this are
 * needed, the edge list is allocated with malloc() instead.
 */
#define MAXEDGES 1024

/* Declare the software interpolation functions (defined later in this file). */
static TransformInterpFunc BilinearInterp;
static TransformInterpFunc BicubicInterp;

#ifdef MAKE_STUBS
/* Optionally declare the stub interpolation functions. */
static TransformInterpFunc BilinearInterpStub;
static TransformInterpFunc BicubicInterpStub;
#endif /* MAKE_STUBS */

/*
 * Initially choose the software interpolation functions.
 * These choices can be overridden by platform code that runs during the
 * primitive registration phase of initialization by storing pointers to
 * better functions in these pointers.
 * Compiling the stubs also turns on code below that can re-install the
 * software functions or stub functions on the first call to this primitive.
 * The pointers are deliberately not static so that such platform code
 * can reach them.
 */
TransformInterpFunc *pBilinearFunc = BilinearInterp;
TransformInterpFunc *pBicubicFunc = BicubicInterp;
76
77/*
78 * The dxydxy parameters of the inverse transform determine how
79 * quickly we step through the source image.  For tiny scale
80 * factors (on the order of 1E-16 or so) the stepping distances
81 * are huge.  The image has been scaled so small that stepping
82 * a single pixel in device space moves the sampling point by
83 * billions (or more) pixels in the source image space.  These
84 * huge stepping values can overflow the whole part of the longs
85 * we use for the fixed point stepping equations and so we need
86 * a more robust solution.  We could simply iterate over every
87 * device pixel, use the inverse transform to transform it back
88 * into the source image coordinate system and then test it for
89 * being in range and sample pixel-by-pixel, but that is quite
90 * a bit more expensive.  Fortunately, if the scale factors are
91 * so tiny that we overflow our long values then the number of
92 * pixels we are planning to visit should be very tiny.  The only
93 * exception to that rule is if the scale factor along one
94 * dimension is tiny (creating the huge stepping values), and
95 * the scale factor along the other dimension is fairly regular
96 * or an up-scale.  In that case we have a lot of pixels along
97 * the direction of the larger axis to sample, but few along the
98 * smaller axis.  Though, pessimally, with an added shear factor
99 * such a linearly tiny image could have bounds that cover a large
100 * number of pixels.  Such odd transformations should be very
101 * rare and the absolute limit on calculations would involve a
102 * single reverse transform of every pixel in the output image
103 * which is not fast, but it should not cause an undue stall
104 * of the rendering software.
105 *
106 * The specific test we will use is to calculate the inverse
107 * transformed values of every corner of the destination bounds
108 * (in order to be user-clip independent) and if we can
109 * perform a fixed-point-long inverse transform of all of
110 * those points without overflowing we will use the fast
111 * fixed point algorithm.  Otherwise we will use the safe
112 * per-pixel transform algorithm.
113 * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth
114 * Transformed they are:
115 *     tx,               ty
116 *     tx       +dxdy*H, ty       +dydy*H
117 *     tx+dxdx*W,        ty+dydx*W
118 *     tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H
119 */
120/* We reject coordinates not less than 1<<30 so that the distance between */
121/* any 2 of them is less than 1<<31 which would overflow into the sign */
122/* bit of a signed long value used to represent fixed point coordinates. */
123#define TX_FIXED_UNSAFE(v)  (fabs(v) >= (1<<30))
124static jboolean
125checkOverflow(jint dxoff, jint dyoff,
126              SurfaceDataBounds *pBounds,
127              TransformInfo *pItxInfo,
128              jdouble *retx, jdouble *rety)
129{
130    jdouble x, y;
131
132    x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
133    y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
134    Transform_transform(pItxInfo, &x, &y);
135    *retx = x;
136    *rety = y;
137    if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
138        return JNI_TRUE;
139    }
140
141    x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
142    y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
143    Transform_transform(pItxInfo, &x, &y);
144    if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
145        return JNI_TRUE;
146    }
147
148    x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
149    y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
150    Transform_transform(pItxInfo, &x, &y);
151    if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
152        return JNI_TRUE;
153    }
154
155    x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
156    y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
157    Transform_transform(pItxInfo, &x, &y);
158    if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
159        return JNI_TRUE;
160    }
161
162    return JNI_FALSE;
163}
164
165/*
166 * Fill the edge buffer with pairs of coordinates representing the maximum
167 * left and right pixels of the destination surface that should be processed
168 * on each scanline, clipped to the bounds parameter.
169 * The number of scanlines to calculate is implied by the bounds parameter.
170 * Only pixels that map back through the specified (inverse) transform to a
171 * source coordinate that falls within the (0, 0, sw, sh) bounds of the
172 * source image should be processed.
173 * pEdges points to an array of jints that holds 2 + numedges*2 values where
174 * numedges should match (pBounds->y2 - pBounds->y1).
175 * The first two jints in pEdges should be set to y1 and y2 and every pair
176 * of jints after that represent the xmin,xmax of all pixels in range of
177 * the transformed blit for the corresponding scanline.
178 */
179static void
180calculateEdges(jint *pEdges,
181               SurfaceDataBounds *pBounds,
182               TransformInfo *pItxInfo,
183               jlong xbase, jlong ybase,
184               juint sw, juint sh)
185{
186    jlong dxdxlong, dydxlong;
187    jlong dxdylong, dydylong;
188    jlong drowxlong, drowylong;
189    jint dx1, dy1, dx2, dy2;
190
191    dxdxlong = DblToLong(pItxInfo->dxdx);
192    dydxlong = DblToLong(pItxInfo->dydx);
193    dxdylong = DblToLong(pItxInfo->dxdy);
194    dydylong = DblToLong(pItxInfo->dydy);
195
196    dx1 = pBounds->x1;
197    dy1 = pBounds->y1;
198    dx2 = pBounds->x2;
199    dy2 = pBounds->y2;
200    *pEdges++ = dy1;
201    *pEdges++ = dy2;
202
203    drowxlong = (dx2-dx1-1) * dxdxlong;
204    drowylong = (dx2-dx1-1) * dydxlong;
205
206    while (dy1 < dy2) {
207        jlong xlong, ylong;
208
209        dx1 = pBounds->x1;
210        dx2 = pBounds->x2;
211
212        xlong = xbase;
213        ylong = ybase;
214        while (dx1 < dx2 &&
215               (((juint) WholeOfLong(ylong)) >= sh ||
216                ((juint) WholeOfLong(xlong)) >= sw))
217        {
218            dx1++;
219            xlong += dxdxlong;
220            ylong += dydxlong;
221        }
222
223        xlong = xbase + drowxlong;
224        ylong = ybase + drowylong;
225        while (dx2 > dx1 &&
226               (((juint) WholeOfLong(ylong)) >= sh ||
227                ((juint) WholeOfLong(xlong)) >= sw))
228        {
229            dx2--;
230            xlong -= dxdxlong;
231            ylong -= dydxlong;
232        }
233
234        *pEdges++ = dx1;
235        *pEdges++ = dx2;
236
237        /* Increment to next scanline */
238        xbase += dxdylong;
239        ybase += dydylong;
240        dy1++;
241    }
242}
243
244static void
245Transform_SafeHelper(JNIEnv *env,
246                     SurfaceDataOps *srcOps,
247                     SurfaceDataOps *dstOps,
248                     SurfaceDataRasInfo *pSrcInfo,
249                     SurfaceDataRasInfo *pDstInfo,
250                     NativePrimitive *pMaskBlitPrim,
251                     CompositeInfo *pCompInfo,
252                     TransformHelperFunc *pHelperFunc,
253                     TransformInterpFunc *pInterpFunc,
254                     RegionData *pClipInfo, TransformInfo *pItxInfo,
255                     jint *pData, jint *pEdges,
256                     jint dxoff, jint dyoff, jint sw, jint sh);
257
258/*
259 * Class:     sun_java2d_loops_TransformHelper
260 * Method:    Transform
261 * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[I)V
262 */
263JNIEXPORT void JNICALL
264Java_sun_java2d_loops_TransformHelper_Transform
265    (JNIEnv *env, jobject self,
266     jobject maskblit,
267     jobject srcData, jobject dstData,
268     jobject comp, jobject clip,
269     jobject itxform, jint txtype,
270     jint sx1, jint sy1, jint sx2, jint sy2,
271     jint dx1, jint dy1, jint dx2, jint dy2,
272     jintArray edgeArray, jint dxoff, jint dyoff)
273{
274    SurfaceDataOps *srcOps;
275    SurfaceDataOps *dstOps;
276    SurfaceDataRasInfo srcInfo;
277    SurfaceDataRasInfo dstInfo;
278    NativePrimitive *pHelperPrim;
279    NativePrimitive *pMaskBlitPrim;
280    CompositeInfo compInfo;
281    RegionData clipInfo;
282    TransformInfo itxInfo;
283    jint maxlinepix;
284    TransformHelperFunc *pHelperFunc;
285    TransformInterpFunc *pInterpFunc;
286    jdouble xorig, yorig;
287    jlong numedges;
288    jint *pEdges;
289    jint edgebuf[2 + MAXEDGES * 2];
290    union {
291        jlong align;
292        jint data[LINE_SIZE];
293    } rgb;
294
295#ifdef MAKE_STUBS
296    static int th_initialized;
297
298    /* For debugging only - used to swap in alternate funcs for perf testing */
299    if (!th_initialized) {
300        if (getenv("TXSTUB") != 0) {
301            pBilinearFunc = BilinearInterpStub;
302            pBicubicFunc = BicubicInterpStub;
303        } else if (getenv("TXNOVIS") != 0) {
304            pBilinearFunc = BilinearInterp;
305            pBicubicFunc = BicubicInterp;
306        }
307        th_initialized = 1;
308    }
309#endif /* MAKE_STUBS */
310
311    pHelperPrim = GetNativePrim(env, self);
312    if (pHelperPrim == NULL) {
313        /* Should never happen... */
314        return;
315    }
316    pMaskBlitPrim = GetNativePrim(env, maskblit);
317    if (pMaskBlitPrim == NULL) {
318        /* Exception was thrown by GetNativePrim */
319        return;
320    }
321    if (pMaskBlitPrim->pCompType->getCompInfo != NULL) {
322        (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp);
323    }
324    if (Region_GetInfo(env, clip, &clipInfo)) {
325        return;
326    }
327
328    srcOps = SurfaceData_GetOps(env, srcData);
329    if (srcOps == 0) {
330        return;
331    }
332    dstOps = SurfaceData_GetOps(env, dstData);
333    if (dstOps == 0) {
334        return;
335    }
336
337    /*
338     * Grab the appropriate pointer to the helper and interpolation
339     * routines and calculate the maximum number of destination pixels
340     * that can be processed in one intermediate buffer based on the
341     * size of the buffer and the number of samples needed per pixel.
342     */
343    switch (txtype) {
344    case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR:
345        pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper;
346        pInterpFunc = NULL;
347        maxlinepix = LINE_SIZE;
348        break;
349    case java_awt_image_AffineTransformOp_TYPE_BILINEAR:
350        pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper;
351        pInterpFunc = pBilinearFunc;
352        maxlinepix = LINE_SIZE / 4;
353        break;
354    case java_awt_image_AffineTransformOp_TYPE_BICUBIC:
355        pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper;
356        pInterpFunc = pBicubicFunc;
357        maxlinepix = LINE_SIZE / 16;
358        break;
359    default:
360        // Should not happen, but just in case.
361        return;
362    }
363
364    srcInfo.bounds.x1 = sx1;
365    srcInfo.bounds.y1 = sy1;
366    srcInfo.bounds.x2 = sx2;
367    srcInfo.bounds.y2 = sy2;
368    dstInfo.bounds.x1 = dx1;
369    dstInfo.bounds.y1 = dy1;
370    dstInfo.bounds.x2 = dx2;
371    dstInfo.bounds.y2 = dy2;
372    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
373    if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags)
374        != SD_SUCCESS)
375    {
376        /* edgeArray should already contain zeros for min/maxy */
377        return;
378    }
379    if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags)
380        != SD_SUCCESS)
381    {
382        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
383        /* edgeArray should already contain zeros for min/maxy */
384        return;
385    }
386    Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
387    Transform_GetInfo(env, itxform, &itxInfo);
388
389    numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1));
390    if (numedges <= 0) {
391        pEdges = NULL;
392    } else if (!JNU_IsNull(env, edgeArray)) {
393        /*
394         * Ideally Java should allocate an array large enough, but if
395         * we ever have a miscommunication about the number of edge
396         * lines, or if the Java array calculation should overflow to
397         * a positive number and succeed in allocating an array that
398         * is too small, we need to verify that it can still hold the
399         * number of integers that we plan to store to be safe.
400         */
401        jsize edgesize = (*env)->GetArrayLength(env, edgeArray);
402        /* (edgesize/2 - 1) should avoid any overflow or underflow. */
403        pEdges = (((edgesize / 2) - 1) >= numedges)
404            ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL)
405            : NULL;
406    } else if (numedges > MAXEDGES) {
407        /* numedges variable (jlong) can be at most ((1<<32)-1) */
408        /* memsize can overflow a jint, but not a jlong */
409        jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges);
410        pEdges = (memsize == ((size_t) memsize))
411            ? malloc((size_t) memsize)
412            : NULL;
413    } else {
414        pEdges = edgebuf;
415    }
416
417    if (pEdges == NULL) {
418        if (!(*env)->ExceptionCheck(env) && numedges > 0) {
419            JNU_ThrowInternalError(env, "Unable to allocate edge list");
420        }
421        SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
422        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
423        /* edgeArray should already contain zeros for min/maxy */
424        return;
425    }
426
427
428    if (!Region_IsEmpty(&clipInfo)) {
429        srcOps->GetRasInfo(env, srcOps, &srcInfo);
430        dstOps->GetRasInfo(env, dstOps, &dstInfo);
431        if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) {
432            pEdges[0] = pEdges[1] = 0;
433        } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds,
434                                 &itxInfo, &xorig, &yorig))
435        {
436            Transform_SafeHelper(env, srcOps, dstOps,
437                                 &srcInfo, &dstInfo,
438                                 pMaskBlitPrim, &compInfo,
439                                 pHelperFunc, pInterpFunc,
440                                 &clipInfo, &itxInfo, rgb.data, pEdges,
441                                 dxoff, dyoff, sx2-sx1, sy2-sy1);
442        } else {
443            SurfaceDataBounds span;
444            jlong dxdxlong, dydxlong;
445            jlong dxdylong, dydylong;
446            jlong xbase, ybase;
447
448            dxdxlong = DblToLong(itxInfo.dxdx);
449            dydxlong = DblToLong(itxInfo.dydx);
450            dxdylong = DblToLong(itxInfo.dxdy);
451            dydylong = DblToLong(itxInfo.dydy);
452            xbase = DblToLong(xorig);
453            ybase = DblToLong(yorig);
454
455            calculateEdges(pEdges, &dstInfo.bounds, &itxInfo,
456                           xbase, ybase, sx2-sx1, sy2-sy1);
457
458            Region_StartIteration(env, &clipInfo);
459            while (Region_NextIteration(&clipInfo, &span)) {
460                jlong rowxlong, rowylong;
461                void *pDst;
462
463                dy1 = span.y1;
464                dy2 = span.y2;
465                rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong;
466                rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong;
467
468                while (dy1 < dy2) {
469                    jlong xlong, ylong;
470
471                    /* Note - process at most one scanline at a time. */
472
473                    dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2];
474                    dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3];
475                    if (dx1 < span.x1) dx1 = span.x1;
476                    if (dx2 > span.x2) dx2 = span.x2;
477
478                    /* All pixels from dx1 to dx2 have centers in bounds */
479                    while (dx1 < dx2) {
480                        /* Can process at most one buffer full at a time */
481                        jint numpix = dx2 - dx1;
482                        if (numpix > maxlinepix) {
483                            numpix = maxlinepix;
484                        }
485
486                        xlong =
487                            rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong);
488                        ylong =
489                            rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong);
490
491                        /* Get IntArgbPre pixel data from source */
492                        (*pHelperFunc)(&srcInfo,
493                                       rgb.data, numpix,
494                                       xlong, dxdxlong,
495                                       ylong, dydxlong);
496
497                        /* Interpolate result pixels if needed */
498                        if (pInterpFunc) {
499                            (*pInterpFunc)(rgb.data, numpix,
500                                           FractOfLong(xlong-LongOneHalf),
501                                           FractOfLong(dxdxlong),
502                                           FractOfLong(ylong-LongOneHalf),
503                                           FractOfLong(dydxlong));
504                        }
505
506                        /* Store/Composite interpolated pixels into dest */
507                        pDst = PtrCoord(dstInfo.rasBase,
508                                        dx1, dstInfo.pixelStride,
509                                        dy1, dstInfo.scanStride);
510                        (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data,
511                                                         0, 0, 0,
512                                                         numpix, 1,
513                                                         &dstInfo, &srcInfo,
514                                                         pMaskBlitPrim,
515                                                         &compInfo);
516
517                        /* Increment to next buffer worth of input pixels */
518                        dx1 += maxlinepix;
519                    }
520
521                    /* Increment to next scanline */
522                    rowxlong += dxdylong;
523                    rowylong += dydylong;
524                    dy1++;
525                }
526            }
527            Region_EndIteration(env, &clipInfo);
528        }
529        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
530        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
531    } else {
532        pEdges[0] = pEdges[1] = 0;
533    }
534
535    if (!JNU_IsNull(env, edgeArray)) {
536        (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0);
537    } else if (pEdges != edgebuf) {
538        free(pEdges);
539    }
540    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
541    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
542}
543
544static void
545Transform_SafeHelper(JNIEnv *env,
546                     SurfaceDataOps *srcOps,
547                     SurfaceDataOps *dstOps,
548                     SurfaceDataRasInfo *pSrcInfo,
549                     SurfaceDataRasInfo *pDstInfo,
550                     NativePrimitive *pMaskBlitPrim,
551                     CompositeInfo *pCompInfo,
552                     TransformHelperFunc *pHelperFunc,
553                     TransformInterpFunc *pInterpFunc,
554                     RegionData *pClipInfo, TransformInfo *pItxInfo,
555                     jint *pData, jint *pEdges,
556                     jint dxoff, jint dyoff, jint sw, jint sh)
557{
558    SurfaceDataBounds span;
559    jint dx1, dx2;
560    jint dy1, dy2;
561    jint i, iy;
562
563    dy1 = pDstInfo->bounds.y1;
564    dy2 = pDstInfo->bounds.y2;
565    dx1 = pDstInfo->bounds.x1;
566    dx2 = pDstInfo->bounds.x2;
567    pEdges[0] = dy1;
568    pEdges[1] = dy2;
569    for (iy = dy1; iy < dy2; iy++) {
570        jint i = (iy - dy1) * 2;
571        /* row spans are set to max,min until we find a pixel in range below */
572        pEdges[i + 2] = dx2;
573        pEdges[i + 3] = dx1;
574    }
575
576    Region_StartIteration(env, pClipInfo);
577    while (Region_NextIteration(pClipInfo, &span)) {
578        dy1 = span.y1;
579        dy2 = span.y2;
580        while (dy1 < dy2) {
581            dx1 = span.x1;
582            dx2 = span.x2;
583            i = (dy1 - pDstInfo->bounds.y1) * 2;
584            while (dx1 < dx2) {
585                jdouble x, y;
586                jlong xlong, ylong;
587
588                x = dxoff + dx1 + 0.5;
589                y = dyoff + dy1 + 0.5;
590                Transform_transform(pItxInfo, &x, &y);
591                xlong = DblToLong(x);
592                ylong = DblToLong(y);
593
594                /* Process only pixels with centers in bounds
595                 * Test double values to avoid overflow in conversion
596                 * to long values and then also test the long values
597                 * in case they rounded up and out of bounds during
598                 * the conversion.
599                 */
600                if (x >= 0 && y >= 0 && x < sw && y < sh &&
601                    WholeOfLong(xlong) < sw &&
602                    WholeOfLong(ylong) < sh)
603                {
604                    void *pDst;
605
606                    if (pEdges[i + 2] > dx1) {
607                        pEdges[i + 2] = dx1;
608                    }
609                    if (pEdges[i + 3] <= dx1) {
610                        pEdges[i + 3] = dx1 + 1;
611                    }
612
613                    /* Get IntArgbPre pixel data from source */
614                    (*pHelperFunc)(pSrcInfo,
615                                   pData, 1,
616                                   xlong, 0,
617                                   ylong, 0);
618
619                    /* Interpolate result pixels if needed */
620                    if (pInterpFunc) {
621                        (*pInterpFunc)(pData, 1,
622                                       FractOfLong(xlong-LongOneHalf), 0,
623                                       FractOfLong(ylong-LongOneHalf), 0);
624                    }
625
626                    /* Store/Composite interpolated pixels into dest */
627                    pDst = PtrCoord(pDstInfo->rasBase,
628                                    dx1, pDstInfo->pixelStride,
629                                    dy1, pDstInfo->scanStride);
630                    (*pMaskBlitPrim->funcs.maskblit)(pDst, pData,
631                                                     0, 0, 0,
632                                                     1, 1,
633                                                     pDstInfo, pSrcInfo,
634                                                     pMaskBlitPrim,
635                                                     pCompInfo);
636                }
637
638                /* Increment to next input pixel */
639                dx1++;
640            }
641
642            /* Increment to next scanline */
643            dy1++;
644        }
645    }
646    Region_EndIteration(env, pClipInfo);
647}
648
649#define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \
650    (((v1)<<8) + ((v2)-(v1))*(f))
651
652#define BL_ACCUM(comp) \
653    do { \
654        jint c1 = ((jubyte *) pRGB)[comp]; \
655        jint c2 = ((jubyte *) pRGB)[comp+4]; \
656        jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
657        c1 = ((jubyte *) pRGB)[comp+8]; \
658        c2 = ((jubyte *) pRGB)[comp+12]; \
659        c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
660        cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \
661        ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \
662    } while (0)
663
664static void
665BilinearInterp(jint *pRGB, jint numpix,
666               jint xfract, jint dxfract,
667               jint yfract, jint dyfract)
668{
669    jint j;
670    jint *pRes = pRGB;
671
672    for (j = 0; j < numpix; j++) {
673        jint xfactor;
674        jint yfactor;
675        xfactor = URShift(xfract, 32-8);
676        yfactor = URShift(yfract, 32-8);
677        BL_ACCUM(0);
678        BL_ACCUM(1);
679        BL_ACCUM(2);
680        BL_ACCUM(3);
681        pRes++;
682        pRGB += 4;
683        xfract += dxfract;
684        yfract += dyfract;
685    }
686}
687
/*
 * Branch-free clamp of the jint lvalue val to the range [0, max].
 * val must be a plain variable as it is read and assigned several times.
 */
#define SAT(val, max) \
    do { \
        val &= ~(val >> 31);  /* negatives become 0 */ \
        val -= max;           /* only overflows are now positive */ \
        val &= (val >> 31);   /* positives become 0 */ \
        val += max;           /* range is now [0 -> max] */ \
    } while (0)

#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH
#endif

/*
 * Each of the 3 variants below supplies the same set of definitions for
 * the bicubic code:
 *   BC_TYPE            type of the coefficients and accumulators
 *   BC_DblToCoeff(v)   converts a jdouble weight to a coefficient
 *   BC_COEFF_ONE       the coefficient value that represents 1.0
 *   BC_V_HALF          initial accumulator value (rounding bias)
 *   BC_CompToV(v)      converts an 8-bit component to BC_TYPE
 *   BC_STORE_COMPS(p)  clamps accumA/R/G/B (color components are limited
 *                      to the alpha value) and stores the packed ARGB
 *                      pixel in *p
 *
 * The alpha component is shifted into the top byte as an unsigned value
 * because left shifting a signed value of 128 or more by 24 bits is not
 * representable in a jint.
 */

#ifdef BICUBIC_USE_DBL_CAST

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | (r << 16) | (g <<  8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

#ifdef BICUBIC_USE_DBL_LUT

/* Build a table mapping each component value 0..255 to its jdouble value. */
#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
#define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
#define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)

static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | (r << 16) | (g <<  8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

#ifdef BICUBIC_USE_INT_MATH

/* Coefficients are 8.8 fixed point, so products of two carry 16 bits. */
#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *pRes = (jint) (((juint) accumA << 24) | (accumR << 16) | \
                        (accumG << 8) | (accumB)); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */

/*
 * Add the ARGB sample pRGB[index], weighted by the product of the
 * coefficients at xcindex and ycindex, to the 4 component accumulators.
 * Expects pRGB, bicubic_coeff and accumA/R/G/B to be in scope.
 */
#define BC_ACCUM(index, ycindex, xcindex) \
    do { \
        BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
        int rgb; \
        rgb = pRGB[index]; \
        accumB += BC_CompToV((rgb >>  0) & 0xff) * factor; \
        accumG += BC_CompToV((rgb >>  8) & 0xff) * factor; \
        accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \
        accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \
    } while (0)
796
797static BC_TYPE bicubic_coeff[513];
798static jboolean bicubictableinited;
799
800static void
801init_bicubic_table(jdouble A)
802{
803    /*
804     * The following formulas are designed to give smooth
805     * results when 'A' is -0.5 or -1.0.
806     */
807    int i;
808    for (i = 0; i < 256; i++) {
809        /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
810        jdouble x = i / 256.0;
811        x = ((A+2)*x - (A+3))*x*x + 1;
812        bicubic_coeff[i] = BC_DblToCoeff(x);
813    }
814
815    for (; i < 384; i++) {
816        /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
817        jdouble x = i / 256.0;
818        x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
819        bicubic_coeff[i] = BC_DblToCoeff(x);
820    }
821
822    bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2;
823
824    for (i++; i <= 512; i++) {
825        bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] +
826                                           bicubic_coeff[i-256] +
827                                           bicubic_coeff[768-i]);
828    }
829
830    bicubictableinited = JNI_TRUE;
831}
832
833static void
834BicubicInterp(jint *pRGB, jint numpix,
835              jint xfract, jint dxfract,
836              jint yfract, jint dyfract)
837{
838    jint i;
839    jint *pRes = pRGB;
840
841    if (!bicubictableinited) {
842        init_bicubic_table(-0.5);
843    }
844
845    for (i = 0; i < numpix; i++) {
846        BC_TYPE accumA, accumR, accumG, accumB;
847        jint xfactor, yfactor;
848
849        xfactor = URShift(xfract, 32-8);
850        yfactor = URShift(yfract, 32-8);
851        accumA = accumR = accumG = accumB = BC_V_HALF;
852        BC_ACCUM(0, yfactor+256, xfactor+256);
853        BC_ACCUM(1, yfactor+256, xfactor+  0);
854        BC_ACCUM(2, yfactor+256, 256-xfactor);
855        BC_ACCUM(3, yfactor+256, 512-xfactor);
856        BC_ACCUM(4, yfactor+  0, xfactor+256);
857        BC_ACCUM(5, yfactor+  0, xfactor+  0);
858        BC_ACCUM(6, yfactor+  0, 256-xfactor);
859        BC_ACCUM(7, yfactor+  0, 512-xfactor);
860        BC_ACCUM(8, 256-yfactor, xfactor+256);
861        BC_ACCUM(9, 256-yfactor, xfactor+  0);
862        BC_ACCUM(10, 256-yfactor, 256-xfactor);
863        BC_ACCUM(11, 256-yfactor, 512-xfactor);
864        BC_ACCUM(12, 512-yfactor, xfactor+256);
865        BC_ACCUM(13, 512-yfactor, xfactor+  0);
866        BC_ACCUM(14, 512-yfactor, 256-xfactor);
867        BC_ACCUM(15, 512-yfactor, 512-xfactor);
868        BC_STORE_COMPS(pRes);
869        pRes++;
870        pRGB += 16;
871        xfract += dxfract;
872        yfract += dyfract;
873    }
874}
875
876#ifdef MAKE_STUBS
877
878static void
879BilinearInterpStub(jint *pRGBbase, jint numpix,
880                   jint xfract, jint dxfract,
881                   jint yfract, jint dyfract)
882{
883    jint *pRGB = pRGBbase;
884    while (--numpix >= 0) {
885        *pRGBbase = *pRGB;
886        pRGBbase += 1;
887        pRGB += 4;
888    }
889}
890
891static void
892BicubicInterpStub(jint *pRGBbase, jint numpix,
893                  jint xfract, jint dxfract,
894                  jint yfract, jint dyfract)
895{
896    jint *pRGB = pRGBbase+5;
897    while (--numpix >= 0) {
898        *pRGBbase = *pRGB;
899        pRGBbase += 1;
900        pRGB += 16;
901    }
902}
903
904#endif /* MAKE_STUBS */
905