1/*
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <math.h>
27
28#include "jni_util.h"
29#include "GraphicsPrimitiveMgr.h"
30#include "Region.h"
31
32#include "sun_java2d_loops_ScaledBlit.h"
33
34/*
35 * The scaling loops used inside the helper functions are based on the
36 * following pseudocode for stepping through the source image:
37 *
38 * shift - number of bits of sub-pixel precision in scaled values
39 * srcxorig, srcyorig - scaled location of first pixel
40 * srcxinc, srcyinc - scaled x and y increments
41 * dstwidth, dstheight - number of pixels to process across and down
42 *
43 * 1. srcy = srcyorig;
44 * 2. for (dstheight) {
45 * 3.     srcx = srcxorig;
46 * 4.     for (dstwidth) {
47 * 5.         fetch and process pixel for (srcx >> shift, srcy >> shift)
48 * 6.         srcx += srcxinc;
49 * 7.     }
50 * 8.     srcy += srcyinc;
51 * 9. }
52 *
53 * Note that each execution of line 6 or 8 accumulates error of
54 * +/- 1 into the scaled coordinate variables.  These lines are
55 * each executed once per pixel across or once per pixel down
56 * the region being iterated over, thus the error can accumulate
57 * up to a magnitude of dstwidth in the horizontal direction and
58 * dstheight in the vertical direction.
59 *
60 * If the error ever reaches a magnitude of (1 << shift) then we
61 * will be off by at least 1 source pixel in our mapping.
62 *
63 * Note that we increment the source coordinates by the srcxinc
64 * and srcyinc variables in each step.  Thus, if our error ever
65 * accumulates to a magnitude equal to srcxinc or srcyinc then
66 * we will be ahead or behind of "where we should be" by at least
67 * one iteration.  Since each iteration is a destination pixel,
68 * this means that our actual location will be off by at least
69 * one destination pixel.
70 *
71 * This means that all of the values:
72 *
73 *     - (1 << shift)
74 *     - srcxinc
75 *     - srcyinc
76 *
77 * all represent a maximum bound on how much error we can accumulate
78 * before we are off by a source or a destination pixel.  Thus,
79 * we should make sure that we never process more than that many
80 * pixels if we want to maintain single pixel accuracy.  Even
81 * better would be to process many fewer pixels than those bounds
82 * to ensure that our accumulated error is much smaller than a
83 * pixel.
84 */
85
86/*
87 * Find and return the largest tile size that is a power of 2 and
88 * which is small enough to yield some reassuring degree of subpixel
89 * accuracy.  The degree of subpixel accuracy that will be preserved
90 * by the tile size it chooses will vary and the details on how
91 * it makes this decision are detailed in the comments below.
92 */
93static jint
94findpow2tilesize(jint shift, jint sxinc, jint syinc)
95{
96    /*
97     * The initial value of shift is our first estimate for
98     * the power of 2 for our tilesize since it ensures
99     * less than 1 source pixel of error.
100     *
101     * Reducing it until (1 << shift) is not larger than the
102     * smallest of our increments ensures we will have no more
103     * than 1 destination pixel of error as well.
104     */
105    if (sxinc > syinc) {
106        sxinc = syinc;
107    }
108    if (sxinc == 0) {
109        /* Degenerate case will cause infinite loop in next loop... */
110        return 1;
111    }
112    while ((1 << shift) > sxinc) {
113        shift--;
114    }
115    /*
116     * shift is now the largest it can be for less than 1 pixel
117     * of error in either source or destination spaces.
118     *
119     * Now we will try for at least 8 bits of subpixel accuracy
120     * with a tile size of at least 256x256 and reduce our subpixel
121     * accuracy on a sliding scale down to a tilesize of 1x1 when
122     * we have no bits of sub-pixel accuracy.
123     */
124    if (shift >= 16) {
125        /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
126        shift -= 8;
127    } else {
128        /* Ask for half of the remaining bits to be subpixel accuracy. */
129        /* Rounding is in favor of subpixel accuracy over tile size. */
130        /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
131        shift /= 2;
132    }
133    return (1 << shift);
134}
135
136/*
137 * For a given integer destination pixel coordinate "id", calculate the
138 * integer destination coordinate of the start of the "ts" sized tile
139 * in which it resides.
140 * Tiles all start at even multiples of the tile size from the integer
141 * destination origin "io".
142 *
143 * id == integer destination coordinate
144 * io == integer destination operation origin
145 * ts == tilesize (must be power of 2)
146 */
147#define TILESTART(id, io, ts)   ((io) + (((id)-(io)) & (~((ts)-1))))
148
149/*
150 * For a given integer destination pixel coordinate "id", calculate the
151 * sub-pixel accurate source coordinate from which its sample comes.
152 * The returned source coordinate is expressed in a shifted fractional
153 * arithmetic number system.
154 *
155 * id == integer destination coordinate
156 * fo == floating point destination operation origin,
157 * sf == source coordinate scale factor per destination pixel
158 *       (multiplied by fractional arithmetic "shift")
159 *
160 * The caller is required to cast this value to the appropriate
161 * integer type for the needed precision.  The rendering code which
162 * deals only with valid coordinates within the bounds of the source
163 * rectangle uses jint.  The setup code, which occasionally deals
164 * with coordinates that run out of bounds, uses jlong.
165 *
166 * Note that the rounding in this calculation is at a fraction of a
167 * source pixel of (1.0 / (1<<shift)) since the scale factor includes
168 * the fractional shift.  As a result, the type of rounding used is
169 * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
170 * ceil(x-.5) version is used for consistency with the way that pixel
171 * coordinates are rounded to assign the ".5" value to the lower
172 * integer.
173 */
174#define SRCLOC(id, fo, sf)   (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
175
176/*
177 * Reverse map a srctarget coordinate into device space and refine the
178 * answer.  More specifically, what we are looking for is the smallest
179 * destination coordinate that maps to a source coordinate that is
180 * greater than or equal to the given target source coordinate.
181 *
182 * Note that since the inner loops use math that maps a destination
183 * coordinate into source space and that, even though the equation
184 * we use below is the theoretical inverse of the dst->src mapping,
185 * we cannot rely on floating point math to guarantee that applying
186 * both of these equations in sequence will give us an exact mapping
187 * of src->dst->src.  Thus, we must search back and forth to see if
188 * we really map back to the given source coordinate and that we are
189 * the smallest destination coordinate that does so.
190 *
191 * Note that, in practice, the answer from the initial guess tends to
192 * be the right answer most of the time and the loop ends up finding
193 * one iteration to be ">= srctarget" and the next to be "< srctarget"
194 * and thus finds the answer in 2 iterations.  A small number of
195 * times, the initial guess is 1 too low and so we do one iteration
196 * at "< srctarget" and the next at ">= srctarget" and again find the
197 * answer in 2 iterations.  All cases encountered during testing ended
198 * up falling into one of those 2 categories and so the loop was always
199 * executed exactly twice.
200 *
201 * Note also that the calculation of srcloc below may attempt to calculate
202 * the src location of the destination pixel which is "1 beyond" the
203 * end of the source image.  Since our shift calculation code in the
204 * main function only guaranteed that "srcw << shift" did not overflow
205 * a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
206 * or, more generally, "(srcw << shift)+srcinc" does not overflow.
207 * As a result, we perform our calculations here with jlong values
208 * so that we aren't affected by this overflow.  Since srcw (shifted)
209 * and srcinc are both 32-bit values, their sum cannot possibly overflow
210 * a jlong.  In fact, we can step up to a couple of billion steps of
211 * size "srcinc" past the end of the image before we have to worry
212 * about overflow - in practice, though, the search never steps more
213 * than 1 past the end of the image so this buffer is more than enough.
214 */
215static jint
216refine(jint intorigin, jdouble dblorigin, jint tilesize,
217       jdouble scale, jint srctarget, jint srcinc)
218{
219    /* Make a first estimate of dest coordinate from srctarget */
220    jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
221    /* Loop until we get at least one value < and one >= the target */
222    jboolean wasneg = JNI_FALSE;
223    jboolean waspos = JNI_FALSE;
224    jlong lsrcinc = srcinc;
225    jlong lsrctarget = srctarget;
226
227    while (JNI_TRUE) {
228        /*
229         * Find src coordinate from dest coordinate using the same
230         * math we will use below when iterating over tiles.
231         */
232        jint tilestart = TILESTART(dstloc, intorigin, tilesize);
233        jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
234        if (dstloc > tilestart) {
235            lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
236        }
237        if (lsrcloc >= lsrctarget) {
238            /*
239             * If we were previously less than target, then the current
240             * dstloc is the smallest dst which maps >= the target.
241             */
242            if (wasneg) break;
243            dstloc--;
244            waspos = JNI_TRUE;
245        } else {
246            /*
247             * If we were previously greater than target, then this must
248             * be the first dstloc which maps to < the target.  Since we
249             * want the smallest which maps >= the target, increment it
250             * first before returning.
251             */
252            dstloc++;
253            if (waspos) break;
254            wasneg = JNI_TRUE;
255        }
256    }
257    return dstloc;
258}
259
260/*
261 * Class:     sun_java2d_loops_ScaledBlit
262 * Method:    Scale
263 * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
264 */
265JNIEXPORT void JNICALL
266Java_sun_java2d_loops_ScaledBlit_Scale
267    (JNIEnv *env, jobject self,
268     jobject srcData, jobject dstData,
269     jobject comp, jobject clip,
270     jint sx1, jint sy1, jint sx2, jint sy2,
271     jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
272{
273    SurfaceDataOps *srcOps;
274    SurfaceDataOps *dstOps;
275    SurfaceDataRasInfo srcInfo;
276    SurfaceDataRasInfo dstInfo;
277    NativePrimitive *pPrim;
278    CompositeInfo compInfo;
279    jint sxinc, syinc, shift;
280    jint tilesize;
281    jint idx1, idy1;
282    jdouble scalex, scaley;
283    RegionData clipInfo;
284    jint dstFlags;
285    jboolean xunderflow, yunderflow;
286
287    pPrim = GetNativePrim(env, self);
288    if (pPrim == NULL) {
289        return;
290    }
291    if (pPrim->pCompType->getCompInfo != NULL) {
292        (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
293    }
294    if (Region_GetInfo(env, clip, &clipInfo)) {
295        return;
296    }
297
298    srcOps = SurfaceData_GetOps(env, srcData);
299    if (srcOps == 0) {
300        return;
301    }
302    dstOps = SurfaceData_GetOps(env, dstData);
303    if (dstOps == 0) {
304        return;
305    }
306
307    /*
308     * Determine the precision to use for the fixed point math
309     * for the coordinate scaling.
310     * - OR together srcw and srch to get the MSB between the two
311     * - Next shift it up until it goes negative
312     * - Count the shifts and that will be the most accurate
313     *   precision available for the fixed point math
314     * - a source coordinate of 1.0 will be (1 << shift)
315     * - srcw & srch will be (srcw << shift) and (srch << shift)
316     *   and will not overflow
317     * Note that if srcw or srch are so large that they are
318     * negative numbers before shifting, then:
319     * - shift will be 0
320     * - tilesize will end up being 1x1 tiles
321     * - we will brute force calculate the source location
322     *   of every destination pixel using the TILESTART and
323     *   SRCLOC macros in this function and then call the
324     *   scale helper function to copy one pixel at a time.
325     * - TILESTART involves mostly jdouble calculations so
326     *   it should not have integer overflow problems.
327     */
328    sxinc = (sx2 - sx1) | (sy2 - sy1);
329    shift = 0;
330    if (sxinc > 0) {
331        while ((sxinc <<= 1) > 0) {
332            shift++;
333        }
334    }
335    /*
336     * Now determine the scaled integer increments used to traverse
337     * the source image for each destination pixel.  Our shift value
338     * has been calculated above so that any location within the
339     * destination image can be represented as a scaled integer
340     * without incurring integer overflow.
341     *
342     * But we also need to worry about overflow of the sxinc and syinc
343     * parameters.  We already know that "srcw<<shift" and "srch<<shift"
344     * cannot overflow a jint, and the only time that sxinc and syinc
345     * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
346     * are smaller than 1.  Since this situation implies that the
347     * output area is no more than one pixel wide or tall, then we are
348     * stepping by distances that are at least the size of the image
349     * and only one destination pixel will ever be rendered - thus the
350     * amount by which we step is largely irrelevant since after
351     * drawing the first "in bounds" pixel, we will step completely
352     * out of the source image and render nothing more.  As a result,
353     * we assign the appropriate "size of image" stepping parameter
354     * for any scale to smaller than one device pixel.
355     */
356    yunderflow = (ddy2 - ddy1) < 1.0;
357    scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
358    syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
359    xunderflow = (ddx2 - ddx1) < 1.0;
360    scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
361    sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
362    tilesize = findpow2tilesize(shift, sxinc, syinc);
363
364
365    srcInfo.bounds.x1 = sx1;
366    srcInfo.bounds.y1 = sy1;
367    srcInfo.bounds.x2 = sx2;
368    srcInfo.bounds.y2 = sy2;
369    if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
370        return;
371    }
372    if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
373        srcInfo.bounds.y2 <= srcInfo.bounds.y1)
374    {
375        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
376        return;
377    }
378
379    /*
380     * Only refine lower bounds if lower source coordinate was clipped
381     * because the math will work out to be exactly idx1, idy1 if not.
382     * Always refine upper bounds since we want to make sure not to
383     * overstep the source bounds based on the tiled iteration math.
384     *
385     * For underflow cases, simply check if the SRCLOC for the single
386     * destination pixel maps inside the source bounds.  If it does,
387     * we render that pixel row or column (and only that pixel row
388     * or column).  If it does not, we render nothing.
389     */
390    idx1 = (jint) ceil(ddx1 - 0.5);
391    idy1 = (jint) ceil(ddy1 - 0.5);
392    if (xunderflow) {
393        jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
394        dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
395        if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
396            dstInfo.bounds.x2++;
397        }
398    } else {
399        dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
400                             ? idx1
401                             : refine(idx1, ddx1, tilesize, scalex,
402                                      (srcInfo.bounds.x1-sx1) << shift, sxinc));
403        dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
404                                   (srcInfo.bounds.x2-sx1) << shift, sxinc);
405    }
406    if (yunderflow) {
407        jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
408        dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
409        if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
410            dstInfo.bounds.y2++;
411        }
412    } else {
413        dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
414                             ? idy1
415                             : refine(idy1, ddy1, tilesize, scaley,
416                                      (srcInfo.bounds.y1-sy1) << shift, syinc));
417        dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
418                                   (srcInfo.bounds.y2-sy1) << shift, syinc);
419    }
420
421    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
422    dstFlags = pPrim->dstflags;
423    if (!Region_IsRectangular(&clipInfo)) {
424        dstFlags |= SD_LOCK_PARTIAL_WRITE;
425    }
426    if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
427        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
428        return;
429    }
430
431    if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
432        dstInfo.bounds.y2 > dstInfo.bounds.y1)
433    {
434        srcOps->GetRasInfo(env, srcOps, &srcInfo);
435        dstOps->GetRasInfo(env, dstOps, &dstInfo);
436        if (srcInfo.rasBase && dstInfo.rasBase) {
437            SurfaceDataBounds span;
438            void *pSrc = PtrCoord(srcInfo.rasBase,
439                                  sx1, srcInfo.pixelStride,
440                                  sy1, srcInfo.scanStride);
441
442            Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
443            Region_StartIteration(env, &clipInfo);
444            if (tilesize >= (ddx2 - ddx1) &&
445                tilesize >= (ddy2 - ddy1))
446            {
447                /* Do everything in one tile */
448                jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
449                jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
450                while (Region_NextIteration(&clipInfo, &span)) {
451                    jint tsxloc = sxloc;
452                    jint tsyloc = syloc;
453                    void *pDst;
454
455                    if (span.y1 > idy1) {
456                        tsyloc += syinc * (span.y1 - idy1);
457                    }
458                    if (span.x1 > idx1) {
459                        tsxloc += sxinc * (span.x1 - idx1);
460                    }
461
462                    pDst = PtrCoord(dstInfo.rasBase,
463                                    span.x1, dstInfo.pixelStride,
464                                    span.y1, dstInfo.scanStride);
465                    (*pPrim->funcs.scaledblit)(pSrc, pDst,
466                                               span.x2-span.x1, span.y2-span.y1,
467                                               tsxloc, tsyloc,
468                                               sxinc, syinc, shift,
469                                               &srcInfo, &dstInfo,
470                                               pPrim, &compInfo);
471                }
472            } else {
473                /* Break each clip span into tiles for better accuracy. */
474                while (Region_NextIteration(&clipInfo, &span)) {
475                    jint tilex, tiley;
476                    jint sxloc, syloc;
477                    jint x1, y1, x2, y2;
478                    void *pDst;
479
480                    for (tiley = TILESTART(span.y1, idy1, tilesize);
481                         tiley < span.y2;
482                         tiley += tilesize)
483                    {
484                        /* Clip span to Y range of current tile */
485                        y1 = tiley;
486                        y2 = tiley + tilesize;
487                        if (y1 < span.y1) y1 = span.y1;
488                        if (y2 > span.y2) y2 = span.y2;
489
490                        /* Find scaled source coordinate of first pixel */
491                        syloc = (jint) SRCLOC(tiley, ddy1, scaley);
492                        if (y1 > tiley) {
493                            syloc += syinc * (y1 - tiley);
494                        }
495
496                        for (tilex = TILESTART(span.x1, idx1, tilesize);
497                             tilex < span.x2;
498                             tilex += tilesize)
499                        {
500                            /* Clip span to X range of current tile */
501                            x1 = tilex;
502                            x2 = tilex + tilesize;
503                            if (x1 < span.x1) x1 = span.x1;
504                            if (x2 > span.x2) x2 = span.x2;
505
506                            /* Find scaled source coordinate of first pixel */
507                            sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
508                            if (x1 > tilex) {
509                                sxloc += sxinc * (x1 - tilex);
510                            }
511
512                            pDst = PtrCoord(dstInfo.rasBase,
513                                            x1, dstInfo.pixelStride,
514                                            y1, dstInfo.scanStride);
515                            (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
516                                                       sxloc, syloc,
517                                                       sxinc, syinc, shift,
518                                                       &srcInfo, &dstInfo,
519                                                       pPrim, &compInfo);
520                        }
521                    }
522                }
523            }
524            Region_EndIteration(env, &clipInfo);
525        }
526        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
527        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
528    }
529    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
530    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
531}
532