1/* 2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26#include <math.h> 27 28#include "jni_util.h" 29#include "GraphicsPrimitiveMgr.h" 30#include "Region.h" 31 32#include "sun_java2d_loops_ScaledBlit.h" 33 34/* 35 * The scaling loops used inside the helper functions are based on the 36 * following pseudocode for stepping through the source image: 37 * 38 * shift - number of bits of sub-pixel precision in scaled values 39 * srcxorig, srcyorig - scaled location of first pixel 40 * srcxinc, srcyinc - scaled x and y increments 41 * dstwidth, dstheight - number of pixels to process across and down 42 * 43 * 1. srcy = srcyorig; 44 * 2. for (dstheight) { 45 * 3. srcx = srcxorig; 46 * 4. for (dstwidth) { 47 * 5. fetch and process pixel for (srcx >> shift, srcy >> shift) 48 * 6. srcx += srcxinc; 49 * 7. } 50 * 8. srcy += srcyinc; 51 * 9. } 52 * 53 * Note that each execution of line 6 or 8 accumulates error of 54 * +/- 1 into the scaled coordinate variables. These lines are 55 * each executed once per pixel across or once per pixel down 56 * the region being iterated over, thus the error can accumulate 57 * up to a magnitude of dstwidth in the horizontal direction and 58 * dstheight in the vertical direction. 59 * 60 * If the error ever reaches a magnitude of (1 << shift) then we 61 * will be off by at least 1 source pixel in our mapping. 62 * 63 * Note that we increment the source coordinates by the srcxinc 64 * and srcyinc variables in each step. Thus, if our error ever 65 * accumulates to a magnitude equal to srcxinc or srcyinc then 66 * we will be ahead or behind of "where we should be" by at least 67 * one iteration. Since each iteration is a destination pixel, 68 * this means that our actual location will be off by at least 69 * one destination pixel. 70 * 71 * This means that all of the values: 72 * 73 * - (1 << shift) 74 * - srcxinc 75 * - srcyinc 76 * 77 * all represent a maximum bound on how much error we can accumulate 78 * before we are off by a source or a destination pixel. Thus, 79 * we should make sure that we never process more than that many 80 * pixels if we want to maintain single pixel accuracy. Even 81 * better would be to process many fewer pixels than those bounds 82 * to ensure that our accumulated error is much smaller than a 83 * pixel. 84 */ 85 86/* 87 * Find and return the largest tile size that is a power of 2 and 88 * which is small enough to yield some reassuring degree of subpixel 89 * accuracy. The degree of subpixel accuracy that will be preserved 90 * by the tile size it chooses will vary and the details on how 91 * it makes this decision are detailed in the comments below. 92 */ 93static jint 94findpow2tilesize(jint shift, jint sxinc, jint syinc) 95{ 96 /* 97 * The initial value of shift is our first estimate for 98 * the power of 2 for our tilesize since it ensures 99 * less than 1 source pixel of error. 100 * 101 * Reducing it until (1 << shift) is not larger than the 102 * smallest of our increments ensures we will have no more 103 * than 1 destination pixel of error as well. 104 */ 105 if (sxinc > syinc) { 106 sxinc = syinc; 107 } 108 if (sxinc == 0) { 109 /* Degenerate case will cause infinite loop in next loop... */ 110 return 1; 111 } 112 while ((1 << shift) > sxinc) { 113 shift--; 114 } 115 /* 116 * shift is now the largest it can be for less than 1 pixel 117 * of error in either source or destination spaces. 118 * 119 * Now we will try for at least 8 bits of subpixel accuracy 120 * with a tile size of at least 256x256 and reduce our subpixel 121 * accuracy on a sliding scale down to a tilesize of 1x1 when 122 * we have no bits of sub-pixel accuracy. 123 */ 124 if (shift >= 16) { 125 /* Subtracting 8 asks for 8 bits of subpixel accuracy. */ 126 shift -= 8; 127 } else { 128 /* Ask for half of the remaining bits to be subpixel accuracy. */ 129 /* Rounding is in favor of subpixel accuracy over tile size. */ 130 /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */ 131 shift /= 2; 132 } 133 return (1 << shift); 134} 135 136/* 137 * For a given integer destination pixel coordinate "id", calculate the 138 * integer destination coordinate of the start of the "ts" sized tile 139 * in which it resides. 140 * Tiles all start at even multiples of the tile size from the integer 141 * destination origin "io". 142 * 143 * id == integer destination coordinate 144 * io == integer destination operation origin 145 * ts == tilesize (must be power of 2) 146 */ 147#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & (~((ts)-1)))) 148 149/* 150 * For a given integer destination pixel coordinate "id", calculate the 151 * sub-pixel accurate source coordinate from which its sample comes. 152 * The returned source coordinate is expressed in a shifted fractional 153 * arithmetic number system. 154 * 155 * id == integer destination coordinate 156 * fo == floating point destination operation origin, 157 * sf == source coordinate scale factor per destination pixel 158 * (multiplied by fractional arithmetic "shift") 159 * 160 * The caller is required to cast this value to the appropriate 161 * integer type for the needed precision. The rendering code which 162 * deals only with valid coordinates within the bounds of the source 163 * rectangle uses jint. The setup code, which occasionally deals 164 * with coordinates that run out of bounds, uses jlong. 165 * 166 * Note that the rounding in this calculation is at a fraction of a 167 * source pixel of (1.0 / (1<<shift)) since the scale factor includes 168 * the fractional shift. As a result, the type of rounding used is 169 * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the 170 * ceil(x-.5) version is used for consistency with the way that pixel 171 * coordinates are rounded to assign the ".5" value to the lower 172 * integer. 173 */ 174#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5)) 175 176/* 177 * Reverse map a srctarget coordinate into device space and refine the 178 * answer. More specifically, what we are looking for is the smallest 179 * destination coordinate that maps to a source coordinate that is 180 * greater than or equal to the given target source coordinate. 181 * 182 * Note that since the inner loops use math that maps a destination 183 * coordinate into source space and that, even though the equation 184 * we use below is the theoretical inverse of the dst->src mapping, 185 * we cannot rely on floating point math to guarantee that applying 186 * both of these equations in sequence will give us an exact mapping 187 * of src->dst->src. Thus, we must search back and forth to see if 188 * we really map back to the given source coordinate and that we are 189 * the smallest destination coordinate that does so. 190 * 191 * Note that, in practice, the answer from the initial guess tends to 192 * be the right answer most of the time and the loop ends up finding 193 * one iteration to be ">= srctarget" and the next to be "< srctarget" 194 * and thus finds the answer in 2 iterations. A small number of 195 * times, the initial guess is 1 too low and so we do one iteration 196 * at "< srctarget" and the next at ">= srctarget" and again find the 197 * answer in 2 iterations. All cases encountered during testing ended 198 * up falling into one of those 2 categories and so the loop was always 199 * executed exactly twice. 200 * 201 * Note also that the calculation of srcloc below may attempt to calculate 202 * the src location of the destination pixel which is "1 beyond" the 203 * end of the source image. Since our shift calculation code in the 204 * main function only guaranteed that "srcw << shift" did not overflow 205 * a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift" 206 * or, more generally, "(srcw << shift)+srcinc" does not overflow. 207 * As a result, we perform our calculations here with jlong values 208 * so that we aren't affected by this overflow. Since srcw (shifted) 209 * and srcinc are both 32-bit values, their sum cannot possibly overflow 210 * a jlong. In fact, we can step up to a couple of billion steps of 211 * size "srcinc" past the end of the image before we have to worry 212 * about overflow - in practice, though, the search never steps more 213 * than 1 past the end of the image so this buffer is more than enough. 214 */ 215static jint 216refine(jint intorigin, jdouble dblorigin, jint tilesize, 217 jdouble scale, jint srctarget, jint srcinc) 218{ 219 /* Make a first estimate of dest coordinate from srctarget */ 220 jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5); 221 /* Loop until we get at least one value < and one >= the target */ 222 jboolean wasneg = JNI_FALSE; 223 jboolean waspos = JNI_FALSE; 224 jlong lsrcinc = srcinc; 225 jlong lsrctarget = srctarget; 226 227 while (JNI_TRUE) { 228 /* 229 * Find src coordinate from dest coordinate using the same 230 * math we will use below when iterating over tiles. 231 */ 232 jint tilestart = TILESTART(dstloc, intorigin, tilesize); 233 jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale); 234 if (dstloc > tilestart) { 235 lsrcloc += lsrcinc * ((jlong) dstloc - tilestart); 236 } 237 if (lsrcloc >= lsrctarget) { 238 /* 239 * If we were previously less than target, then the current 240 * dstloc is the smallest dst which maps >= the target. 241 */ 242 if (wasneg) break; 243 dstloc--; 244 waspos = JNI_TRUE; 245 } else { 246 /* 247 * If we were previously greater than target, then this must 248 * be the first dstloc which maps to < the target. Since we 249 * want the smallest which maps >= the target, increment it 250 * first before returning. 251 */ 252 dstloc++; 253 if (waspos) break; 254 wasneg = JNI_TRUE; 255 } 256 } 257 return dstloc; 258} 259 260/* 261 * Class: sun_java2d_loops_ScaledBlit 262 * Method: Scale 263 * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V 264 */ 265JNIEXPORT void JNICALL 266Java_sun_java2d_loops_ScaledBlit_Scale 267 (JNIEnv *env, jobject self, 268 jobject srcData, jobject dstData, 269 jobject comp, jobject clip, 270 jint sx1, jint sy1, jint sx2, jint sy2, 271 jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2) 272{ 273 SurfaceDataOps *srcOps; 274 SurfaceDataOps *dstOps; 275 SurfaceDataRasInfo srcInfo; 276 SurfaceDataRasInfo dstInfo; 277 NativePrimitive *pPrim; 278 CompositeInfo compInfo; 279 jint sxinc, syinc, shift; 280 jint tilesize; 281 jint idx1, idy1; 282 jdouble scalex, scaley; 283 RegionData clipInfo; 284 jint dstFlags; 285 jboolean xunderflow, yunderflow; 286 287 pPrim = GetNativePrim(env, self); 288 if (pPrim == NULL) { 289 return; 290 } 291 if (pPrim->pCompType->getCompInfo != NULL) { 292 (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp); 293 } 294 if (Region_GetInfo(env, clip, &clipInfo)) { 295 return; 296 } 297 298 srcOps = SurfaceData_GetOps(env, srcData); 299 if (srcOps == 0) { 300 return; 301 } 302 dstOps = SurfaceData_GetOps(env, dstData); 303 if (dstOps == 0) { 304 return; 305 } 306 307 /* 308 * Determine the precision to use for the fixed point math 309 * for the coordinate scaling. 310 * - OR together srcw and srch to get the MSB between the two 311 * - Next shift it up until it goes negative 312 * - Count the shifts and that will be the most accurate 313 * precision available for the fixed point math 314 * - a source coordinate of 1.0 will be (1 << shift) 315 * - srcw & srch will be (srcw << shift) and (srch << shift) 316 * and will not overflow 317 * Note that if srcw or srch are so large that they are 318 * negative numbers before shifting, then: 319 * - shift will be 0 320 * - tilesize will end up being 1x1 tiles 321 * - we will brute force calculate the source location 322 * of every destination pixel using the TILESTART and 323 * SRCLOC macros in this function and then call the 324 * scale helper function to copy one pixel at a time. 325 * - TILESTART involves mostly jdouble calculations so 326 * it should not have integer overflow problems. 327 */ 328 sxinc = (sx2 - sx1) | (sy2 - sy1); 329 shift = 0; 330 if (sxinc > 0) { 331 while ((sxinc <<= 1) > 0) { 332 shift++; 333 } 334 } 335 /* 336 * Now determine the scaled integer increments used to traverse 337 * the source image for each destination pixel. Our shift value 338 * has been calculated above so that any location within the 339 * destination image can be represented as a scaled integer 340 * without incurring integer overflow. 341 * 342 * But we also need to worry about overflow of the sxinc and syinc 343 * parameters. We already know that "srcw<<shift" and "srch<<shift" 344 * cannot overflow a jint, and the only time that sxinc and syinc 345 * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1 346 * are smaller than 1. Since this situation implies that the 347 * output area is no more than one pixel wide or tall, then we are 348 * stepping by distances that are at least the size of the image 349 * and only one destination pixel will ever be rendered - thus the 350 * amount by which we step is largely irrelevant since after 351 * drawing the first "in bounds" pixel, we will step completely 352 * out of the source image and render nothing more. As a result, 353 * we assign the appropriate "size of image" stepping parameter 354 * for any scale to smaller than one device pixel. 355 */ 356 yunderflow = (ddy2 - ddy1) < 1.0; 357 scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift); 358 syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley); 359 xunderflow = (ddx2 - ddx1) < 1.0; 360 scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift); 361 sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex); 362 tilesize = findpow2tilesize(shift, sxinc, syinc); 363 364 365 srcInfo.bounds.x1 = sx1; 366 srcInfo.bounds.y1 = sy1; 367 srcInfo.bounds.x2 = sx2; 368 srcInfo.bounds.y2 = sy2; 369 if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) { 370 return; 371 } 372 if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 || 373 srcInfo.bounds.y2 <= srcInfo.bounds.y1) 374 { 375 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); 376 return; 377 } 378 379 /* 380 * Only refine lower bounds if lower source coordinate was clipped 381 * because the math will work out to be exactly idx1, idy1 if not. 382 * Always refine upper bounds since we want to make sure not to 383 * overstep the source bounds based on the tiled iteration math. 384 * 385 * For underflow cases, simply check if the SRCLOC for the single 386 * destination pixel maps inside the source bounds. If it does, 387 * we render that pixel row or column (and only that pixel row 388 * or column). If it does not, we render nothing. 389 */ 390 idx1 = (jint) ceil(ddx1 - 0.5); 391 idy1 = (jint) ceil(ddy1 - 0.5); 392 if (xunderflow) { 393 jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift)); 394 dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1; 395 if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) { 396 dstInfo.bounds.x2++; 397 } 398 } else { 399 dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1) 400 ? idx1 401 : refine(idx1, ddx1, tilesize, scalex, 402 (srcInfo.bounds.x1-sx1) << shift, sxinc)); 403 dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex, 404 (srcInfo.bounds.x2-sx1) << shift, sxinc); 405 } 406 if (yunderflow) { 407 jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift)); 408 dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1; 409 if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) { 410 dstInfo.bounds.y2++; 411 } 412 } else { 413 dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1) 414 ? idy1 415 : refine(idy1, ddy1, tilesize, scaley, 416 (srcInfo.bounds.y1-sy1) << shift, syinc)); 417 dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley, 418 (srcInfo.bounds.y2-sy1) << shift, syinc); 419 } 420 421 SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds); 422 dstFlags = pPrim->dstflags; 423 if (!Region_IsRectangular(&clipInfo)) { 424 dstFlags |= SD_LOCK_PARTIAL_WRITE; 425 } 426 if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) { 427 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); 428 return; 429 } 430 431 if (dstInfo.bounds.x2 > dstInfo.bounds.x1 && 432 dstInfo.bounds.y2 > dstInfo.bounds.y1) 433 { 434 srcOps->GetRasInfo(env, srcOps, &srcInfo); 435 dstOps->GetRasInfo(env, dstOps, &dstInfo); 436 if (srcInfo.rasBase && dstInfo.rasBase) { 437 SurfaceDataBounds span; 438 void *pSrc = PtrCoord(srcInfo.rasBase, 439 sx1, srcInfo.pixelStride, 440 sy1, srcInfo.scanStride); 441 442 Region_IntersectBounds(&clipInfo, &dstInfo.bounds); 443 Region_StartIteration(env, &clipInfo); 444 if (tilesize >= (ddx2 - ddx1) && 445 tilesize >= (ddy2 - ddy1)) 446 { 447 /* Do everything in one tile */ 448 jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex); 449 jint syloc = (jint) SRCLOC(idy1, ddy1, scaley); 450 while (Region_NextIteration(&clipInfo, &span)) { 451 jint tsxloc = sxloc; 452 jint tsyloc = syloc; 453 void *pDst; 454 455 if (span.y1 > idy1) { 456 tsyloc += syinc * (span.y1 - idy1); 457 } 458 if (span.x1 > idx1) { 459 tsxloc += sxinc * (span.x1 - idx1); 460 } 461 462 pDst = PtrCoord(dstInfo.rasBase, 463 span.x1, dstInfo.pixelStride, 464 span.y1, dstInfo.scanStride); 465 (*pPrim->funcs.scaledblit)(pSrc, pDst, 466 span.x2-span.x1, span.y2-span.y1, 467 tsxloc, tsyloc, 468 sxinc, syinc, shift, 469 &srcInfo, &dstInfo, 470 pPrim, &compInfo); 471 } 472 } else { 473 /* Break each clip span into tiles for better accuracy. */ 474 while (Region_NextIteration(&clipInfo, &span)) { 475 jint tilex, tiley; 476 jint sxloc, syloc; 477 jint x1, y1, x2, y2; 478 void *pDst; 479 480 for (tiley = TILESTART(span.y1, idy1, tilesize); 481 tiley < span.y2; 482 tiley += tilesize) 483 { 484 /* Clip span to Y range of current tile */ 485 y1 = tiley; 486 y2 = tiley + tilesize; 487 if (y1 < span.y1) y1 = span.y1; 488 if (y2 > span.y2) y2 = span.y2; 489 490 /* Find scaled source coordinate of first pixel */ 491 syloc = (jint) SRCLOC(tiley, ddy1, scaley); 492 if (y1 > tiley) { 493 syloc += syinc * (y1 - tiley); 494 } 495 496 for (tilex = TILESTART(span.x1, idx1, tilesize); 497 tilex < span.x2; 498 tilex += tilesize) 499 { 500 /* Clip span to X range of current tile */ 501 x1 = tilex; 502 x2 = tilex + tilesize; 503 if (x1 < span.x1) x1 = span.x1; 504 if (x2 > span.x2) x2 = span.x2; 505 506 /* Find scaled source coordinate of first pixel */ 507 sxloc = (jint) SRCLOC(tilex, ddx1, scalex); 508 if (x1 > tilex) { 509 sxloc += sxinc * (x1 - tilex); 510 } 511 512 pDst = PtrCoord(dstInfo.rasBase, 513 x1, dstInfo.pixelStride, 514 y1, dstInfo.scanStride); 515 (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1, 516 sxloc, syloc, 517 sxinc, syinc, shift, 518 &srcInfo, &dstInfo, 519 pPrim, &compInfo); 520 } 521 } 522 } 523 } 524 Region_EndIteration(env, &clipInfo); 525 } 526 SurfaceData_InvokeRelease(env, dstOps, &dstInfo); 527 SurfaceData_InvokeRelease(env, srcOps, &srcInfo); 528 } 529 SurfaceData_InvokeUnlock(env, dstOps, &dstInfo); 530 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); 531} 532