1/*
2 * Copyright (C) 2004, 2005, 2006, 2007 Nikolas Zimmermann <zimmermann@kde.org>
3 * Copyright (C) 2004, 2005 Rob Buis <buis@kde.org>
4 * Copyright (C) 2005 Eric Seidel <eric@webkit.org>
5 * Copyright (C) 2009 Dirk Schulze <krit@webkit.org>
6 * Copyright (C) 2010 Igalia, S.L.
7 * Copyright (C) Research In Motion Limited 2010. All rights reserved.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Library General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Library General Public License for more details.
18 *
19 * You should have received a copy of the GNU Library General Public License
20 * along with this library; see the file COPYING.LIB.  If not, write to
21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 * Boston, MA 02110-1301, USA.
23 */
24
25#include "config.h"
26
27#if ENABLE(FILTERS)
28#include "FEGaussianBlur.h"
29
30#include "FEGaussianBlurNEON.h"
31#include "Filter.h"
32#include "GraphicsContext.h"
33#include "TextStream.h"
34
35#include <runtime/JSCInlines.h>
36#include <runtime/TypedArrayInlines.h>
37#include <runtime/Uint8ClampedArray.h>
38#include <wtf/MathExtras.h>
39#include <wtf/ParallelJobs.h>
40
41static inline float gaussianKernelFactor()
42{
43    return 3 / 4.f * sqrtf(2 * piFloat);
44}
45
// Upper bound applied to each computed kernel dimension (see calculateUnscaledKernelSize()).
static const int gMaxKernelSize = 500;
47
48namespace WebCore {
49
50FEGaussianBlur::FEGaussianBlur(Filter* filter, float x, float y, EdgeModeType edgeMode)
51    : FilterEffect(filter)
52    , m_stdX(x)
53    , m_stdY(y)
54    , m_edgeMode(edgeMode)
55{
56}
57
58PassRefPtr<FEGaussianBlur> FEGaussianBlur::create(Filter* filter, float x, float y, EdgeModeType edgeMode)
59{
60    return adoptRef(new FEGaussianBlur(filter, x, y, edgeMode));
61}
62
63float FEGaussianBlur::stdDeviationX() const
64{
65    return m_stdX;
66}
67
68void FEGaussianBlur::setStdDeviationX(float x)
69{
70    m_stdX = x;
71}
72
73float FEGaussianBlur::stdDeviationY() const
74{
75    return m_stdY;
76}
77
78void FEGaussianBlur::setStdDeviationY(float y)
79{
80    m_stdY = y;
81}
82
83EdgeModeType FEGaussianBlur::edgeMode() const
84{
85    return m_edgeMode;
86}
87
88void FEGaussianBlur::setEdgeMode(EdgeModeType edgeMode)
89{
90    m_edgeMode = edgeMode;
91}
92
93// This function only operates on Alpha channel.
94inline void boxBlurAlphaOnly(const Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* dstPixelArray,
95    unsigned dx, int& dxLeft, int& dxRight, int& stride, int& strideLine, int& effectWidth, int& effectHeight, const int& maxKernelSize)
96{
97    unsigned char* srcData = srcPixelArray->data();
98    unsigned char* dstData = dstPixelArray->data();
99    // Memory alignment is: RGBA, zero-index based.
100    const int channel = 3;
101
102    for (int y = 0; y < effectHeight; ++y) {
103        int line = y * strideLine;
104        int sum = 0;
105
106        // Fill the kernel.
107        for (int i = 0; i < maxKernelSize; ++i) {
108            unsigned offset = line + i * stride;
109            unsigned char* srcPtr = srcData + offset;
110            sum += srcPtr[channel];
111        }
112
113        // Blurring.
114        for (int x = 0; x < effectWidth; ++x) {
115            unsigned pixelByteOffset = line + x * stride + channel;
116            unsigned char* dstPtr = dstData + pixelByteOffset;
117            *dstPtr = static_cast<unsigned char>(sum / dx);
118
119            // Shift kernel.
120            if (x >= dxLeft) {
121                unsigned leftOffset = pixelByteOffset - dxLeft * stride;
122                unsigned char* srcPtr = srcData + leftOffset;
123                sum -= *srcPtr;
124            }
125
126            if (x + dxRight < effectWidth) {
127                unsigned rightOffset = pixelByteOffset + dxRight * stride;
128                unsigned char* srcPtr = srcData + rightOffset;
129                sum += *srcPtr;
130            }
131        }
132    }
133}
134
135inline void boxBlur(const Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* dstPixelArray,
136    unsigned dx, int dxLeft, int dxRight, int stride, int strideLine, int effectWidth, int effectHeight, bool alphaImage, EdgeModeType edgeMode)
137{
138    const int maxKernelSize = std::min(dxRight, effectWidth);
139    if (alphaImage) {
140        return boxBlurAlphaOnly(srcPixelArray, dstPixelArray, dx, dxLeft, dxRight, stride, strideLine,
141            effectWidth, effectHeight, maxKernelSize);
142    }
143
144    unsigned char* srcData = srcPixelArray->data();
145    unsigned char* dstData = dstPixelArray->data();
146
147    // Concerning the array width/length: it is Element size + Margin + Border. The number of pixels will be
148    // P = width * height * channels.
149    for (int y = 0; y < effectHeight; ++y) {
150        int line = y * strideLine;
151        int sumR = 0, sumG = 0, sumB = 0, sumA = 0;
152
153        if (edgeMode == EDGEMODE_NONE) {
154            // Fill the kernel.
155            for (int i = 0; i < maxKernelSize; ++i) {
156                unsigned offset = line + i * stride;
157                unsigned char* srcPtr = srcData + offset;
158                sumR += *srcPtr++;
159                sumG += *srcPtr++;
160                sumB += *srcPtr++;
161                sumA += *srcPtr;
162            }
163
164            // Blurring.
165            for (int x = 0; x < effectWidth; ++x) {
166                unsigned pixelByteOffset = line + x * stride;
167                unsigned char* dstPtr = dstData + pixelByteOffset;
168
169                *dstPtr++ = static_cast<unsigned char>(sumR / dx);
170                *dstPtr++ = static_cast<unsigned char>(sumG / dx);
171                *dstPtr++ = static_cast<unsigned char>(sumB / dx);
172                *dstPtr = static_cast<unsigned char>(sumA / dx);
173
174                // Shift kernel.
175                if (x >= dxLeft) {
176                    unsigned leftOffset = pixelByteOffset - dxLeft * stride;
177                    unsigned char* srcPtr = srcData + leftOffset;
178                    sumR -= srcPtr[0];
179                    sumG -= srcPtr[1];
180                    sumB -= srcPtr[2];
181                    sumA -= srcPtr[3];
182                }
183
184                if (x + dxRight < effectWidth) {
185                    unsigned rightOffset = pixelByteOffset + dxRight * stride;
186                    unsigned char* srcPtr = srcData + rightOffset;
187                    sumR += srcPtr[0];
188                    sumG += srcPtr[1];
189                    sumB += srcPtr[2];
190                    sumA += srcPtr[3];
191                }
192            }
193
194        } else {
195            // FIXME: Add support for 'wrap' here.
196            // Get edge values for edgeMode 'duplicate'.
197            unsigned char* edgeValueLeft = srcData + line;
198            unsigned char* edgeValueRight  = srcData + (line + (effectWidth - 1) * stride);
199
200            // Fill the kernel.
201            for (int i = dxLeft * -1; i < dxRight; ++i) {
202                // Is this right for negative values of 'i'?
203                unsigned offset = line + i * stride;
204                unsigned char* srcPtr = srcData + offset;
205
206                if (i < 0) {
207                    sumR += edgeValueLeft[0];
208                    sumG += edgeValueLeft[1];
209                    sumB += edgeValueLeft[2];
210                    sumA += edgeValueLeft[3];
211                } else if (i >= effectWidth) {
212                    sumR += edgeValueRight[0];
213                    sumG += edgeValueRight[1];
214                    sumB += edgeValueRight[2];
215                    sumA += edgeValueRight[3];
216                } else {
217                    sumR += *srcPtr++;
218                    sumG += *srcPtr++;
219                    sumB += *srcPtr++;
220                    sumA += *srcPtr;
221                }
222            }
223
224            // Blurring.
225            for (int x = 0; x < effectWidth; ++x) {
226                unsigned pixelByteOffset = line + x * stride;
227                unsigned char* dstPtr = dstData + pixelByteOffset;
228
229                *dstPtr++ = static_cast<unsigned char>(sumR / dx);
230                *dstPtr++ = static_cast<unsigned char>(sumG / dx);
231                *dstPtr++ = static_cast<unsigned char>(sumB / dx);
232                *dstPtr = static_cast<unsigned char>(sumA / dx);
233
234                // Shift kernel.
235                if (x < dxLeft) {
236                    sumR -= edgeValueLeft[0];
237                    sumG -= edgeValueLeft[1];
238                    sumB -= edgeValueLeft[2];
239                    sumA -= edgeValueLeft[3];
240                } else {
241                    unsigned leftOffset = pixelByteOffset - dxLeft * stride;
242                    unsigned char* srcPtr = srcData + leftOffset;
243                    sumR -= srcPtr[0];
244                    sumG -= srcPtr[1];
245                    sumB -= srcPtr[2];
246                    sumA -= srcPtr[3];
247                }
248
249                if (x + dxRight >= effectWidth) {
250                    sumR += edgeValueRight[0];
251                    sumG += edgeValueRight[1];
252                    sumB += edgeValueRight[2];
253                    sumA += edgeValueRight[3];
254                } else {
255                    unsigned rightOffset = pixelByteOffset + dxRight * stride;
256                    unsigned char* srcPtr = srcData + rightOffset;
257                    sumR += srcPtr[0];
258                    sumG += srcPtr[1];
259                    sumB += srcPtr[2];
260                    sumA += srcPtr[3];
261                }
262            }
263        }
264    }
265}
266
267inline void FEGaussianBlur::platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
268{
269    int stride = 4 * paintSize.width();
270    int dxLeft = 0;
271    int dxRight = 0;
272    int dyLeft = 0;
273    int dyRight = 0;
274    Uint8ClampedArray* src = srcPixelArray;
275    Uint8ClampedArray* dst = tmpPixelArray;
276
277    for (int i = 0; i < 3; ++i) {
278        if (kernelSizeX) {
279            kernelPosition(i, kernelSizeX, dxLeft, dxRight);
280#if HAVE(ARM_NEON_INTRINSICS)
281            if (!isAlphaImage())
282                boxBlurNEON(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height());
283            else
284                boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true, m_edgeMode);
285#else
286            boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage(), m_edgeMode);
287#endif
288            std::swap(src, dst);
289        }
290
291        if (kernelSizeY) {
292            kernelPosition(i, kernelSizeY, dyLeft, dyRight);
293#if HAVE(ARM_NEON_INTRINSICS)
294            if (!isAlphaImage())
295                boxBlurNEON(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width());
296            else
297                boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true, m_edgeMode);
298#else
299            boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage(), m_edgeMode);
300#endif
301            std::swap(src, dst);
302        }
303    }
304
305    // The final result should be stored in srcPixelArray.
306    if (dst == srcPixelArray) {
307        ASSERT(src->length() == dst->length());
308        memcpy(dst->data(), src->data(), src->length());
309    }
310
311}
312
313void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters)
314{
315    IntSize paintSize(parameters->width, parameters->height);
316    parameters->filter->platformApplyGeneric(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
317        parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
318}
319
320inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
321{
322    int scanline = 4 * paintSize.width();
323    int extraHeight = 3 * kernelSizeY * 0.5f;
324    int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width());
325
326    if (optimalThreadNumber > 1) {
327        WTF::ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber);
328
329        int jobs = parallelJobs.numberOfJobs();
330        if (jobs > 1) {
331            // Split the job into "blockHeight"-sized jobs but there a few jobs that need to be slightly larger since
332            // blockHeight * jobs < total size. These extras are handled by the remainder "jobsWithExtra".
333            const int blockHeight = paintSize.height() / jobs;
334            const int jobsWithExtra = paintSize.height() % jobs;
335
336            int currentY = 0;
337            for (int job = 0; job < jobs; job++) {
338                PlatformApplyParameters& params = parallelJobs.parameter(job);
339                params.filter = this;
340
341                int startY = !job ? 0 : currentY - extraHeight;
342                currentY += job < jobsWithExtra ? blockHeight + 1 : blockHeight;
343                int endY = job == jobs - 1 ? currentY : currentY + extraHeight;
344
345                int blockSize = (endY - startY) * scanline;
346                if (!job) {
347                    params.srcPixelArray = srcPixelArray;
348                    params.dstPixelArray = tmpPixelArray;
349                } else {
350                    params.srcPixelArray = Uint8ClampedArray::createUninitialized(blockSize);
351                    params.dstPixelArray = Uint8ClampedArray::createUninitialized(blockSize);
352                    memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize);
353                }
354
355                params.width = paintSize.width();
356                params.height = endY - startY;
357                params.kernelSizeX = kernelSizeX;
358                params.kernelSizeY = kernelSizeY;
359            }
360
361            parallelJobs.execute();
362
363            // Copy together the parts of the image.
364            currentY = 0;
365            for (int job = 1; job < jobs; job++) {
366                PlatformApplyParameters& params = parallelJobs.parameter(job);
367                int sourceOffset;
368                int destinationOffset;
369                int size;
370                int adjustedBlockHeight = job < jobsWithExtra ? blockHeight + 1 : blockHeight;
371
372                currentY += adjustedBlockHeight;
373                sourceOffset = extraHeight * scanline;
374                destinationOffset = currentY * scanline;
375                size = adjustedBlockHeight * scanline;
376
377                memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size);
378            }
379            return;
380        }
381        // Fallback to single threaded mode.
382    }
383
384    // The selection here eventually should happen dynamically on some platforms.
385    platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);
386}
387
388IntSize FEGaussianBlur::calculateUnscaledKernelSize(const FloatPoint& stdDeviation)
389{
390    ASSERT(stdDeviation.x() >= 0 && stdDeviation.y() >= 0);
391    IntSize kernelSize;
392
393    // Limit the kernel size to 500. A bigger radius won't make a big difference for the result image but
394    // inflates the absolute paint rect too much. This is compatible with Firefox' behavior.
395    if (stdDeviation.x()) {
396        int size = std::max<unsigned>(2, static_cast<unsigned>(floorf(stdDeviation.x() * gaussianKernelFactor() + 0.5f)));
397        kernelSize.setWidth(std::min(size, gMaxKernelSize));
398    }
399
400    if (stdDeviation.y()) {
401        int size = std::max<unsigned>(2, static_cast<unsigned>(floorf(stdDeviation.y() * gaussianKernelFactor() + 0.5f)));
402        kernelSize.setHeight(std::min(size, gMaxKernelSize));
403    }
404
405    return kernelSize;
406}
407
408IntSize FEGaussianBlur::calculateKernelSize(const Filter& filter, const FloatPoint& stdDeviation)
409{
410    FloatPoint stdFilterScaled(filter.applyHorizontalScale(stdDeviation.x()), filter.applyVerticalScale(stdDeviation.y()));
411    return calculateUnscaledKernelSize(stdFilterScaled);
412}
413
414void FEGaussianBlur::determineAbsolutePaintRect()
415{
416    IntSize kernelSize = calculateKernelSize(filter(), FloatPoint(m_stdX, m_stdY));
417
418    FloatRect absolutePaintRect = inputEffect(0)->absolutePaintRect();
419    // Edge modes other than 'none' do not inflate the affected paint rect.
420    if (m_edgeMode != EDGEMODE_NONE) {
421        setAbsolutePaintRect(enclosingIntRect(absolutePaintRect));
422        return;
423    }
424
425    // We take the half kernel size and multiply it with three, because we run box blur three times.
426    absolutePaintRect.inflateX(3 * kernelSize.width() * 0.5f);
427    absolutePaintRect.inflateY(3 * kernelSize.height() * 0.5f);
428
429    if (clipsToBounds())
430        absolutePaintRect.intersect(maxEffectRect());
431    else
432        absolutePaintRect.unite(maxEffectRect());
433
434    setAbsolutePaintRect(enclosingIntRect(absolutePaintRect));
435}
436
437void FEGaussianBlur::platformApplySoftware()
438{
439    FilterEffect* in = inputEffect(0);
440
441    Uint8ClampedArray* srcPixelArray = createPremultipliedImageResult();
442    if (!srcPixelArray)
443        return;
444
445    setIsAlphaImage(in->isAlphaImage());
446
447    IntRect effectDrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect());
448    in->copyPremultipliedImage(srcPixelArray, effectDrawingRect);
449
450    if (!m_stdX && !m_stdY)
451        return;
452
453    IntSize kernelSize = calculateKernelSize(filter(), FloatPoint(m_stdX, m_stdY));
454    kernelSize.scale(filter().filterScale());
455
456    IntSize paintSize = absolutePaintRect().size();
457    paintSize.scale(filter().filterScale());
458    RefPtr<Uint8ClampedArray> tmpImageData = Uint8ClampedArray::createUninitialized(paintSize.width() * paintSize.height() * 4);
459    Uint8ClampedArray* tmpPixelArray = tmpImageData.get();
460
461    platformApply(srcPixelArray, tmpPixelArray, kernelSize.width(), kernelSize.height(), paintSize);
462}
463
464void FEGaussianBlur::dump()
465{
466}
467
468TextStream& FEGaussianBlur::externalRepresentation(TextStream& ts, int indent) const
469{
470    writeIndent(ts, indent);
471    ts << "[feGaussianBlur";
472    FilterEffect::externalRepresentation(ts);
473    ts << " stdDeviation=\"" << m_stdX << ", " << m_stdY << "\"]\n";
474    inputEffect(0)->externalRepresentation(ts, indent + 1);
475    return ts;
476}
477
478} // namespace WebCore
479
480#endif // ENABLE(FILTERS)
481