1/*
2 * Copyright (C) 2011 University of Szeged
3 * Copyright (C) 2011 Felician Marton
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#ifndef FECompositeArithmeticNEON_h
28#define FECompositeArithmeticNEON_h
29
30#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
31
32#include "FEComposite.h"
33#include "NEONHelpers.h"
34#include <arm_neon.h>
35
36namespace WebCore {
37
38template <int b1, int b4>
39inline void FEComposite::computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination,
40    unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
41{
42    float32x4_t k1x4 = vdupq_n_f32(k1 / 255);
43    float32x4_t k2x4 = vdupq_n_f32(k2);
44    float32x4_t k3x4 = vdupq_n_f32(k3);
45    float32x4_t k4x4 = vdupq_n_f32(k4 * 255);
46    uint32x4_t max255 = vdupq_n_u32(255);
47
48    uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(source);
49    uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(destination);
50    uint32_t* destinationEndPixel = destinationPixel + (pixelArrayLength >> 2);
51
52    while (destinationPixel < destinationEndPixel) {
53        float32x4_t sourcePixelAsFloat = loadRGBA8AsFloat(sourcePixel);
54        float32x4_t destinationPixelAsFloat = loadRGBA8AsFloat(destinationPixel);
55
56        float32x4_t result = vmulq_f32(sourcePixelAsFloat, k2x4);
57        result = vmlaq_f32(result, destinationPixelAsFloat, k3x4);
58        if (b1)
59            result = vmlaq_f32(result, vmulq_f32(sourcePixelAsFloat, destinationPixelAsFloat), k1x4);
60        if (b4)
61            result = vaddq_f32(result, k4x4);
62
63        // Convert result to uint so negative values are converted to zero.
64        uint16x4_t temporary3 = vmovn_u32(vminq_u32(vcvtq_u32_f32(result), max255));
65        uint8x8_t temporary4 = vmovn_u16(vcombine_u16(temporary3, temporary3));
66        *destinationPixel++ = vget_lane_u32(vreinterpret_u32_u8(temporary4), 0);
67        ++sourcePixel;
68    }
69}
70
71inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination,
72    unsigned pixelArrayLength, float k1, float k2, float k3, float k4)
73{
74    if (!k4) {
75        if (!k1) {
76            computeArithmeticPixelsNeon<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
77            return;
78        }
79
80        computeArithmeticPixelsNeon<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4);
81        return;
82    }
83
84    if (!k1) {
85        computeArithmeticPixelsNeon<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
86        return;
87    }
88    computeArithmeticPixelsNeon<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4);
89}
90
91} // namespace WebCore
92
93#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
94
95#endif // FECompositeArithmeticNEON_h
96