1/* 2 * Copyright (C) 2011 University of Szeged 3 * Copyright (C) 2011 Felician Marton 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#ifndef FECompositeArithmeticNEON_h 28#define FECompositeArithmeticNEON_h 29 30#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 31 32#include "FEComposite.h" 33#include "NEONHelpers.h" 34#include <arm_neon.h> 35 36namespace WebCore { 37 38template <int b1, int b4> 39inline void FEComposite::computeArithmeticPixelsNeon(unsigned char* source, unsigned char* destination, 40 unsigned pixelArrayLength, float k1, float k2, float k3, float k4) 41{ 42 float32x4_t k1x4 = vdupq_n_f32(k1 / 255); 43 float32x4_t k2x4 = vdupq_n_f32(k2); 44 float32x4_t k3x4 = vdupq_n_f32(k3); 45 float32x4_t k4x4 = vdupq_n_f32(k4 * 255); 46 uint32x4_t max255 = vdupq_n_u32(255); 47 48 uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(source); 49 uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(destination); 50 uint32_t* destinationEndPixel = destinationPixel + (pixelArrayLength >> 2); 51 52 while (destinationPixel < destinationEndPixel) { 53 float32x4_t sourcePixelAsFloat = loadRGBA8AsFloat(sourcePixel); 54 float32x4_t destinationPixelAsFloat = loadRGBA8AsFloat(destinationPixel); 55 56 float32x4_t result = vmulq_f32(sourcePixelAsFloat, k2x4); 57 result = vmlaq_f32(result, destinationPixelAsFloat, k3x4); 58 if (b1) 59 result = vmlaq_f32(result, vmulq_f32(sourcePixelAsFloat, destinationPixelAsFloat), k1x4); 60 if (b4) 61 result = vaddq_f32(result, k4x4); 62 63 // Convert result to uint so negative values are converted to zero. 64 uint16x4_t temporary3 = vmovn_u32(vminq_u32(vcvtq_u32_f32(result), max255)); 65 uint8x8_t temporary4 = vmovn_u16(vcombine_u16(temporary3, temporary3)); 66 *destinationPixel++ = vget_lane_u32(vreinterpret_u32_u8(temporary4), 0); 67 ++sourcePixel; 68 } 69} 70 71inline void FEComposite::platformArithmeticNeon(unsigned char* source, unsigned char* destination, 72 unsigned pixelArrayLength, float k1, float k2, float k3, float k4) 73{ 74 if (!k4) { 75 if (!k1) { 76 computeArithmeticPixelsNeon<0, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4); 77 return; 78 } 79 80 computeArithmeticPixelsNeon<1, 0>(source, destination, pixelArrayLength, k1, k2, k3, k4); 81 return; 82 } 83 84 if (!k1) { 85 computeArithmeticPixelsNeon<0, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4); 86 return; 87 } 88 computeArithmeticPixelsNeon<1, 1>(source, destination, pixelArrayLength, k1, k2, k3, k4); 89} 90 91} // namespace WebCore 92 93#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 94 95#endif // FECompositeArithmeticNEON_h 96