1/* 2 * Copyright (C) 2012 University of Szeged 3 * Copyright (C) 2012 Gabor Rapcsanyi 4 * Copyright (C) 2014 Adobe Systems Incorporated. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#ifndef FEBlendNEON_h 29#define FEBlendNEON_h 30 31#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 32 33#include "FEBlend.h" 34#include <arm_neon.h> 35 36namespace WebCore { 37 38class FEBlendUtilitiesNEON { 39public: 40 static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 41 { 42 uint16x8_t quotient = vshrq_n_u16(num, 8); 43 uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne); 44 return vaddq_u16(quotient, vshrq_n_u16(remainder, 8)); 45 } 46 47 static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t, 48 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 49 { 50 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 51 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 52 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 53 return vaddq_u16(tmp3, pixelA); 54 } 55 56 static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 57 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 58 { 59 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 60 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 61 uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB); 62 uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA); 63 uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4); 64 return div255(tmp5, sixteenConst255, sixteenConstOne); 65 } 66 67 static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t, 68 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 69 { 70 uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB); 71 uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB); 72 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 73 return vsubq_u16(tmp1, tmp3); 74 } 75 76 static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 77 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 78 { 79 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 80 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 81 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 82 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 83 84 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 85 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 86 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 87 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 88 89 return vminq_u16(tmp4, tmp8); 90 } 91 92 static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 93 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 94 { 95 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 96 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 97 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 98 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 99 100 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 101 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 102 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 103 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 104 105 return vmaxq_u16(tmp4, tmp8); 106 } 107}; 108 109void FEBlend::platformApplySoftware() 110{ 111 FilterEffect* in = inputEffect(0); 112 FilterEffect* in2 = inputEffect(1); 113 114 Uint8ClampedArray* dstPixelArray = createPremultipliedImageResult(); 115 if (!dstPixelArray) 116 return; 117 118 IntRect effectADrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect()); 119 RefPtr<Uint8ClampedArray> srcPixelArrayA = in->asPremultipliedImage(effectADrawingRect); 120 121 IntRect effectBDrawingRect = requestedRegionOfInputImageData(in2->absolutePaintRect()); 122 RefPtr<Uint8ClampedArray> srcPixelArrayB = in2->asPremultipliedImage(effectBDrawingRect); 123 124 unsigned pixelArrayLength = srcPixelArrayA->length(); 125 ASSERT(pixelArrayLength == srcPixelArrayB->length()); 126 127 if (pixelArrayLength >= 8) { 128 platformApplyNEON(srcPixelArrayA->data(), srcPixelArrayB->data(), dstPixelArray->data(), pixelArrayLength); 129 return 130 } 131 // If there is just one pixel we expand it to two. 132 ASSERT(pixelArrayLength > 0); 133 uint32_t sourceA[2] = {0, 0}; 134 uint32_t sourceBAndDest[2] = {0, 0}; 135 136 sourceA[0] = reinterpret_cast<uint32_t*>(srcPixelArrayA->data())[0]; 137 sourceBAndDest[0] = reinterpret_cast<uint32_t*>(srcPixelArrayB->data())[0]; 138 platformApplyNEON(reinterpret_cast<uint8_t*>(sourceA), reinterpret_cast<uint8_t*>(sourceBAndDest), reinterpret_cast<uint8_t*>(sourceBAndDest), 8); 139 reinterpret_cast<uint32_t*>(dstPixelArray->data())[0] = sourceBAndDest[0]; 140} 141 142void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray, 143 unsigned colorArrayLength) 144{ 145 uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA); 146 uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB); 147 uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray); 148 149 uint16x8_t sixteenConst255 = vdupq_n_u16(255); 150 uint16x8_t sixteenConstOne = vdupq_n_u16(1); 151 152 unsigned colorOffset = 0; 153 while (colorOffset < colorArrayLength) { 154 unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3]; 155 unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3]; 156 unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7]; 157 unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7]; 158 159 uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset)); 160 uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset)); 161 uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2)); 162 uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2)); 163 164 uint16x8_t result; 165 switch (m_mode) { 166 case BlendModeNormal: 167 result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 168 break; 169 case BlendModeMultiply: 170 result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 171 break; 172 case BlendModeScreen: 173 result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 174 break; 175 case BlendModeDarken: 176 result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 177 break; 178 case BlendModeLighten: 179 result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 180 break; 181 default: 182 result = vdupq_n_u16(0); 183 break; 184 } 185 186 vst1_u8(destinationPixel + colorOffset, vmovn_u16(result)); 187 188 unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255; 189 unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255; 190 191 dstPixelArray[colorOffset + 3] = alphaR1; 192 dstPixelArray[colorOffset + 7] = alphaR2; 193 194 colorOffset += 8; 195 if (colorOffset > colorArrayLength) { 196 ASSERT(colorOffset - 4 == colorArrayLength); 197 colorOffset = colorArrayLength - 8; 198 } 199 } 200} 201 202} // namespace WebCore 203 204#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 205 206#endif // FEBlendNEON_h 207