/* * Copyright (C) 2012 University of Szeged * Copyright (C) 2012 Gabor Rapcsanyi * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef FEBlendNEON_h #define FEBlendNEON_h #if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) #include "FEBlend.h" #include namespace WebCore { class FEBlendUtilitiesNEON { public: static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t quotient = vshrq_n_u16(num, 8); uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne); return vaddq_u16(quotient, vshrq_n_u16(remainder, 8)); } static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); return vaddq_u16(tmp3, pixelA); } static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB); uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA); uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4); return div255(tmp5, sixteenConst255, sixteenConstOne); } static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB); uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB); uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); return vsubq_u16(tmp1, tmp3); } static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); return vminq_u16(tmp4, tmp8); } static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) { uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); return vmaxq_u16(tmp4, tmp8); } }; void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray, unsigned colorArrayLength) { uint8_t* sourcePixelA = reinterpret_cast(srcPixelArrayA); uint8_t* sourcePixelB = reinterpret_cast(srcPixelArrayB); uint8_t* destinationPixel = reinterpret_cast(dstPixelArray); uint16x8_t sixteenConst255 = vdupq_n_u16(255); uint16x8_t sixteenConstOne = vdupq_n_u16(1); unsigned colorOffset = 0; while (colorOffset < colorArrayLength) { unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3]; unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3]; unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7]; unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7]; uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset)); uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset)); uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2)); uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2)); uint16x8_t result; switch (m_mode) { case FEBLEND_MODE_NORMAL: result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); break; case FEBLEND_MODE_MULTIPLY: result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); break; case FEBLEND_MODE_SCREEN: result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); break; case FEBLEND_MODE_DARKEN: result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); break; case FEBLEND_MODE_LIGHTEN: result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); break; case FEBLEND_MODE_UNKNOWN: default: result = vdupq_n_u16(0); break; } vst1_u8(destinationPixel + colorOffset, vmovn_u16(result)); unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255; unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255; dstPixelArray[colorOffset + 3] = alphaR1; dstPixelArray[colorOffset + 7] = alphaR2; colorOffset += 8; if (colorOffset > colorArrayLength) { ASSERT(colorOffset - 4 == colorArrayLength); colorOffset = colorArrayLength - 8; } } } } // namespace WebCore #endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) #endif // FEBlendNEON_h