1/* 2 * Copyright (C) 2012 University of Szeged 3 * Copyright (C) 2012 Gabor Rapcsanyi 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#ifndef FEBlendNEON_h 28#define FEBlendNEON_h 29 30#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 31 32#include "FEBlend.h" 33#include <arm_neon.h> 34 35namespace WebCore { 36 37class FEBlendUtilitiesNEON { 38public: 39 static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 40 { 41 uint16x8_t quotient = vshrq_n_u16(num, 8); 42 uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne); 43 return vaddq_u16(quotient, vshrq_n_u16(remainder, 8)); 44 } 45 46 static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t, 47 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 48 { 49 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 50 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 51 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 52 return vaddq_u16(tmp3, pixelA); 53 } 54 55 static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 56 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 57 { 58 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 59 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 60 uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB); 61 uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA); 62 uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4); 63 return div255(tmp5, sixteenConst255, sixteenConstOne); 64 } 65 66 static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t, 67 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 68 { 69 uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB); 70 uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB); 71 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 72 return vsubq_u16(tmp1, tmp3); 73 } 74 75 static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 76 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 77 { 78 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 79 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 80 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 81 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 82 83 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 84 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 85 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 86 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 87 88 return vminq_u16(tmp4, tmp8); 89 } 90 91 static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, 92 uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) 93 { 94 uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA); 95 uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB); 96 uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne); 97 uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); 98 99 uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB); 100 uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA); 101 uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne); 102 uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); 103 104 return vmaxq_u16(tmp4, tmp8); 105 } 106}; 107 108void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray, 109 unsigned colorArrayLength) 110{ 111 uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA); 112 uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB); 113 uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray); 114 115 uint16x8_t sixteenConst255 = vdupq_n_u16(255); 116 uint16x8_t sixteenConstOne = vdupq_n_u16(1); 117 118 unsigned colorOffset = 0; 119 while (colorOffset < colorArrayLength) { 120 unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3]; 121 unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3]; 122 unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7]; 123 unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7]; 124 125 uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset)); 126 uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset)); 127 uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2)); 128 uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2)); 129 130 uint16x8_t result; 131 switch (m_mode) { 132 case FEBLEND_MODE_NORMAL: 133 result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 134 break; 135 case FEBLEND_MODE_MULTIPLY: 136 result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 137 break; 138 case FEBLEND_MODE_SCREEN: 139 result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 140 break; 141 case FEBLEND_MODE_DARKEN: 142 result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 143 break; 144 case FEBLEND_MODE_LIGHTEN: 145 result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne); 146 break; 147 case FEBLEND_MODE_UNKNOWN: 148 default: 149 result = vdupq_n_u16(0); 150 break; 151 } 152 153 vst1_u8(destinationPixel + colorOffset, vmovn_u16(result)); 154 155 unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255; 156 unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255; 157 158 dstPixelArray[colorOffset + 3] = alphaR1; 159 dstPixelArray[colorOffset + 7] = alphaR2; 160 161 colorOffset += 8; 162 if (colorOffset > colorArrayLength) { 163 ASSERT(colorOffset - 4 == colorArrayLength); 164 colorOffset = colorArrayLength - 8; 165 } 166 } 167} 168 169} // namespace WebCore 170 171#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS) 172 173#endif // FEBlendNEON_h 174