1/*
2 * Copyright (C) 2012 Intel Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30
31#if ENABLE(WEB_AUDIO)
32
33#include "DirectConvolver.h"
34
35#if OS(DARWIN)
36#include <Accelerate/Accelerate.h>
37#endif
38
39#include "VectorMath.h"
40
41namespace WebCore {
42
43using namespace VectorMath;
44
45DirectConvolver::DirectConvolver(size_t inputBlockSize)
46    : m_inputBlockSize(inputBlockSize)
47#if USE(WEBAUDIO_IPP)
48    , m_overlayBuffer(inputBlockSize)
49#endif // USE(WEBAUDIO_IPP)
50    , m_buffer(inputBlockSize * 2)
51{
52}
53
54void DirectConvolver::process(AudioFloatArray* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess)
55{
56    ASSERT(framesToProcess == m_inputBlockSize);
57    if (framesToProcess != m_inputBlockSize)
58        return;
59
60    // Only support kernelSize <= m_inputBlockSize
61    size_t kernelSize = convolutionKernel->size();
62    ASSERT(kernelSize <= m_inputBlockSize);
63    if (kernelSize > m_inputBlockSize)
64        return;
65
66    float* kernelP = convolutionKernel->data();
67
68    // Sanity check
69    bool isCopyGood = kernelP && sourceP && destP && m_buffer.data();
70    ASSERT(isCopyGood);
71    if (!isCopyGood)
72        return;
73
74#if USE(WEBAUDIO_IPP)
75    float* outputBuffer = m_buffer.data();
76    float* overlayBuffer = m_overlayBuffer.data();
77    bool isCopyGood2 = overlayBuffer && m_overlayBuffer.size() >= kernelSize && m_buffer.size() == m_inputBlockSize * 2;
78    ASSERT(isCopyGood2);
79    if (!isCopyGood2)
80        return;
81
82    ippsConv_32f(static_cast<const Ipp32f*>(sourceP), framesToProcess, static_cast<Ipp32f*>(kernelP), kernelSize, static_cast<Ipp32f*>(outputBuffer));
83
84    vadd(outputBuffer, 1, overlayBuffer, 1, destP, 1, framesToProcess);
85    memcpy(overlayBuffer, outputBuffer + m_inputBlockSize, sizeof(float) * kernelSize);
86#else
87    float* inputP = m_buffer.data() + m_inputBlockSize;
88
89    // Copy samples to 2nd half of input buffer.
90    memcpy(inputP, sourceP, sizeof(float) * framesToProcess);
91
92#if OS(DARWIN)
93#if defined(__ppc__) || defined(__i386__)
94    conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
95#else
96    vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
97#endif // defined(__ppc__) || defined(__i386__)
98#else
99    // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES.
100#define CONVOLVE_ONE_SAMPLE             \
101    sum += inputP[i - j] * kernelP[j];  \
102    j++;
103
104    size_t i = 0;
105    while (i < framesToProcess) {
106        size_t j = 0;
107        float sum = 0;
108
109        // FIXME: SSE optimization may be applied here.
110        if (kernelSize == 32) {
111            CONVOLVE_ONE_SAMPLE // 1
112            CONVOLVE_ONE_SAMPLE // 2
113            CONVOLVE_ONE_SAMPLE // 3
114            CONVOLVE_ONE_SAMPLE // 4
115            CONVOLVE_ONE_SAMPLE // 5
116            CONVOLVE_ONE_SAMPLE // 6
117            CONVOLVE_ONE_SAMPLE // 7
118            CONVOLVE_ONE_SAMPLE // 8
119            CONVOLVE_ONE_SAMPLE // 9
120            CONVOLVE_ONE_SAMPLE // 10
121
122            CONVOLVE_ONE_SAMPLE // 11
123            CONVOLVE_ONE_SAMPLE // 12
124            CONVOLVE_ONE_SAMPLE // 13
125            CONVOLVE_ONE_SAMPLE // 14
126            CONVOLVE_ONE_SAMPLE // 15
127            CONVOLVE_ONE_SAMPLE // 16
128            CONVOLVE_ONE_SAMPLE // 17
129            CONVOLVE_ONE_SAMPLE // 18
130            CONVOLVE_ONE_SAMPLE // 19
131            CONVOLVE_ONE_SAMPLE // 20
132
133            CONVOLVE_ONE_SAMPLE // 21
134            CONVOLVE_ONE_SAMPLE // 22
135            CONVOLVE_ONE_SAMPLE // 23
136            CONVOLVE_ONE_SAMPLE // 24
137            CONVOLVE_ONE_SAMPLE // 25
138            CONVOLVE_ONE_SAMPLE // 26
139            CONVOLVE_ONE_SAMPLE // 27
140            CONVOLVE_ONE_SAMPLE // 28
141            CONVOLVE_ONE_SAMPLE // 29
142            CONVOLVE_ONE_SAMPLE // 30
143
144            CONVOLVE_ONE_SAMPLE // 31
145            CONVOLVE_ONE_SAMPLE // 32
146
147        } else if (kernelSize == 64) {
148            CONVOLVE_ONE_SAMPLE // 1
149            CONVOLVE_ONE_SAMPLE // 2
150            CONVOLVE_ONE_SAMPLE // 3
151            CONVOLVE_ONE_SAMPLE // 4
152            CONVOLVE_ONE_SAMPLE // 5
153            CONVOLVE_ONE_SAMPLE // 6
154            CONVOLVE_ONE_SAMPLE // 7
155            CONVOLVE_ONE_SAMPLE // 8
156            CONVOLVE_ONE_SAMPLE // 9
157            CONVOLVE_ONE_SAMPLE // 10
158
159            CONVOLVE_ONE_SAMPLE // 11
160            CONVOLVE_ONE_SAMPLE // 12
161            CONVOLVE_ONE_SAMPLE // 13
162            CONVOLVE_ONE_SAMPLE // 14
163            CONVOLVE_ONE_SAMPLE // 15
164            CONVOLVE_ONE_SAMPLE // 16
165            CONVOLVE_ONE_SAMPLE // 17
166            CONVOLVE_ONE_SAMPLE // 18
167            CONVOLVE_ONE_SAMPLE // 19
168            CONVOLVE_ONE_SAMPLE // 20
169
170            CONVOLVE_ONE_SAMPLE // 21
171            CONVOLVE_ONE_SAMPLE // 22
172            CONVOLVE_ONE_SAMPLE // 23
173            CONVOLVE_ONE_SAMPLE // 24
174            CONVOLVE_ONE_SAMPLE // 25
175            CONVOLVE_ONE_SAMPLE // 26
176            CONVOLVE_ONE_SAMPLE // 27
177            CONVOLVE_ONE_SAMPLE // 28
178            CONVOLVE_ONE_SAMPLE // 29
179            CONVOLVE_ONE_SAMPLE // 30
180
181            CONVOLVE_ONE_SAMPLE // 31
182            CONVOLVE_ONE_SAMPLE // 32
183            CONVOLVE_ONE_SAMPLE // 33
184            CONVOLVE_ONE_SAMPLE // 34
185            CONVOLVE_ONE_SAMPLE // 35
186            CONVOLVE_ONE_SAMPLE // 36
187            CONVOLVE_ONE_SAMPLE // 37
188            CONVOLVE_ONE_SAMPLE // 38
189            CONVOLVE_ONE_SAMPLE // 39
190            CONVOLVE_ONE_SAMPLE // 40
191
192            CONVOLVE_ONE_SAMPLE // 41
193            CONVOLVE_ONE_SAMPLE // 42
194            CONVOLVE_ONE_SAMPLE // 43
195            CONVOLVE_ONE_SAMPLE // 44
196            CONVOLVE_ONE_SAMPLE // 45
197            CONVOLVE_ONE_SAMPLE // 46
198            CONVOLVE_ONE_SAMPLE // 47
199            CONVOLVE_ONE_SAMPLE // 48
200            CONVOLVE_ONE_SAMPLE // 49
201            CONVOLVE_ONE_SAMPLE // 50
202
203            CONVOLVE_ONE_SAMPLE // 51
204            CONVOLVE_ONE_SAMPLE // 52
205            CONVOLVE_ONE_SAMPLE // 53
206            CONVOLVE_ONE_SAMPLE // 54
207            CONVOLVE_ONE_SAMPLE // 55
208            CONVOLVE_ONE_SAMPLE // 56
209            CONVOLVE_ONE_SAMPLE // 57
210            CONVOLVE_ONE_SAMPLE // 58
211            CONVOLVE_ONE_SAMPLE // 59
212            CONVOLVE_ONE_SAMPLE // 60
213
214            CONVOLVE_ONE_SAMPLE // 61
215            CONVOLVE_ONE_SAMPLE // 62
216            CONVOLVE_ONE_SAMPLE // 63
217            CONVOLVE_ONE_SAMPLE // 64
218
219        } else if (kernelSize == 128) {
220            CONVOLVE_ONE_SAMPLE // 1
221            CONVOLVE_ONE_SAMPLE // 2
222            CONVOLVE_ONE_SAMPLE // 3
223            CONVOLVE_ONE_SAMPLE // 4
224            CONVOLVE_ONE_SAMPLE // 5
225            CONVOLVE_ONE_SAMPLE // 6
226            CONVOLVE_ONE_SAMPLE // 7
227            CONVOLVE_ONE_SAMPLE // 8
228            CONVOLVE_ONE_SAMPLE // 9
229            CONVOLVE_ONE_SAMPLE // 10
230
231            CONVOLVE_ONE_SAMPLE // 11
232            CONVOLVE_ONE_SAMPLE // 12
233            CONVOLVE_ONE_SAMPLE // 13
234            CONVOLVE_ONE_SAMPLE // 14
235            CONVOLVE_ONE_SAMPLE // 15
236            CONVOLVE_ONE_SAMPLE // 16
237            CONVOLVE_ONE_SAMPLE // 17
238            CONVOLVE_ONE_SAMPLE // 18
239            CONVOLVE_ONE_SAMPLE // 19
240            CONVOLVE_ONE_SAMPLE // 20
241
242            CONVOLVE_ONE_SAMPLE // 21
243            CONVOLVE_ONE_SAMPLE // 22
244            CONVOLVE_ONE_SAMPLE // 23
245            CONVOLVE_ONE_SAMPLE // 24
246            CONVOLVE_ONE_SAMPLE // 25
247            CONVOLVE_ONE_SAMPLE // 26
248            CONVOLVE_ONE_SAMPLE // 27
249            CONVOLVE_ONE_SAMPLE // 28
250            CONVOLVE_ONE_SAMPLE // 29
251            CONVOLVE_ONE_SAMPLE // 30
252
253            CONVOLVE_ONE_SAMPLE // 31
254            CONVOLVE_ONE_SAMPLE // 32
255            CONVOLVE_ONE_SAMPLE // 33
256            CONVOLVE_ONE_SAMPLE // 34
257            CONVOLVE_ONE_SAMPLE // 35
258            CONVOLVE_ONE_SAMPLE // 36
259            CONVOLVE_ONE_SAMPLE // 37
260            CONVOLVE_ONE_SAMPLE // 38
261            CONVOLVE_ONE_SAMPLE // 39
262            CONVOLVE_ONE_SAMPLE // 40
263
264            CONVOLVE_ONE_SAMPLE // 41
265            CONVOLVE_ONE_SAMPLE // 42
266            CONVOLVE_ONE_SAMPLE // 43
267            CONVOLVE_ONE_SAMPLE // 44
268            CONVOLVE_ONE_SAMPLE // 45
269            CONVOLVE_ONE_SAMPLE // 46
270            CONVOLVE_ONE_SAMPLE // 47
271            CONVOLVE_ONE_SAMPLE // 48
272            CONVOLVE_ONE_SAMPLE // 49
273            CONVOLVE_ONE_SAMPLE // 50
274
275            CONVOLVE_ONE_SAMPLE // 51
276            CONVOLVE_ONE_SAMPLE // 52
277            CONVOLVE_ONE_SAMPLE // 53
278            CONVOLVE_ONE_SAMPLE // 54
279            CONVOLVE_ONE_SAMPLE // 55
280            CONVOLVE_ONE_SAMPLE // 56
281            CONVOLVE_ONE_SAMPLE // 57
282            CONVOLVE_ONE_SAMPLE // 58
283            CONVOLVE_ONE_SAMPLE // 59
284            CONVOLVE_ONE_SAMPLE // 60
285
286            CONVOLVE_ONE_SAMPLE // 61
287            CONVOLVE_ONE_SAMPLE // 62
288            CONVOLVE_ONE_SAMPLE // 63
289            CONVOLVE_ONE_SAMPLE // 64
290            CONVOLVE_ONE_SAMPLE // 65
291            CONVOLVE_ONE_SAMPLE // 66
292            CONVOLVE_ONE_SAMPLE // 67
293            CONVOLVE_ONE_SAMPLE // 68
294            CONVOLVE_ONE_SAMPLE // 69
295            CONVOLVE_ONE_SAMPLE // 70
296
297            CONVOLVE_ONE_SAMPLE // 71
298            CONVOLVE_ONE_SAMPLE // 72
299            CONVOLVE_ONE_SAMPLE // 73
300            CONVOLVE_ONE_SAMPLE // 74
301            CONVOLVE_ONE_SAMPLE // 75
302            CONVOLVE_ONE_SAMPLE // 76
303            CONVOLVE_ONE_SAMPLE // 77
304            CONVOLVE_ONE_SAMPLE // 78
305            CONVOLVE_ONE_SAMPLE // 79
306            CONVOLVE_ONE_SAMPLE // 80
307
308            CONVOLVE_ONE_SAMPLE // 81
309            CONVOLVE_ONE_SAMPLE // 82
310            CONVOLVE_ONE_SAMPLE // 83
311            CONVOLVE_ONE_SAMPLE // 84
312            CONVOLVE_ONE_SAMPLE // 85
313            CONVOLVE_ONE_SAMPLE // 86
314            CONVOLVE_ONE_SAMPLE // 87
315            CONVOLVE_ONE_SAMPLE // 88
316            CONVOLVE_ONE_SAMPLE // 89
317            CONVOLVE_ONE_SAMPLE // 90
318
319            CONVOLVE_ONE_SAMPLE // 91
320            CONVOLVE_ONE_SAMPLE // 92
321            CONVOLVE_ONE_SAMPLE // 93
322            CONVOLVE_ONE_SAMPLE // 94
323            CONVOLVE_ONE_SAMPLE // 95
324            CONVOLVE_ONE_SAMPLE // 96
325            CONVOLVE_ONE_SAMPLE // 97
326            CONVOLVE_ONE_SAMPLE // 98
327            CONVOLVE_ONE_SAMPLE // 99
328            CONVOLVE_ONE_SAMPLE // 100
329
330            CONVOLVE_ONE_SAMPLE // 101
331            CONVOLVE_ONE_SAMPLE // 102
332            CONVOLVE_ONE_SAMPLE // 103
333            CONVOLVE_ONE_SAMPLE // 104
334            CONVOLVE_ONE_SAMPLE // 105
335            CONVOLVE_ONE_SAMPLE // 106
336            CONVOLVE_ONE_SAMPLE // 107
337            CONVOLVE_ONE_SAMPLE // 108
338            CONVOLVE_ONE_SAMPLE // 109
339            CONVOLVE_ONE_SAMPLE // 110
340
341            CONVOLVE_ONE_SAMPLE // 111
342            CONVOLVE_ONE_SAMPLE // 112
343            CONVOLVE_ONE_SAMPLE // 113
344            CONVOLVE_ONE_SAMPLE // 114
345            CONVOLVE_ONE_SAMPLE // 115
346            CONVOLVE_ONE_SAMPLE // 116
347            CONVOLVE_ONE_SAMPLE // 117
348            CONVOLVE_ONE_SAMPLE // 118
349            CONVOLVE_ONE_SAMPLE // 119
350            CONVOLVE_ONE_SAMPLE // 120
351
352            CONVOLVE_ONE_SAMPLE // 121
353            CONVOLVE_ONE_SAMPLE // 122
354            CONVOLVE_ONE_SAMPLE // 123
355            CONVOLVE_ONE_SAMPLE // 124
356            CONVOLVE_ONE_SAMPLE // 125
357            CONVOLVE_ONE_SAMPLE // 126
358            CONVOLVE_ONE_SAMPLE // 127
359            CONVOLVE_ONE_SAMPLE // 128
360        } else {
361            while (j < kernelSize) {
362                // Non-optimized using actual while loop.
363                CONVOLVE_ONE_SAMPLE
364            }
365        }
366        destP[i++] = sum;
367    }
368#endif // OS(DARWIN)
369
370    // Copy 2nd half of input buffer to 1st half.
371    memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess);
372#endif
373}
374
375void DirectConvolver::reset()
376{
377    m_buffer.zero();
378#if USE(WEBAUDIO_IPP)
379    m_overlayBuffer.zero();
380#endif // USE(WEBAUDIO_IPP)
381}
382
383} // namespace WebCore
384
385#endif // ENABLE(WEB_AUDIO)
386