1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30
31#if ENABLE(WEB_AUDIO)
32
33#include "ReverbConvolver.h"
34
35#include "VectorMath.h"
36#include "AudioBus.h"
37
38namespace WebCore {
39
40using namespace VectorMath;
41
// Size handed to m_inputBuffer, the buffer that process() writes incoming audio into.
constexpr int InputBufferSize = 8 * 16384;

// Only the leading part of the impulse response is convolved on the real-time thread: a stage is
// handed to the background thread (when one is in use) once its offset into the response lies
// beyond this many frames.  At 44.1KHz that gives the background thread about 278msec of
// scheduling slop.
// The value is empirical.  It was found to be a good trade-off on Mac OS X between leaving enough
// room for scheduling slop and keeping the work done in the primary (high-priority) thread small.
// It may suit other platforms too, provided their rough scheduling latencies are similar on these
// time-scales; if that assumption turns out to be wrong, tune it per platform.
constexpr size_t RealtimeFrameLimit = 8192 + 4096; // ~278msec @ 44.1KHz

// FFT size of the first stage; it is also what the background thread's slice size is derived from.
constexpr size_t MinFFTSize = 128;

// Largest FFT size allowed for stages that run in the real-time thread when background threads are used.
constexpr size_t MaxRealtimeFFTSize = 2048;
55
56static void backgroundThreadEntry(void* threadData)
57{
58    ReverbConvolver* reverbConvolver = static_cast<ReverbConvolver*>(threadData);
59    reverbConvolver->backgroundThreadEntry();
60}
61
62ReverbConvolver::ReverbConvolver(AudioChannel* impulseResponse, size_t renderSliceSize, size_t maxFFTSize, size_t convolverRenderPhase, bool useBackgroundThreads)
63    : m_impulseResponseLength(impulseResponse->length())
64    , m_accumulationBuffer(impulseResponse->length() + renderSliceSize)
65    , m_inputBuffer(InputBufferSize)
66    , m_minFFTSize(MinFFTSize) // First stage will have this size - successive stages will double in size each time
67    , m_maxFFTSize(maxFFTSize) // until we hit m_maxFFTSize
68    , m_useBackgroundThreads(useBackgroundThreads)
69    , m_backgroundThread(0)
70    , m_wantsToExit(false)
71    , m_moreInputBuffered(false)
72{
73    // If we are using background threads then don't exceed this FFT size for the
74    // stages which run in the real-time thread.  This avoids having only one or two
75    // large stages (size 16384 or so) at the end which take a lot of time every several
76    // processing slices.  This way we amortize the cost over more processing slices.
77    m_maxRealtimeFFTSize = MaxRealtimeFFTSize;
78
79    // For the moment, a good way to know if we have real-time constraint is to check if we're using background threads.
80    // Otherwise, assume we're being run from a command-line tool.
81    bool hasRealtimeConstraint = useBackgroundThreads;
82
83    const float* response = impulseResponse->data();
84    size_t totalResponseLength = impulseResponse->length();
85
86    // The total latency is zero because the direct-convolution is used in the leading portion.
87    size_t reverbTotalLatency = 0;
88
89    size_t stageOffset = 0;
90    int i = 0;
91    size_t fftSize = m_minFFTSize;
92    while (stageOffset < totalResponseLength) {
93        size_t stageSize = fftSize / 2;
94
95        // For the last stage, it's possible that stageOffset is such that we're straddling the end
96        // of the impulse response buffer (if we use stageSize), so reduce the last stage's length...
97        if (stageSize + stageOffset > totalResponseLength)
98            stageSize = totalResponseLength - stageOffset;
99
100        // This "staggers" the time when each FFT happens so they don't all happen at the same time
101        int renderPhase = convolverRenderPhase + i * renderSliceSize;
102
103        bool useDirectConvolver = !stageOffset;
104
105        auto stage = std::make_unique<ReverbConvolverStage>(response, totalResponseLength, reverbTotalLatency, stageOffset, stageSize, fftSize, renderPhase, renderSliceSize, &m_accumulationBuffer, useDirectConvolver);
106
107        bool isBackgroundStage = false;
108
109        if (this->useBackgroundThreads() && stageOffset > RealtimeFrameLimit) {
110            m_backgroundStages.append(WTF::move(stage));
111            isBackgroundStage = true;
112        } else
113            m_stages.append(WTF::move(stage));
114
115        stageOffset += stageSize;
116        ++i;
117
118        if (!useDirectConvolver) {
119            // Figure out next FFT size
120            fftSize *= 2;
121        }
122
123        if (hasRealtimeConstraint && !isBackgroundStage && fftSize > m_maxRealtimeFFTSize)
124            fftSize = m_maxRealtimeFFTSize;
125        if (fftSize > m_maxFFTSize)
126            fftSize = m_maxFFTSize;
127    }
128
129    // Start up background thread
130    // FIXME: would be better to up the thread priority here.  It doesn't need to be real-time, but higher than the default...
131    if (this->useBackgroundThreads() && m_backgroundStages.size() > 0)
132        m_backgroundThread = createThread(WebCore::backgroundThreadEntry, this, "convolution background thread");
133}
134
135ReverbConvolver::~ReverbConvolver()
136{
137    // Wait for background thread to stop
138    if (useBackgroundThreads() && m_backgroundThread) {
139        m_wantsToExit = true;
140
141        // Wake up thread so it can return
142        {
143            std::lock_guard<std::mutex> lock(m_backgroundThreadMutex);
144            m_moreInputBuffered = true;
145            m_backgroundThreadConditionVariable.notify_one();
146        }
147
148        waitForThreadCompletion(m_backgroundThread);
149    }
150}
151
152void ReverbConvolver::backgroundThreadEntry()
153{
154    while (!m_wantsToExit) {
155        // Wait for realtime thread to give us more input
156        m_moreInputBuffered = false;
157        {
158            std::unique_lock<std::mutex> lock(m_backgroundThreadMutex);
159
160            m_backgroundThreadConditionVariable.wait(lock, [this] { return m_moreInputBuffered || m_wantsToExit; });
161        }
162
163        // Process all of the stages until their read indices reach the input buffer's write index
164        int writeIndex = m_inputBuffer.writeIndex();
165
166        // Even though it doesn't seem like every stage needs to maintain its own version of readIndex
167        // we do this in case we want to run in more than one background thread.
168        int readIndex;
169
170        while ((readIndex = m_backgroundStages[0]->inputReadIndex()) != writeIndex) { // FIXME: do better to detect buffer overrun...
171            // The ReverbConvolverStages need to process in amounts which evenly divide half the FFT size
172            const int SliceSize = MinFFTSize / 2;
173
174            // Accumulate contributions from each stage
175            for (size_t i = 0; i < m_backgroundStages.size(); ++i)
176                m_backgroundStages[i]->processInBackground(this, SliceSize);
177        }
178    }
179}
180
181void ReverbConvolver::process(const AudioChannel* sourceChannel, AudioChannel* destinationChannel, size_t framesToProcess)
182{
183    bool isSafe = sourceChannel && destinationChannel && sourceChannel->length() >= framesToProcess && destinationChannel->length() >= framesToProcess;
184    ASSERT(isSafe);
185    if (!isSafe)
186        return;
187
188    const float* source = sourceChannel->data();
189    float* destination = destinationChannel->mutableData();
190    bool isDataSafe = source && destination;
191    ASSERT(isDataSafe);
192    if (!isDataSafe)
193        return;
194
195    // Feed input buffer (read by all threads)
196    m_inputBuffer.write(source, framesToProcess);
197
198    // Accumulate contributions from each stage
199    for (size_t i = 0; i < m_stages.size(); ++i)
200        m_stages[i]->process(source, framesToProcess);
201
202    // Finally read from accumulation buffer
203    m_accumulationBuffer.readAndClear(destination, framesToProcess);
204
205    // Now that we've buffered more input, wake up our background thread.
206
207    // We use use std::unique_lock with std::try_lock here because this is run on the real-time
208    // thread where it is a disaster for the lock to be contended (causes audio glitching).  It's OK if we fail to
209    // signal from time to time, since we'll get to it the next time we're called.  We're called repeatedly
210    // and frequently (around every 3ms).  The background thread is processing well into the future and has a considerable amount of
211    // leeway here...
212    std::unique_lock<std::mutex> lock(m_backgroundThreadMutex, std::try_to_lock);
213    if (!lock.owns_lock())
214        return;
215
216    m_moreInputBuffered = true;
217    m_backgroundThreadConditionVariable.notify_one();
218}
219
220void ReverbConvolver::reset()
221{
222    for (size_t i = 0; i < m_stages.size(); ++i)
223        m_stages[i]->reset();
224
225    for (size_t i = 0; i < m_backgroundStages.size(); ++i)
226        m_backgroundStages[i]->reset();
227
228    m_accumulationBuffer.reset();
229    m_inputBuffer.reset();
230}
231
232size_t ReverbConvolver::latencyFrames() const
233{
234    return 0;
235}
236
237} // namespace WebCore
238
239#endif // ENABLE(WEB_AUDIO)
240