1/*
2 * Copyright (C) 2013 Apple Inc.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "PlatformSpeechSynthesizer.h"
28
29#include "PlatformSpeechSynthesisUtterance.h"
30#include "PlatformSpeechSynthesisVoice.h"
31#include "WebCoreSystemInterface.h"
32#include <AppKit/NSSpeechSynthesizer.h>
33#include <wtf/PassRefPtr.h>
34#include <wtf/RetainPtr.h>
35
36#if ENABLE(SPEECH_SYNTHESIS)
37
// Bridges WebCore's PlatformSpeechSynthesizer to AppKit's NSSpeechSynthesizer and
// receives its delegate callbacks for speech progress and completion.
@interface WebSpeechSynthesisWrapper : NSObject<NSSpeechSynthesizerDelegate>
{
    WebCore::PlatformSpeechSynthesizer* m_synthesizerObject; // Back-pointer to the owning synthesizer; not owned.
    // NOTE(review): this is a raw pointer, so it does NOT keep the utterance alive while
    // the synthesizer uses it (despite what an earlier comment claimed). The caller must
    // guarantee the utterance outlives the speech job — confirm, or consider changing
    // this to RefPtr<WebCore::PlatformSpeechSynthesisUtterance>.
    WebCore::PlatformSpeechSynthesisUtterance* m_utterance;

    RetainPtr<NSSpeechSynthesizer> m_synthesizer; // Created lazily on first -speakUtterance:.
    float m_basePitch; // Base pitch of the current voice; cached by -updateBasePitchForSynthesizer.
}

- (WebSpeechSynthesisWrapper *)initWithSpeechSynthesizer:(WebCore::PlatformSpeechSynthesizer *)synthesizer;
- (void)speakUtterance:(WebCore::PlatformSpeechSynthesisUtterance *)utterance;

@end
52
53@implementation WebSpeechSynthesisWrapper
54
// Designated initializer. Stores a back-pointer to the owning WebCore synthesizer
// (not retained) and primes the cached base pitch.
- (WebSpeechSynthesisWrapper *)initWithSpeechSynthesizer:(WebCore::PlatformSpeechSynthesizer *)synthesizer
{
    self = [super init];
    if (!self)
        return nil;

    m_synthesizerObject = synthesizer;
    [self updateBasePitchForSynthesizer];
    return self;
}
64
// NSSpeechSynthesizer expects a rate in words per minute (WPM). There is no preset
// default, but 180-220 WPM is a normal speaking rate.
- (float)convertRateToWPM:(float)rate
{
    // Treat 200 WPM as the 1x baseline and scale linearly.
    const float defaultWordsPerMinute = 200.0f;
    return defaultWordsPerMinute * rate;
}
72
// Maps the utterance's relative pitch onto the current voice's base pitch,
// allowing the result to range from 0% to 200% of the voice's normal pitch.
- (float)convertPitchToNSSpeechValue:(float)pitch
{
    float scaledPitch = m_basePitch * pitch;
    return scaledPitch;
}
78
// Re-reads the synthesizer's base pitch into m_basePitch. Each voice has a different
// base pitch, so this must run whenever the voice changes; the reset property clears
// any pitch override we applied for the previous utterance first.
- (void)updateBasePitchForSynthesizer
{
    [m_synthesizer setObject:nil forProperty:NSSpeechResetProperty error:nil];
    NSNumber *basePitchValue = [m_synthesizer objectForProperty:NSSpeechPitchBaseProperty error:nil];
    m_basePitch = [basePitchValue floatValue];
}
85
// Configures the platform synthesizer for the utterance (voice, pitch, rate, volume),
// starts speaking it, and notifies the client that speaking began. The utterance
// pointer is held until the synthesizer reports completion via the delegate.
- (void)speakUtterance:(WebCore::PlatformSpeechSynthesisUtterance *)utterance
{
    // When speak is called we should not have an existing speech utterance outstanding.
    ASSERT(!m_utterance);
    ASSERT(utterance);

    if (!m_synthesizer) {
        // adoptNS takes ownership of the +1 reference from alloc/init. Assigning the
        // raw pointer straight into the RetainPtr would retain it a second time and
        // leak the synthesizer.
        m_synthesizer = adoptNS([[NSSpeechSynthesizer alloc] initWithVoice:nil]);
        [m_synthesizer setDelegate:self];
    }

    // Find if we should use a specific voice based on the voiceURI in utterance.
    // Otherwise, find the voice that matches the language. The Mac doesn't have a default voice per language, so the first
    // one will have to do.
    Vector<RefPtr<WebCore::PlatformSpeechSynthesisVoice>> voiceList = m_synthesizerObject->voiceList();
    size_t voiceListSize = voiceList.size();

    WebCore::PlatformSpeechSynthesisVoice* utteranceVoice = utterance->voice();
    // If no voice was specified, try to match by language. Keep scanning after a match
    // so a voice flagged as the default for that language wins over earlier matches.
    if (!utteranceVoice && !utterance->lang().isEmpty()) {
        for (size_t k = 0; k < voiceListSize; k++) {
            if (equalIgnoringCase(utterance->lang(), voiceList[k]->lang())) {
                utteranceVoice = voiceList[k].get();
                if (voiceList[k]->isDefault())
                    break;
            }
        }
    }

    NSString *voiceURI = nil;
    if (utteranceVoice)
        voiceURI = utteranceVoice->voiceURI();
    else
        voiceURI = [NSSpeechSynthesizer defaultVoice];

    // Don't set the voice unless necessary. There's a bug in NSSpeechSynthesizer such that
    // setting the voice for the first time will cause the first speechDone callback to report it was unsuccessful.
    BOOL updatePitch = NO;
    if (![[m_synthesizer voice] isEqualToString:voiceURI]) {
        [m_synthesizer setVoice:voiceURI];
        // Reset the base pitch whenever we change voices.
        updatePitch = YES;
    }

    // Also refresh on first use: m_basePitch stays 0 until the synthesizer is queried.
    if (m_basePitch == 0 || updatePitch)
        [self updateBasePitchForSynthesizer];

    [m_synthesizer setObject:[NSNumber numberWithFloat:[self convertPitchToNSSpeechValue:utterance->pitch()]] forProperty:NSSpeechPitchBaseProperty error:nil];
    [m_synthesizer setRate:[self convertRateToWPM:utterance->rate()]];
    [m_synthesizer setVolume:utterance->volume()];

    m_utterance = utterance;
    [m_synthesizer startSpeakingString:utterance->text()];
    m_synthesizerObject->client()->didStartSpeaking(utterance);
}
141
// Pauses the in-flight utterance at the next immediate boundary and informs the
// client. Does nothing when no utterance is being spoken.
- (void)pause
{
    if (m_utterance) {
        [m_synthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
        m_synthesizerObject->client()->didPauseSpeaking(m_utterance);
    }
}
150
// Resumes a previously paused utterance and informs the client. Does nothing when
// no utterance is outstanding.
- (void)resume
{
    if (m_utterance) {
        [m_synthesizer continueSpeaking];
        m_synthesizerObject->client()->didResumeSpeaking(m_utterance);
    }
}
159
// Stops the current utterance immediately. A cancelled job is reported to the client
// as speakingErrorOccurred (not didFinishSpeaking), matching WebCore's expectation
// for aborted speech.
- (void)cancel
{
    if (!m_utterance)
        return;

    [m_synthesizer stopSpeakingAtBoundary:NSSpeechImmediateBoundary];
    m_synthesizerObject->client()->speakingErrorOccurred(m_utterance);
    m_utterance = 0; // Cleared last so the calls above still see the utterance.
}
169
// Severs the wrapper from its owner at teardown time. Order matters: the utterance
// and delegate are cleared BEFORE stopping, so the stop cannot trigger a delegate
// callback into the (possibly already destroyed) owner.
- (void)invalidate
{
    m_utterance = 0;
    [m_synthesizer setDelegate:nil];
    [m_synthesizer stopSpeakingAtBoundary:NSSpeechImmediateBoundary];
}
176
// NSSpeechSynthesizerDelegate callback: a speech job ended, either successfully
// (finishedSpeaking == YES) or because it was interrupted/failed.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didFinishSpeaking:(BOOL)finishedSpeaking
{
    UNUSED_PARAM(sender);

    if (!m_utterance)
        return;

    // Clear m_utterance first, in case the client callback immediately kicks off a
    // new speaking job.
    WebCore::PlatformSpeechSynthesisUtterance* finishedUtterance = m_utterance;
    m_utterance = 0;

    WebCore::PlatformSpeechSynthesizerClient* client = m_synthesizerObject->client();
    if (finishedSpeaking)
        client->didFinishSpeaking(finishedUtterance);
    else
        client->speakingErrorOccurred(finishedUtterance);
}
193
// NSSpeechSynthesizerDelegate callback fired just before each word is spoken;
// forwards it to WebCore as a word-boundary event at the word's character offset.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender willSpeakWord:(NSRange)characterRange ofString:(NSString *)string
{
    UNUSED_PARAM(sender);
    UNUSED_PARAM(string);

    if (!m_utterance)
        return;

    // Mac platform only supports word boundaries (no sentence boundaries).
    m_synthesizerObject->client()->boundaryEventOccurred(m_utterance, WebCore::SpeechWordBoundary, characterRange.location);
}
205
206@end
207
208namespace WebCore {
209
// The client receives speaking lifecycle callbacks (start/finish/pause/resume/error).
// The voice list is populated lazily; the ObjC wrapper is created on first speak().
PlatformSpeechSynthesizer::PlatformSpeechSynthesizer(PlatformSpeechSynthesizerClient* client)
    : m_voiceListIsInitialized(false)
    , m_speechSynthesizerClient(client)
{
}
215
// Invalidate the wrapper so no delegate callbacks reach this object after it is
// destroyed. Messaging a nil wrapper (speak() never called) is a harmless no-op.
PlatformSpeechSynthesizer::~PlatformSpeechSynthesizer()
{
    [m_platformSpeechWrapper.get() invalidate];
}
220
// Builds m_voiceList from the system's installed voices, flagging each voice that is
// the system default for its locale.
void PlatformSpeechSynthesizer::initializeVoiceList()
{
    NSArray *availableVoices = wkSpeechSynthesisGetVoiceIdentifiers();
    for (NSString *voiceIdentifier in availableVoices) {
        NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];

        NSString *voiceURI = [attributes objectForKey:NSVoiceIdentifier];
        NSString *name = [attributes objectForKey:NSVoiceName];
        NSString *language = [attributes objectForKey:NSVoiceLocaleIdentifier];

        RetainPtr<NSLocale> locale = adoptNS([[NSLocale alloc] initWithLocaleIdentifier:language]);
        NSString *defaultVoiceURI = wkSpeechSynthesisGetDefaultVoiceIdentifierForLocale(locale.get());

        // The spec wants BCP-47 language tags, so turn "en_US"-style identifiers into "en-US".
        language = [language stringByReplacingOccurrencesOfString:@"_" withString:@"-"];

        bool isDefault = [defaultVoiceURI isEqualToString:voiceURI];
        m_voiceList.append(PlatformSpeechSynthesisVoice::create(voiceURI, name, language, true, isDefault));
    }
}
244
// Forwards pause to the ObjC wrapper; a nil wrapper (speak() never called) is a no-op.
void PlatformSpeechSynthesizer::pause()
{
    [m_platformSpeechWrapper.get() pause];
}
249
// Forwards resume to the ObjC wrapper; a nil wrapper (speak() never called) is a no-op.
void PlatformSpeechSynthesizer::resume()
{
    [m_platformSpeechWrapper.get() resume];
}
254
// Starts speaking the given utterance, lazily creating the ObjC wrapper on first use.
void PlatformSpeechSynthesizer::speak(PassRefPtr<PlatformSpeechSynthesisUtterance> utterance)
{
    if (!m_platformSpeechWrapper)
        m_platformSpeechWrapper = adoptNS([[WebSpeechSynthesisWrapper alloc] initWithSpeechSynthesizer:this]);

    // NOTE(review): the wrapper stores this utterance as a raw pointer while the
    // PassRefPtr's reference ends when this call returns — confirm the utterance is
    // kept alive elsewhere (e.g. by SpeechSynthesis) for the duration of the job.
    [m_platformSpeechWrapper.get() speakUtterance:utterance.get()];
}
262
// Forwards cancel to the ObjC wrapper; a nil wrapper (speak() never called) is a no-op.
void PlatformSpeechSynthesizer::cancel()
{
    [m_platformSpeechWrapper.get() cancel];
}
267
268} // namespace WebCore
269
270#endif // ENABLE(SPEECH_SYNTHESIS)
271