WWDC23 · 13 min · Accessibility & Inclusion

Extend Speech Synthesis with personal and custom voices

Bring the latest advancements in Speech Synthesis to your apps. Learn how you can integrate your custom speech synthesizer and voices into iOS and macOS. We’ll show you how SSML is used to generate expressive speech synthesis, and explore how Personal Voice can enable your augmentative and assistive communication app to speak on a person’s behalf in an authentic way.

Watch at developer.apple.com ↗

Transcript all transcripts

Chapters

0:00 — Welcome
1:25 — Explore SSML
2:37 — Implement a synthesis provider
10:01 — Use Personal Voice

Code shown on screen · 16 snippets

SSML phrase xml · at 2:10 ↗

<!-- Says "Hello", pauses, then speaks the remaining phrase at an increased rate. -->
<speak>
    Hello
    <!-- Insert a one-second pause. -->
    <break time="1s"/>
    <!-- rate="200%" requests twice the default speaking rate for the enclosed text. -->
    <prosody rate="200%">nice to meet you!</prosody>
</speak>

SSML utterance swift · at 2:29 ↗

// SSML markup to speak: "Hello", a one-second break, then a phrase with a 200% prosody rate.
let ssml = """
    <speak>
        Hello
        <break time="1s" />
        <prosody rate="200%">nice to meet you!</prosody>
    </speak>
"""

// The SSML-based initializer is failable; stop here if no utterance could be created.
guard let ssmlUtterance = AVSpeechUtterance(ssmlRepresentation: ssml) else {
    return
}

// Hand the utterance to the synthesizer to be spoken.
self.synthesizer.speak(ssmlUtterance)

Create a host app swift · at 4:33 ↗

/// Host app screen that lists the voices offered for purchase.
struct ContentView: View {

    /// The catalog of voices this app offers.
    var availableVoices: [WWDCVoice] {
        [
            WWDCVoice(name: "Screen Reader Voice", id: "com.example.screen-reader-voice"),
            WWDCVoice(name: "Reading Voice", id: "com.example.reading-voice"),
        ]
    }

    var body: some View {
        List {
            Section("My Awesome Voices") {
                ForEach(availableVoices) { offering in
                    purchaseRow(for: offering)
                }
            }
        }
    }

    /// Builds one list row: the voice's name, a spacer, and a "Buy" button.
    private func purchaseRow(for voice: WWDCVoice) -> some View {
        HStack {
            Text(voice.name)
            Spacer()
            Button("Buy") {
                // Buy this voice...
            }
        }
    }
}

Keep track of purchased voices swift · at 5:04 ↗

/// Host app screen showing the voice catalog plus a navigable list of purchased voices.
struct ContentView: View {

    /// Voices bought so far; starts out empty.
    @State var purchasedVoices: [WWDCVoice] = []

    var body: some View {
        NavigationStack {
            List {
                MyAwesomeVoicesSection
                purchasedVoiceLinks
            }
        }
    }

    /// The "Purchased Voices" section: one navigation link per purchased voice, labelled with its name.
    private var purchasedVoiceLinks: some View {
        Section("Purchased Voices") {
            ForEach(purchasedVoices) { owned in
                NavigationLink {
                    // Destination View
                } label: {
                    Text(owned.name)
                }
            }
        }
    }
}

Inform the system when available voices change swift · at 5:13 ↗

/// Host app screen that lists available and purchased voices and handles purchases.
struct ContentView: View {
    
    /// Voices bought so far; starts out empty.
    @State var purchasedVoices: [WWDCVoice] = []
    
    var body: some View {
        List {
            MyAwesomeVoicesSection
            PurchasedVoicesSection
        }
    }
    
    /// Records `voice` as purchased, then tells the system that the set of provided voices changed.
    ///
    /// - Parameter voice: The voice that was just bought.
    func purchase(voice: WWDCVoice) {
        // Append voice to list of purchased voices
        purchasedVoices.append(voice)
        
        // Inform system of change in voices
        // (done after the append so the updated list is already in place when the system is notified)
        AVSpeechSynthesisProviderVoice.updateSpeechVoices()
    }
}

Update UI with purchased voices swift · at 5:39 ↗

/// Host app screen where bought voices disappear from the catalog section.
struct ContentView: View {

    /// Voices bought so far; starts out empty.
    @State var purchasedVoices: [WWDCVoice] = []

    /// Catalog entries that are not yet contained in `purchasedVoices`.
    private var voicesForSale: [WWDCVoice] {
        availableVoices.filter { candidate in
            !purchasedVoices.contains(candidate)
        }
    }

    var body: some View {
        List {
            Section("My Awesome Voices") {
                ForEach(voicesForSale) { offering in
                    HStack {
                        Text(offering.name)
                        Spacer()
                        // Buying hands the voice to `purchase(voice:)`.
                        Button("Buy") { purchase(voice: offering) }
                    }
                }
            }
            PurchasedVoicesSection
        }
    }
}

Save available voices into UserDefaults swift · at 5:46 ↗

/// Host app screen that persists purchased voices before notifying the system.
struct ContentView: View {
    
    /// Defaults suite named "group.com.example.SpeechSynthesizerApp".
    /// Force-unwrapped: a nil suite crashes at initialization.
    // NOTE(review): presumably an app-group suite shared with the audio unit extension — confirm the entitlement.
    let groupDefaults = UserDefaults(suiteName: "group.com.example.SpeechSynthesizerApp")!
    
    /// Voices bought so far; starts out empty.
    @State var purchasedVoices: [WWDCVoice] = []
    
    var body: some View {
        List {
            MyAwesomeVoicesSection
            PurchasedVoicesSection
        }
    }
    
    /// Records `voice` as purchased, persists the list, then tells the system the provided voices changed.
    ///
    /// - Parameter voice: The voice that was just bought.
    func purchase(voice: WWDCVoice) {
        // Append voice to list of purchased voices
        purchasedVoices.append(voice)
        
        // Write purchasedVoices to defaults
        // (`updatePurchasedVoices()` is defined outside this snippet)
        updatePurchasedVoices()
        
        // Inform system of change in voices
        // (last step, so the persisted list is already written when the system is notified)
        AVSpeechSynthesisProviderVoice.updateSpeechVoices()
    }
}

Monitor for system voice changes swift · at 6:25 ↗

/// Host app screen that also lists the system's "en-US" voices and keeps that list current.
struct ContentView: View {

    /// Snapshot of the voices reported by the system; replaced whenever the voices-changed
    /// notification arrives.
    @State var systemVoices: [AVSpeechSynthesisVoice] = AVSpeechSynthesisVoice.speechVoices()
    
    var body: some View {
        List {
            MyAwesomeVoicesSection
            PurchasedVoicesSection
            Section("System Voices") {
                // Key rows explicitly by the voice's `identifier`: `ForEach(_:content:)` would require
                // `AVSpeechSynthesisVoice` to be `Identifiable`, which the framework does not appear
                // to declare.
                ForEach(systemVoices.filter { $0.language == "en-US" }, id: \.identifier) { voice in
                    Text(voice.name)
                }
            }
        }
        // Reload the voice list whenever the system reports that the available voices changed.
        .onReceive(NotificationCenter.default
            .publisher(for: AVSpeechSynthesizer.availableVoicesDidChangeNotification)) { _ in
                systemVoices = AVSpeechSynthesisVoice.speechVoices()
        }
    }
}

Override speechVoices getter swift · at 6:53 ↗

// Implement a synthesis provider

/// Speech synthesis provider audio unit (skeleton).
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// The voices this audio unit provides.
    ///
    /// Placeholder implementation: reports no voices until the real lookup is filled in.
    public override var speechVoices: [AVSpeechSynthesisProviderVoice] {
        get { [] }
        // NOTE(review): the superclass property appears to be settable, and a getter-only override
        // of a settable property does not compile. Assigned values are intentionally ignored
        // because the list is always derived in the getter.
        set { }
    }
}

Use UserDefaults to provide set of available voices swift · at 7:02 ↗

/// Speech synthesis provider audio unit whose voice list comes from `groupDefaults`.
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// The voices this audio unit provides, built from the "voices" entry in `groupDefaults`.
    ///
    /// The stored value is read as a `[String: String]` dictionary whose keys become voice
    /// identifiers and whose values become voice names. A missing or differently typed entry
    /// yields an empty list. Every voice is reported with "en-US" as its primary and supported
    /// language.
    public override var speechVoices: [AVSpeechSynthesisProviderVoice] {
        get {
            // Typed accessor instead of KVC `value(forKey:)`; still falls back to no voices.
            let voices = groupDefaults.dictionary(forKey: "voices") as? [String: String] ?? [:]
            return voices.map { identifier, name in
                AVSpeechSynthesisProviderVoice(name: name,
                                               identifier: identifier,
                                               primaryLanguages: ["en-US"],
                                               supportedLanguages: ["en-US"])
            }
        }
        // NOTE(review): the superclass property appears to be settable, and a getter-only override
        // of a settable property does not compile. Assigned values are intentionally ignored
        // because the list is always derived from defaults.
        set { }
    }
}

Use your synthesis engine on each synthesis request swift · at 7:22 ↗

/// Speech synthesis provider audio unit: handling of an incoming synthesis request.
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// Prepares audio for a new synthesis request.
    ///
    /// - Parameter speechRequest: The request; its `voice` and `ssmlRepresentation` are passed to
    ///   the synthesis helper.
    ///
    /// NOTE(review): the SDK appears to declare this method as `synthesizeSpeechRequest(_:)`,
    /// without an argument label — confirm the override signature against the framework header.
    public override func synthesizeSpeechRequest(speechRequest: AVSpeechSynthesisProviderRequest) {
        // Generate the audio for this request (`getAudioBuffer(for:with:)` is defined outside this snippet).
        currentBuffer = getAudioBuffer(for: speechRequest.voice, with: speechRequest.ssmlRepresentation)
        // Reset the frame position to zero for the new buffer.
        framePosition = 0
    }
}

Handle request cancellation swift · at 8:14 ↗

/// Speech synthesis provider audio unit: starting and cancelling synthesis requests.
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// Prepares audio for a new synthesis request.
    ///
    /// - Parameter speechRequest: The request; its `voice` and `ssmlRepresentation` are passed to
    ///   the synthesis helper.
    ///
    /// NOTE(review): the SDK appears to declare this method as `synthesizeSpeechRequest(_:)`,
    /// without an argument label — confirm the override signature against the framework header.
    public override func synthesizeSpeechRequest(speechRequest: AVSpeechSynthesisProviderRequest) {
        // Generate the audio for this request (`getAudioBuffer(for:with:)` is defined outside this snippet).
        currentBuffer = getAudioBuffer(for: speechRequest.voice, with: speechRequest.ssmlRepresentation)
        // Reset the frame position to zero for the new buffer.
        framePosition = 0
    }

    /// Cancels the current request by discarding its audio buffer.
    public override func cancelSpeechRequest() {
        // After this, `currentBuffer` is nil; any code reading it must handle that case.
        currentBuffer = nil
    }
}

Override internalRenderBlock swift · at 8:28 ↗

/// Speech synthesis provider audio unit: render block skeleton.
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// The block called to render audio.
    ///
    /// Skeleton only: it reports `kAudio_ParamError` when the audio unit no longer exists and
    /// `noErr` otherwise, without writing any samples.
    public override var internalRenderBlock: AUInternalRenderBlock {
        let renderBlock: AUInternalRenderBlock = { [weak self]
            actionFlags, timestamp, frameCount, outputBusNumber, outputAudioBufferList, _, _ in
            // `self` is captured weakly; if the audio unit is gone there is nothing to render.
            if self == nil {
                return kAudio_ParamError
            }

            return noErr
        }
        return renderBlock
    }
}

Implement the render block swift · at 8:42 ↗

/// Speech synthesis provider audio unit: render block that plays back `currentBuffer`.
public class WWDCSynthAudioUnit: AVSpeechSynthesisProviderAudioUnit {
    /// The block called to render audio.
    ///
    /// Copies up to `frameCount` samples from `currentBuffer`, starting at `framePosition`, into
    /// the first output buffer and advances `framePosition` by the number of samples copied.
    /// Only the first buffer of each buffer list is used, and samples are read and written as
    /// `Float32`. Output frames with no source sample (no active request, or the request's audio
    /// is exhausted) are filled with silence.
    ///
    /// Returns `kAudio_ParamError` if the audio unit is gone or the output buffer has no storage,
    /// otherwise `noErr`.
    ///
    /// NOTE(review): this block never tells the host that a request has finished playing — check
    /// the provider documentation for how completion should be signalled.
    public override var internalRenderBlock: AUInternalRenderBlock {
        return { [weak self]
            actionFlags, timestamp, frameCount, outputBusNumber, outputAudioBufferList, _, _ in
            guard let self else { return kAudio_ParamError }

            // This is the audio buffer we are going to fill up
            let outputBuffers = UnsafeMutableAudioBufferListPointer(outputAudioBufferList)
            guard let outputBuffer = outputBuffers.first, let outputData = outputBuffer.mData else {
                return kAudio_ParamError
            }
            let frames = outputData.assumingMemoryBound(to: Float32.self)
            let bytesPerSample = MemoryLayout<Float32>.size

            // Raw pointers carry no length, so bound writes by the byte size the host reported.
            let writableFrames = min(Int(frameCount), Int(outputBuffer.mDataByteSize) / bytesPerSample)

            // Start from silence so frames we cannot fill never contain stale data.
            frames.initialize(repeating: 0, count: writableFrames)

            // `cancelSpeechRequest()` sets the buffer to nil; with nothing to play, output silence
            // instead of crashing on a force unwrap.
            guard let currentBuffer = self.currentBuffer else { return noErr }

            let sourceBuffers = UnsafeMutableAudioBufferListPointer(currentBuffer.mutableAudioBufferList)
            guard let sourceBuffer = sourceBuffers.first, let sourceData = sourceBuffer.mData else {
                return noErr
            }
            let sourceFrames = sourceData.assumingMemoryBound(to: Float32.self)

            // Never read past the valid frames of the source buffer.
            let availableFrames = min(Int(currentBuffer.frameLength),
                                      Int(sourceBuffer.mDataByteSize) / bytesPerSample)
            let startPosition = Int(self.framePosition)
            guard startPosition >= 0, startPosition < availableFrames else { return noErr }

            let framesToCopy = min(writableFrames, availableFrames - startPosition)
            for frame in 0..<framesToCopy {
                frames[frame] = sourceFrames[Int(self.framePosition)]
                self.framePosition += 1
            }

            return noErr
        }
    }
}

Request authorization for Personal Voice swift · at 11:10 ↗

/// View state and loading logic for the person's Personal Voices.
struct ContentView: View {

    /// Personal voices available to this app; stays empty unless authorization is granted.
    @State private var personalVoices: [AVSpeechSynthesisVoice] = []

    /// Requests Personal Voice authorization and, if granted, loads the personal voices.
    ///
    /// Suspends until the authorization status is known, so `personalVoices` has been updated
    /// by the time this function returns. When authorization is not granted, `personalVoices`
    /// is left unchanged.
    func fetchPersonalVoices() async {
        // Await the async variant rather than passing a completion handler; otherwise this
        // `async` function would return before the authorization result arrives.
        let status = await AVSpeechSynthesizer.requestPersonalVoiceAuthorization()
        guard status == .authorized else { return }

        // Keep only the voices flagged as Personal Voice.
        personalVoices = AVSpeechSynthesisVoice.speechVoices().filter {
            $0.voiceTraits.contains(.isPersonalVoice)
        }
    }
}

Use Personal Voice swift · at 11:34 ↗

/// Speaks `string` using the first available Personal Voice.
///
/// Nothing is spoken when `personalVoices` is empty.
///
/// - Parameter string: The text to speak.
func speakUtterance(string: String) {
    guard let voice = personalVoices.first else { return }

    let utterance = AVSpeechUtterance(string: string)
    utterance.voice = voice
    // Fixed the misspelled `syntheizer` reference.
    synthesizer.speak(utterance)
}

Resources

Documentation Audio Unit
Documentation Creating an audio unit extension
Guide Speech Synthesis Markup Language (SSML)
Documentation Speech synthesis

Create a seamless speech experience in your apps

WWDC20 · 7 snippets

9 min

Chapters

Code shown on screen · 16 snippets

Resources

Related sessions

Create a seamless speech experience in your apps