Dunfey · Hotel WWDC as data, est. 1983
Front desk everything
Years
Topics

2023 Audio & Video

WWDC23 · 16 min · Audio & Video

What’s new in voice processing

Learn how to use the Apple voice processing APIs to achieve the best possible audio experience in your VoIP apps. We’ll show you how to detect when someone is talking while muted, adjust ducking behavior of other audio, and more.

Watch at developer.apple.com ↗

Transcript all transcripts

Chapters

  • 0:00 — Introduction
  • 3:19 — Other audio ducking
  • 7:55 — Muted talker detection
  • 11:37 — Muted talker detection for macOS

Code shown on screen · 9 snippets

Other audio ducking swift · at 5:50 ↗
// Insert code snipp297struct AUVoiceIOOtherAudioDuckingConfiguration {
	Boolean mEnableAdvancedDucking;
	AUVoiceIOOtherAudioDuckingLevel  mDuckingLevel;
};et.
typedef CF_ENUM(UInt32, AUVoiceIOOtherAudioDuckingLevel) {
	kAUVoiceIOOtherAudioDuckingLevelDefault = 0,
	kAUVoiceIOOtherAudioDuckingLevelMin = 10,
	kAUVoiceIOOtherAudioDuckingLevelMid = 20,
	kAUVoiceIOOtherAudioDuckingLevelMax = 30
};
Other audio ducking swift · at 6:48 ↗
// Build the ducking configuration: advanced ducking switched on, with the
// minimum ducking level selected.
const AUVoiceIOOtherAudioDuckingConfiguration duckingConfig{
	.mEnableAdvancedDucking = true,
	.mDuckingLevel = kAUVoiceIOOtherAudioDuckingLevelMin
};
// AUVoiceIO creation code omitted
// Hand the configuration to the voice-processing unit (global scope, element 0).
OSStatus err = AudioUnitSetProperty(
	auVoiceIO,
	kAUVoiceIOProperty_OtherAudioDuckingConfiguration,
	kAudioUnitScope_Global,
	0,
	&duckingConfig,
	sizeof(AUVoiceIOOtherAudioDuckingConfiguration));
Other audio ducking swift · at 6:50 ↗
// Build the ducking configuration: advanced ducking switched on, with the
// minimum ducking level selected.
const AUVoiceIOOtherAudioDuckingConfiguration duckingConfig{
	.mEnableAdvancedDucking = true,
	.mDuckingLevel = kAUVoiceIOOtherAudioDuckingLevelMin
};
// AUVoiceIO creation code omitted
// Hand the configuration to the voice-processing unit (global scope, element 0).
OSStatus err = AudioUnitSetProperty(
	auVoiceIO,
	kAUVoiceIOProperty_OtherAudioDuckingConfiguration,
	kAudioUnitScope_Global,
	0,
	&duckingConfig,
	sizeof(AUVoiceIOOtherAudioDuckingConfiguration));
Other audio ducking swift · at 7:20 ↗
/// Ducking settings for other audio: a flag for advanced ducking plus the
/// requested ducking level.
public struct AVAudioVoiceProcessingOtherAudioDuckingConfiguration {
	/// Available ducking levels; raw values are 0, 10, 20 and 30.
	public enum Level: Int, @unchecked Sendable {
		case `default` = 0, min = 10, mid = 20, max = 30
	}

	/// Whether advanced ducking is enabled.
	public var enableAdvancedDucking: ObjCBool
	/// The ducking level to apply.
	public var duckingLevel: Level
}
Other audio ducking swift · at 7:31 ↗
// Voice processing is enabled on the engine's input node; a failure is
// logged and the snippet carries on.
let engine = AVAudioEngine()
let inputNode = engine.inputNode
do {
	try inputNode.setVoiceProcessingEnabled(true)
} catch {
	print("Could not enable voice processing \(error)")
}
// The initializer labels are the Swift property names (enableAdvancedDucking,
// duckingLevel), not the "m"-prefixed field names of the C struct.
let duckingConfig = AVAudioVoiceProcessingOtherAudioDuckingConfiguration(enableAdvancedDucking: false, duckingLevel: .max)
inputNode.voiceProcessingOtherAudioDuckingConfiguration = duckingConfig
Muted talker detection AUVoiceIO swift · at 7:32 ↗
// Block called with muted-speech activity events from the voice-processing unit.
AUVoiceIOMutedSpeechActivityEventListener listener = ^(AUVoiceIOMutedSpeechActivityEvent event) {
	if (event == kAUVoiceIOSpeechActivityHasStarted) {
		// User has started talking while muted. Prompt the user to un-mute
	} else if (event == kAUVoiceIOSpeechActivityHasEnded) {
		// User has stopped talking while muted
	}
};
// Register the block as the unit's muted-speech activity listener.
OSStatus err = AudioUnitSetProperty(auVoiceIO, kAUVoiceIOProperty_MutedSpeechActivityEventListener, kAudioUnitScope_Global, 0, &listener, sizeof(AUVoiceIOMutedSpeechActivityEventListener));
// When user mutes
UInt32 muteUplinkOutput = 1;
// Reuse the status variable declared above; the snippet declares no `result`.
err = AudioUnitSetProperty(auVoiceIO, kAUVoiceIOProperty_MuteOutput, kAudioUnitScope_Global, 0, &muteUplinkOutput, sizeof(muteUplinkOutput));
Muted talker detection AVAudioEngine swift · at 11:08 ↗
// Closure called with speech-activity events while the input is muted.
let listener = { (activity: AVAudioVoiceProcessingSpeechActivityEvent) in
	switch activity {
	case .started:
		// User has started talking while muted. Prompt the user to un-mute
		break
	case .ended:
		// User has stopped talking while muted
		break
	@unknown default:
		// Any other event value is ignored, as in the if/else-if form.
		break
	}
}
inputNode.setMutedSpeechActivityEventListener(listener)
// When user mutes
inputNode.isVoiceProcessingInputMuted = true
Voice activity detection - implementation with HAL APIs swift · at 12:31 ↗
// Property addresses used below, both on the input scope / main element:
// one to switch voice activity detection on, one to observe its state.
const AudioObjectPropertyAddress kVoiceActivityDetectionEnable{
	kAudioDevicePropertyVoiceActivityDetectionEnable,
	kAudioDevicePropertyScopeInput,
	kAudioObjectPropertyElementMain
};
const AudioObjectPropertyAddress kVoiceActivityDetectionState{
	kAudioDevicePropertyVoiceActivityDetectionState,
	kAudioDevicePropertyScopeInput,
	kAudioObjectPropertyElementMain
};

// Enable Voice Activity Detection on the input device
UInt32 shouldEnable = 1;
OSStatus status = AudioObjectSetPropertyData(
	deviceID, &kVoiceActivityDetectionEnable, 0, NULL, sizeof(shouldEnable), &shouldEnable);

// Register a listener on the Voice Activity Detection State property.
// "listener_callback" is the name of your listener function.
status = AudioObjectAddPropertyListener(
	deviceID,
	&kVoiceActivityDetectionState,
	(AudioObjectPropertyListenerProc)listener_callback,
	NULL);
Voice activity detection - listener_callback implementation swift · at 13:13 ↗
// Property listener for the Voice Activity Detection State property.
// Reads the current state from inObjectID and branches on whether voice
// was detected. Returns the status of the property read.
OSStatus listener_callback(
    AudioObjectID                 inObjectID,
    UInt32                        inNumberAddresses,
    const AudioObjectPropertyAddress*   __nullable inAddresses,
    void* __nullable              inClientData)
{
    // Assuming this is the only property we are listening for, therefore no need to go through inAddresses
    UInt32 voiceDetected = 0;
    UInt32 propertySize = sizeof(UInt32);
    // Query through kVoiceActivityDetectionState, the address declared in the
    // setup snippet that registers this callback.
    OSStatus status = AudioObjectGetPropertyData(inObjectID, &kVoiceActivityDetectionState, 0, NULL, &propertySize, &voiceDetected);

    if (kAudioHardwareNoError == status) {
        if (voiceDetected == 1) {
            // voice activity detected
        } else if (voiceDetected == 0) {
            // voice activity not detected
        }
    }
    return status;
}