I am trying to stream the audio I get from the Speech SDK using SPXPushAudioOutputStream. I get all data without an issue and can write it into a wav or mp3 and then play it back with the code below.
/// Demo screen that synthesizes the entered text with the Speech SDK into
/// `pushStream.mp3` in the app's documents directory (via an
/// `SPXPushAudioOutputStream`) and plays that file back with `AVAudioPlayer`.
struct ContentView: View {
    @State private var inputText = ""
    @State private var resultText = ""
    @State private var isPlaying = false
    @State private var audioPlayer: AVAudioPlayer?
    @State private var synthesisCompleted = false

    let speechKey = "censored"
    let serviceRegion = "switzerlandnorth"

    /// Location of the synthesized audio file; shared by synthesis and playback
    /// so both always refer to the same file.
    private var outputFileURL: URL {
        FileManager.default
            .urls(for: .documentDirectory, in: .userDomainMask)[0]
            .appendingPathComponent("pushStream.mp3")
    }

    var body: some View {
        VStack {
            TextField("Enter text to synthesize", text: $inputText)
            Button(action: synthesisToPushAudioOutputStream) {
                Text("Synthesize Speech")
            }
            Button(action: playAudio) {
                Text(isPlaying ? "Stop" : "Play")
            }
        }
        .onChange(of: resultText) { newValue in
            debug("Result text changed to: \(newValue)", function: "body.onChange")
            synthesisCompleted = newValue.contains("Speech synthesis completed")
            debug("Synthesis completed: \(synthesisCompleted)", function: "body.onChange")
        }
    }

    /// Synthesizes `inputText` and writes the audio chunks pushed by the SDK
    /// to the output file, then reports timing, duration and size in `resultText`.
    private func synthesisToPushAudioOutputStream() {
        let startTime = Date()
        debug("Starting speech synthesis...", function: #function)

        let filePath = outputFileURL
        debug("File path: \(filePath.path)", function: #function)

        if !FileManager.default.fileExists(atPath: filePath.path) {
            debug("File doesn't exist. Creating new file...", function: #function)
            FileManager.default.createFile(atPath: filePath.path, contents: nil, attributes: nil)
        } else {
            debug("File already exists. Will overwrite.", function: #function)
        }

        guard let fileHandle = try? FileHandle(forWritingTo: filePath) else {
            debug("Failed to open file handle", function: #function)
            updateResultText("Failed to open file at \(filePath.path)")
            return
        }
        debug("File handle opened successfully", function: #function)

        // Opening for writing does not discard old content; truncate so a
        // shorter synthesis does not leave stale audio at the end of the file.
        do {
            try fileHandle.truncate(atOffset: 0)
        } catch {
            debug("Failed to truncate existing file: \(error)", function: #function)
            updateResultText("Failed to open file at \(filePath.path)")
            return
        }

        var totalBytesWritten: UInt = 0
        // NOTE(review): the initializer is assumed to be failable (nullable in
        // the Objective-C header) — confirm against the SDK version in use.
        guard let stream = SPXPushAudioOutputStream(writeHandler: { data -> UInt in
            // Persist each chunk as it arrives; report 0 bytes consumed on failure.
            do {
                try fileHandle.write(contentsOf: data)
            } catch {
                debug("Failed to write \(data.count) bytes: \(error)", function: "SPXPushAudioOutputStream.writeHandler")
                return 0
            }
            totalBytesWritten += UInt(data.count)
            debug("Wrote \(data.count) bytes. Total: \(totalBytesWritten) bytes", function: "SPXPushAudioOutputStream.writeHandler")
            return UInt(data.count)
        }, closeHandler: {
            do {
                try fileHandle.close()
            } catch {
                debug("Failed to close file: \(error)", function: "SPXPushAudioOutputStream.closeHandler")
            }
            debug("File closed. Total bytes written: \(totalBytesWritten)", function: "SPXPushAudioOutputStream.closeHandler")
        }) else {
            debug("Failed to create speech or audio configuration", function: #function)
            updateResultText("Speech Config Error")
            return
        }

        debug("Configuring audio and speech...", function: #function)
        guard let audio = try? SPXAudioConfiguration(streamOutput: stream),
              let config = try? SPXSpeechConfiguration(subscription: speechKey, region: serviceRegion) else {
            debug("Failed to create speech or audio configuration", function: #function)
            updateResultText("Speech Config Error")
            return
        }

        // The file is named .mp3, so request MP3 output explicitly.
        // NOTE(review): confirm the enum case name against the SDK version in use.
        config.setSpeechSynthesisOutputFormat(.audio16Khz32KBitRateMonoMp3)
        debug("Set output format to MP3", function: #function)

        debug("Creating speech synthesizer...", function: #function)
        guard let synth = try? SPXSpeechSynthesizer(speechConfiguration: config, audioConfiguration: audio) else {
            debug("Failed to create speech synthesizer", function: #function)
            updateResultText("Speech Synthesis Error")
            return
        }

        // NOTE(review): speakText is called synchronously from the button
        // action; presumably this blocks the UI until synthesis ends — verify.
        debug("Starting text-to-speech...", function: #function)
        guard let result = try? synth.speakText(inputText) else {
            debug("Speech synthesis failed (no result)", function: #function)
            updateResultText("Speech synthesis error.")
            return
        }

        if result.reason == SPXResultReason.canceled {
            // `try?` instead of `try!`: a failure to read the details must not crash.
            let details = try? SPXSpeechSynthesisCancellationDetails(fromCanceledSynthesisResult: result)
            let errorDetails = details?.errorDetails ?? "Unknown error"
            debug("Speech synthesis canceled: \(errorDetails)", function: #function)
            updateResultText("Canceled: \(errorDetails)")
        } else if result.reason == SPXResultReason.synthesizingAudioCompleted {
            let synthesisTime = Date().timeIntervalSince(startTime)
            let elapsed = String(format: "%.2f", synthesisTime)
            debug("Speech synthesis completed successfully in \(elapsed) seconds", function: #function)
            updateResultText("Speech synthesis completed in \(elapsed) seconds.")

            // Add a small delay to ensure file writing is complete
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) {
                // Get file size
                do {
                    let attributes = try FileManager.default.attributesOfItem(atPath: filePath.path)
                    let fileSize = attributes[.size] as? Int64 ?? 0
                    debug("File size: \(fileSize) bytes", function: "DispatchQueue.asyncAfter")
                } catch {
                    debug("Error getting file size: \(error)", function: "DispatchQueue.asyncAfter")
                }

                // Get audio duration
                let asset = AVAsset(url: filePath)
                let durationSeconds = CMTimeGetSeconds(asset.duration)
                debug("Audio duration: \(durationSeconds) seconds", function: "DispatchQueue.asyncAfter")

                self.updateResultText("Speech synthesis completed in \(elapsed) seconds. Audio Duration: \(String(format: "%.2f", durationSeconds)) seconds, Size: \(FileManager.default.sizeFormatted(ofPath: filePath.path) ?? "Unknown")")
            }
        } else {
            debug("Speech synthesis failed with reason: \(result.reason)", function: #function)
            updateResultText("Speech synthesis error.")
        }
    }

    /// Publishes `text` as the result message on the main queue and derives
    /// `synthesisCompleted` from it.
    private func updateResultText(_ text: String) {
        DispatchQueue.main.async {
            self.resultText = text
            debug("Updated result text: \(text)", function: #function)
            self.synthesisCompleted = text.contains("Speech synthesis completed")
            debug("Synthesis completed: \(self.synthesisCompleted)", function: #function)
        }
    }

    /// Toggles playback of the synthesized file.
    private func playAudio() {
        if isPlaying {
            audioPlayer?.stop()
            isPlaying = false
            debug("Audio playback stopped", function: #function)
        } else {
            let filePath = outputFileURL
            debug("Attempting to play audio from: \(filePath.path)", function: #function)
            do {
                let player = try AVAudioPlayer(contentsOf: filePath)
                // Keep the player in state so it is not released while playing.
                audioPlayer = player
                player.play()
                isPlaying = true
                debug("Audio playback started", function: #function)
                debug("Audio duration: \(player.duration) seconds", function: #function)
            } catch {
                updateResultText("Error playing audio: \(error.localizedDescription)")
                debug("Detailed error playing audio: \(error)", function: #function)
            }
        }
    }

    /// Prints `message` prefixed with the current time and the reporting function name.
    private func debug(_ message: String, function: String) {
        let timestamp = DateFormatter.localizedString(from: Date(), dateStyle: .none, timeStyle: .medium)
        print("[\(timestamp)] [\(function)] \(message)")
    }
}
// Add this extension for formatting file size
extension FileManager {
    /// Returns the size of the item at `path` as a human-readable string.
    ///
    /// - Parameter path: File-system path of the item to measure.
    /// - Returns: The size formatted with `ByteCountFormatter` in `.file` style,
    ///   or `nil` when the item's attributes cannot be read (for example, the
    ///   path does not exist). A missing size attribute is formatted as 0 bytes.
    func sizeFormatted(ofPath path: String) -> String? {
        guard let attributes = try? attributesOfItem(atPath: path) else { return nil }
        let size = attributes[.size] as? Int64 ?? 0
        return ByteCountFormatter.string(fromByteCount: size, countStyle: .file)
    }
}
However I cannot for the life of me figure out how I would go about streaming it. I have very little knowledge of AVPlayer, so that obviously doesn’t help, but I tried using every approach I could find while browsing the net… any pointers to potential solutions would be highly appreciated!
Based on Sampath's answer, here's the code that finally worked for me (incl. the AudioPlayerNode for playback):
To stream audio generated by the Speech SDK using `SPXPushAudioOutputStream`, you can modify your existing code to play the audio as it is being streamed. I have configured the push stream to feed its data to an `AVAudioEngine` for real-time playback. The `synthesisToPushAudioOutputStream` method synthesizes text to speech using `SPXSpeechSynthesizer`, and I also saved the synthesized audio as a `.wav` file.
I used this Git repository as the starting point for the text-to-speech code in Swift.