I am trying to stream the audio I get from the Speech SDK using SPXPushAudioOutputStream. I get all data without an issue and can write it into a wav or mp3 and then play it back with the code below.
/// Demo view that synthesizes `inputText` to an MP3 file through an
/// `SPXPushAudioOutputStream` and plays the finished file back with `AVAudioPlayer`.
///
/// The audio is only playable once synthesis has finished; the "Play" button stays
/// disabled until `resultText` contains "Speech synthesis completed".
struct ContentView: View {
    @State private var inputText = """
    Die Gesundheitspolitik bleibt ein hartes Pflaster für Reformen. Bundesrätin Elisabeth Baume-Schneider forderte alle Akteure am Sonntag «nachdrücklich» auf, ihren Teil der Verantwortung zu übernehmen und «konkrete, mehrheitsfähige Sparvorschläge» vorzulegen. Mit Blick auf die vergangenen Jahrzehnte kann man darüber nur schmunzeln.
    Solange besagte Akteure ihren Besitzstand eisern verteidigen und solange die politischen Kräfte aus allen Lagern ihrem Lobbydruck nachgeben, wird sich nichts ändern. Auch in den Kantonen überwiegen die Hemmungen, Spitäler zu schliessen und über die Grenzen hinweg die Zusammenarbeit zu verstärken. Ausnahmen bestätigen die Regel.
    Das sagen die Ökonomen
    Deshalb stellt sich die Frage, ob man nicht das zunehmend absurde Kopfprämiensystem abschaffen und auf ein durch Steuergelder finanziertes Gesundheitswesen umstellen sollte, wie in anderen Ländern. watson hat diese Frage den Gesundheitsökonomen Heinz Locher und Willy Oggier gestellt – und interessante Antworten erhalten.
    """
    @State private var resultText = ""
    @State private var isPlaying = false
    @State private var audioPlayer: AVAudioPlayer?
    @State private var synthesisCompleted = false

    // NOTE(review): credentials are hard-coded here; do not ship a real key in source.
    let speechKey = "censored"
    let serviceRegion = "switzerlandnorth"

    /// Location of the synthesized audio file inside the app's Documents directory.
    /// Shared by synthesis and playback so both always refer to the same file.
    private var audioFileURL: URL {
        FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
            .appendingPathComponent("pushStream.mp3")
    }

    var body: some View {
        VStack {
            TextField("Enter text to synthesize", text: $inputText)
                .textFieldStyle(RoundedBorderTextFieldStyle())
                .padding()
            Button(action: synthesisToPushAudioOutputStream) {
                Text("Synthesize Speech")
            }
            .padding()
            Button(action: playAudio) {
                Text(isPlaying ? "Stop" : "Play")
            }
            .padding()
            .disabled(!synthesisCompleted)
            Text(resultText)
                .padding()
        }
        .onChange(of: resultText) { newValue in
            debug("Result text changed to: \(newValue)", function: "body.onChange")
            synthesisCompleted = newValue.contains("Speech synthesis completed")
            debug("Synthesis completed: \(synthesisCompleted)", function: "body.onChange")
        }
    }

    /// Synthesizes `inputText` to MP3 and writes the audio pushed by the SDK into
    /// `audioFileURL`, reporting progress and the outcome through `resultText`.
    ///
    /// NOTE(review): `speakText` is a blocking call and this method runs as a button
    /// action, so the UI is unresponsive while synthesis is in progress.
    private func synthesisToPushAudioOutputStream() {
        let startTime = Date()
        debug("Starting speech synthesis...", function: #function)

        let filePath = audioFileURL
        debug("File path: \(filePath.path)", function: #function)

        if FileManager.default.fileExists(atPath: filePath.path) {
            debug("File already exists. Will overwrite.", function: #function)
        } else {
            debug("File doesn't exist. Creating new file...", function: #function)
        }

        // Always (re)create the file: `createFile` replaces existing contents, so a
        // shorter synthesis can never leave stale bytes from a previous run at the tail.
        guard FileManager.default.createFile(atPath: filePath.path, contents: nil, attributes: nil),
              let fileHandle = try? FileHandle(forWritingTo: filePath) else {
            debug("Failed to open file handle", function: #function)
            updateResultText("Failed to open file at \(filePath.path)")
            return
        }
        debug("File handle opened successfully", function: #function)

        var totalBytesWritten: UInt = 0
        let pushStream = SPXPushAudioOutputStream(writeHandler: { data -> UInt in
            fileHandle.write(data)
            totalBytesWritten += UInt(data.count)
            self.debug("Wrote \(data.count) bytes. Total: \(totalBytesWritten) bytes", function: "SPXPushAudioOutputStream.writeHandler")
            return UInt(data.count)
        }, closeHandler: {
            // `close()` tolerates an already-closed handle, unlike a second `closeFile()`.
            try? fileHandle.close()
            self.debug("File closed. Total bytes written: \(totalBytesWritten)", function: "SPXPushAudioOutputStream.closeHandler")
        })
        guard let stream = pushStream else {
            try? fileHandle.close()
            debug("Failed to create push audio output stream", function: #function)
            updateResultText("Speech Config Error")
            return
        }

        debug("Configuring audio and speech...", function: #function)
        let audioConfig = try? SPXAudioConfiguration(streamOutput: stream)
        let speechConfig = try? SPXSpeechConfiguration(subscription: speechKey, region: serviceRegion)
        guard let config = speechConfig, let audio = audioConfig else {
            try? fileHandle.close()
            debug("Failed to create speech or audio configuration", function: #function)
            updateResultText("Speech Config Error")
            return
        }
        config.setSpeechSynthesisOutputFormat(.audio24Khz160KBitRateMonoMp3)
        debug("Set output format to MP3", function: #function)
        updateResultText("Synthesizing...")

        debug("Creating speech synthesizer...", function: #function)
        guard let synth = try? SPXSpeechSynthesizer(speechConfiguration: config, audioConfiguration: audio) else {
            try? fileHandle.close()
            debug("Failed to create speech synthesizer", function: #function)
            updateResultText("Speech Synthesis Error")
            return
        }

        debug("Starting text-to-speech...", function: #function)
        let result: SPXSpeechSynthesisResult
        do {
            result = try synth.speakText(inputText)
        } catch {
            debug("Speech synthesis failed (no result): \(error)", function: #function)
            updateResultText("Speech synthesis error.")
            return
        }

        switch result.reason {
        case .canceled:
            // Cancellation details are best-effort; never crash while reporting an error.
            let details = try? SPXSpeechSynthesisCancellationDetails(fromCanceledSynthesisResult: result)
            let reason = details?.errorDetails ?? "Unknown error"
            debug("Speech synthesis canceled: \(reason)", function: #function)
            updateResultText("Canceled: \(reason)")
        case .synthesizingAudioCompleted:
            let synthesisTime = Date().timeIntervalSince(startTime)
            let elapsed = String(format: "%.2f", synthesisTime)
            debug("Speech synthesis completed successfully in \(elapsed) seconds", function: #function)
            updateResultText("Speech synthesis completed in \(elapsed) seconds.")
            // Add a small delay to ensure file writing is complete
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) {
                self.reportFileStatistics(at: filePath, elapsed: elapsed)
            }
        default:
            debug("Speech synthesis failed with reason: \(result.reason)", function: #function)
            updateResultText("Speech synthesis error.")
        }
    }

    /// Logs the size and duration of the synthesized file and appends both to the
    /// "completed" status message.
    private func reportFileStatistics(at filePath: URL, elapsed: String) {
        // Get file size
        do {
            let attributes = try FileManager.default.attributesOfItem(atPath: filePath.path)
            let fileSize = attributes[.size] as? Int64 ?? 0
            debug("File size: \(fileSize) bytes", function: "DispatchQueue.asyncAfter")
        } catch {
            debug("Error getting file size: \(error)", function: "DispatchQueue.asyncAfter")
        }

        // Get audio duration
        let asset = AVAsset(url: filePath)
        let durationSeconds = CMTimeGetSeconds(asset.duration)
        debug("Audio duration: \(durationSeconds) seconds", function: "DispatchQueue.asyncAfter")

        let formattedDuration = String(format: "%.2f", durationSeconds)
        let formattedSize = FileManager.default.sizeFormatted(ofPath: filePath.path) ?? "Unknown"
        updateResultText("Speech synthesis completed in \(elapsed) seconds. Audio Duration: \(formattedDuration) seconds, Size: \(formattedSize)")
    }

    /// Publishes `text` as the status message on the main queue and derives
    /// `synthesisCompleted` from it.
    private func updateResultText(_ text: String) {
        DispatchQueue.main.async {
            self.resultText = text
            self.debug("Updated result text: \(text)", function: #function)
            self.synthesisCompleted = text.contains("Speech synthesis completed")
            self.debug("Synthesis completed: \(self.synthesisCompleted)", function: #function)
        }
    }

    /// Toggles playback of the synthesized file.
    ///
    /// NOTE(review): nothing resets `isPlaying` when playback reaches the end of the
    /// file, so the button keeps showing "Stop" until it is tapped again.
    private func playAudio() {
        if isPlaying {
            audioPlayer?.stop()
            isPlaying = false
            debug("Audio playback stopped", function: #function)
            return
        }

        let filePath = audioFileURL
        debug("Attempting to play audio from: \(filePath.path)", function: #function)
        do {
            let player = try AVAudioPlayer(contentsOf: filePath)
            audioPlayer = player
            player.play()
            isPlaying = true
            debug("Audio playback started", function: #function)
            debug("Audio duration: \(player.duration) seconds", function: #function)
        } catch {
            updateResultText("Error playing audio: \(error.localizedDescription)")
            debug("Detailed error playing audio: \(error)", function: #function)
        }
    }

    /// Prints `message` to the console, prefixed with the current time and the
    /// name of the calling context.
    private func debug(_ message: String, function: String) {
        let timestamp = DateFormatter.localizedString(from: Date(), dateStyle: .none, timeStyle: .medium)
        print("[\(timestamp)] [\(function)] \(message)")
    }
}
// Add this extension for formatting file size
extension FileManager {
    /// Returns the size of the item at `path` as a human-readable string
    /// (file-style units), or `nil` when the item's attributes cannot be read.
    ///
    /// - Parameter path: File-system path of the item to measure.
    /// - Returns: The formatted size; a missing size attribute is formatted as 0 bytes.
    func sizeFormatted(ofPath path: String) -> String? {
        let itemAttributes: [FileAttributeKey: Any]
        do {
            itemAttributes = try attributesOfItem(atPath: path)
        } catch {
            return nil
        }

        let byteCount: Int64
        if let recordedSize = itemAttributes[.size] as? Int64 {
            byteCount = recordedSize
        } else {
            byteCount = 0
        }
        return ByteCountFormatter.string(fromByteCount: byteCount, countStyle: .file)
    }
}
However, I cannot for the life of me figure out how to stream the audio while it is still being synthesized. I have very little knowledge of AVPlayer, which obviously doesn't help, but I have tried every approach I could find while browsing the net… Any pointers to potential solutions would be highly appreciated!
2
Answers
Based on Sampath's answer, here's the code that finally worked for me (incl. the AudioPlayerNode for playback):
To stream audio generated by the Speech SDK using `SPXPushAudioOutputStream`, you can modify your existing code to play the audio as it is being streamed. I have configured the `SPXPushAudioOutputStream` to stream data to an `AVAudioEngine` for real-time playback. The `synthesisToWAV()` method synthesizes text to speech using `SPXSpeechSynthesizer` and saves the synthesized audio as a `.wav` file.
`samples_swift_ios.wav`:
I used this Git repository as the basis for the text-to-speech code in Swift.