I’m using Speech to Text on a *.wav file from within the Azure machine learning studio. Since the wav file contains a couple of minutes speech I’m using continuous recognition. I do get the STT working. However, I only get the result on the screen and don’t manage to save it to a file. The file is created, but it is empty.
Can you see what I’m missing here in the recognized_cb function ? My code is written in Python.
def stt_run4(wav_file_path, taal, key, regio, outputfile):
speech_config = speechsdk.SpeechConfig(subscription=key, region=regio)
speech_config.speech_recognition_language=taal
audio_config = speechsdk.audio.AudioConfig(filename=wav_file_path)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
# Set up the output file for the transcript
output_file = open(outputfile, "w")
def stop_cb(evt):
"""callback that signals to stop continuous recognition upon receiving an event `evt`"""
print('CLOSING on {}'.format(evt))
# Close the output file and stop the continuous recognition session
output_file.close()
speech_recognizer.stop_continuous_recognition()
print("Transcript saved in file:", outputfile)
nonlocal done
done = True
def recognized_cb(evt : speechsdk.SpeechRecognitionEventArgs) :
if speechsdk.ResultReason.RecognizingSpeech == evt.result.reason and len(evt.result.text) > 0 :
print('RECOGNIZED:', evt.result.text)
output_file.write(evt.result.text)
output_file.flush()
# Connect callbacks to the events fired by the speech recognizer
speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
speech_recognizer.recognized.connect(recognized_cb)
speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
# stop continuous recognition on either session stopped or canceled events
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
# Start continuous speech recognition
result=speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
return
2
Answers
Console Output :
Text File :
In the recognized_cb() function if condition "speechsdk.ResultReason.RecognizingSpeech == evt.result.reason" needs to be corrected to "speechsdk.ResultReason.RecognizedSpeech==evt.result.reason".
Please make the following code changes so that you can see the desired results.