Azure speech to text with identification error 'Activation Phrase is not matched'

midorikawairu
March 15, 2024
172 views
0 votes
2 Answers

I would like to use Azure speech to text to perform speaker identification.
I executed the following source code and successfully created a profile, but when I try to enroll the voice data into the created profile, I get an HTTP 400 response with this error:
{'code': 'InvalidRequest', 'message': 'Activation Phrase is not matched'}

According to this Microsoft page, the Activation Phrase is not required for speaker identification.
https://learn.microsoft.com/ja-jp/azure/ai-services/speech-service/get-started-speaker-recognition?tabs=script&pivots=programming-language-rest

CreateProfile.py

########### module #############
import sys                  
import requests             
import json                 
import base64
import csv

########### Args & variable #########################
# Usage: python CreateProfile.py <profile_name>
#
# Creates a text-independent identification profile through the REST endpoint
# below and appends a "<profile name>,<profile id>" row to Profile_List.
args = sys.argv
if len(args) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError.
    print('Usage: python CreateProfile.py <profile_name>')
    sys.exit(1)
Profile_Name = args[1]
Profile_List = 'app/Profile_List.csv'

########### Create Profile #########################
# Load the existing registrations. A missing list file simply means that no
# profile has been registered yet, so treat it as empty instead of crashing.
try:
    with open(Profile_List, newline='') as fp:
        lst = list(csv.reader(fp))
except FileNotFoundError:
    lst = []

# Compare against the name column only; a substring-free match on the whole
# row would also trigger when the argument happens to equal a profile id.
if any(row and row[0] == Profile_Name for row in lst):
    print('The specified user is already registered.')
    sys.exit()

ApiPath = 'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles?api-version=2021-09-05'


headers = {
    # Request headers
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

body = {
    'locale':'ja-JP',
}

r = requests.post(
    ApiPath,
    headers=headers,
    json=body,
)

try:
    ProfileId = r.json()['profileId']
except (ValueError, KeyError, TypeError):
    # ValueError: the body is not JSON; KeyError/TypeError: it is JSON but
    # carries no 'profileId'. Print the raw body so that reporting the
    # failure cannot itself raise a second exception.
    print('Error:{}'.format(r.status_code))
    print(r.text)
    sys.exit()

print(ProfileId)

# newline='' lets the csv module control line endings, and the context
# manager guarantees the row is flushed and the handle closed.
with open(Profile_List, 'a', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow([Profile_Name, ProfileId])

CreateEnrollment.py

########### module #############
import sys                  
import requests             
import json                 
import base64
import csv
import time

########### Args & variable #########################
# Usage: python CreateEnrollment.py <profile_name>
#
# Looks up the profile id registered for <profile_name> in Profile_List,
# uploads app/<profile_name>.wav as enrollment audio, then polls the returned
# operation URL until the enrollment has succeeded or failed.
args = sys.argv
if len(args) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError.
    print('Usage: python CreateEnrollment.py <profile_name>')
    sys.exit(1)
Profile_Name = args[1]
Profile_List = 'app/Profile_List.csv'
WavFile = f'app/{Profile_Name}.wav'


def _error_detail(resp):
    """Return the most specific error description available in *resp*.

    Prefers the 'error' member of a JSON body, then the whole JSON body, and
    finally the raw text when the body is not JSON at all, so that reporting
    an error can never raise a second exception.
    """
    try:
        payload = resp.json()
    except ValueError:
        return resp.text
    if isinstance(payload, dict) and 'error' in payload:
        return payload['error']
    return payload


with open(Profile_List, newline='') as fp:
    lst = list(csv.reader(fp))

# Rows are read as [profile name, profile id]. Stop when the name is unknown:
# falling through would silently enroll the audio into whichever profile
# happens to be on the last row of the list.
ProfileId = next(
    (row[1] for row in lst if len(row) >= 2 and row[0] == Profile_Name),
    None,
)
if ProfileId is None:
    print('The specified user is not registered.')
    sys.exit(1)

########### Create Enrollment #########################
ApiPath = f'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles/{ProfileId}/enrollments?api-version=2021-09-05'

headers = {
    # Request headers
    'Content-Type': 'application/octet-stream',
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

with open(WavFile, 'rb') as f:
    body = f.read()

r = requests.post(
    ApiPath,            # URL
    headers=headers,    # headers
    data=body,          # body (raw audio bytes)
)

print(ProfileId)

response = r
print('response:', response.status_code)
if response.status_code != 202:
    print(_error_detail(response))
    sys.exit()

# The script relies on this header to locate the status resource to poll.
operation_url = response.headers.get('Operation-Location')
if operation_url is None:
    print(_error_detail(response))
    sys.exit()
print(operation_url)
####################################
########### Get Operation Status #########################
url = operation_url

headers = {
    # Request headers
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

status = ''
while status != 'succeeded':

    r = requests.get(
        url,                # URL
        headers=headers,    # headers
    )

    response = r
    print('response:', response.status_code)
    if response.status_code != 200:
        print(_error_detail(response))
        sys.exit()

    # Parse the body once per poll and reuse it below.
    try:
        result = response.json()
        status = result['status']
    except (ValueError, KeyError, TypeError):
        print(_error_detail(response))
        sys.exit()

    print(f'現在の状態；{status}')
    if status == 'failed':
        # Fall back to the whole payload when no 'message' member is present.
        message = result.get('message', result)
        print(f'error:{message}')
        sys.exit()
    if status != 'succeeded':
        # Still in progress: wait before asking again.
        time.sleep(3)

processing_result = result['processingResult']
enrollmentStatus = processing_result['enrollmentStatus']
remainingEnrollmentSpeechTime = processing_result['remainingEnrollmentSpeechTime']
speechTime = processing_result['speechTime']

Is the Activation Phrase necessary for speaker identification?
Or is the source code wrong?

Answers

Chosen as BEST ANSWER
- midorikawairu
- March 15, 2024 at 3:41 pm
- 0 votes
0
Thank you for answering my question. I will try it now.

(Edit)

I created the profile ID using your CreateProfile.py code, and then modified the CreateEnrollment.py code below to convert speech to text using the profile ID and a .wav file.

Code :

CreateEnrollment.py:

import azure.cognitiveservices.speech as speechsdk

def recognize_speech(audio_file_path: str, subscription_key: str, region: str, profile_id: str) -> None:
    """Run one-shot speech recognition on a WAV file and print the outcome.

    Args:
        audio_file_path: Path of the audio file handed to ``AudioConfig``.
        subscription_key: Speech resource key.
        region: Speech resource region.
        profile_id: Value stored on the speech config as
            ``speech_property_id`` (see the review note below).

    Prints the recognized text, a no-match notice, or the cancellation
    reason (plus error details when the cancellation was caused by an error).
    """
    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)

    # NOTE(review): this only sets a plain Python attribute on the config
    # object; it does not look like a documented SpeechConfig setting, so the
    # profile id presumably has no influence on recognition - TODO confirm.
    speech_config.speech_property_id = profile_id
    audio_input = speechsdk.audio.AudioConfig(filename=audio_file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    print("Recognizing speech from the audio file...")

    # recognize_once() returns after a single utterance has been processed.
    result = speech_recognizer.recognize_once()

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized text:", result.text)
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized.")
    elif result.reason == speechsdk.ResultReason.Canceled:
        # The Python SDK exposes the details on the result object itself;
        # there is no CancellationDetails.from_result() factory in Python.
        cancellation = result.cancellation_details
        print("Cancellation reason:", cancellation.reason)
        if cancellation.reason == speechsdk.CancellationReason.Error:
            print("Error details:", cancellation.error_details)

if __name__ == "__main__":
    # Placeholders: fill in the audio path, profile id and Speech resource
    # credentials before running.
    audio_file_path = "path/to/app/JohnDoe.wav"
    profile_id = "<profile_id>"
    region = "<speech_region>"
    subscription_key = "<speech_key>"

    # Probe the audio file by opening it once; only a missing file is
    # reported here, any other I/O error propagates unchanged.
    audio_found = True
    try:
        probe = open(audio_file_path, "rb")
        probe.close()
    except FileNotFoundError:
        audio_found = False

    if audio_found:
        recognize_speech(audio_file_path, subscription_key, region, profile_id)
    else:
        print("Audio file not found.")

Output :

The code ran successfully and converted speech to text using the profile ID.

C:\Users\xxxxxxxx\Documents\xxxxxxxxx>python CreateEnrollment.py JohnDoe
Recognizing speech from the audio file...
Recognized text: Hi John Doe. Welcome to my world.

Please signup or login to give your own answer.

Click here to cancel reply.