skip to Main Content

I would like to use Azure speech to text to perform speaker identification.
I have executed the following source code and successfully created a profile, but when I try to enroll the voice data into the created profile, I get a response: 400
{'code': 'InvalidRequest', 'message': 'Activation Phrase is not matched'} error.

According to this Microsoft page, the Activation Phrase is not required for speaker identification.
https://learn.microsoft.com/ja-jp/azure/ai-services/speech-service/get-started-speaker-recognition?tabs=script&pivots=programming-language-rest

CreateProfile.py

########### module #############
import sys                  
import requests             
import json                 
import base64
import csv

########### Args & variable #########################
# CreateProfile.py: create a text-independent speaker-identification profile
# and record the (name, profileId) pair in a local CSV list.
#
# Usage: python CreateProfile.py <profile_name>
args = sys.argv
if len(args) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    print('Usage: python CreateProfile.py <profile_name>')
    sys.exit(1)
Profile_Name = args[1]
Profile_List = 'app/Profile_List.csv'

########### Create Profile #########################
# Load the names recorded so far. The list file does not exist before the
# first profile is created, so a missing file is treated as an empty list.
try:
    with open(Profile_List, newline='') as fp:
        lst = list(csv.reader(fp))
except FileNotFoundError:
    lst = []

# Refuse to create a second profile under a name that is already recorded.
if any(Profile_Name in row for row in lst):
    print('The specified user is already registered.')
    sys.exit()

ApiPath = 'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles?api-version=2021-09-05'


headers = {
    # Request headers
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

body = {
    'locale':'ja-JP',
}

r = requests.post(
    ApiPath,
    headers=headers,
    json=body,
)

try:
    ProfileId = r.json()['profileId']
except (ValueError, KeyError, TypeError):
    # ValueError: the body is not JSON; KeyError/TypeError: JSON without a
    # 'profileId' member. Print the raw body so that reporting the error can
    # never raise a second exception.
    print('Error:{}'.format(r.status_code))
    print(r.text)
    sys.exit()

print(ProfileId)

# newline='' lets the csv module control line endings, and the terminator must
# be a real newline ('\n'); the previous 'n' appended a literal letter to the
# profile ID and ran all rows together on one line.
with open(Profile_List, 'a', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow([Profile_Name, ProfileId])

CreateEnrollment.py

########### module #############
import sys                  
import requests             
import json                 
import base64
import csv
import time

########### Args & variable #########################
# CreateEnrollment.py: upload app/<profile_name>.wav as enrollment audio for
# the profile recorded under <profile_name>, then poll the returned operation
# URL until the service reports 'succeeded' or 'failed'.
#
# Usage: python CreateEnrollment.py <profile_name>
args = sys.argv
if len(args) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    print('Usage: python CreateEnrollment.py <profile_name>')
    sys.exit(1)
Profile_Name = args[1]
Profile_List = 'app/Profile_List.csv'
WavFile = f'app/{Profile_Name}.wav'


def _describe_error(resp):
    """Return the 'error' member of a JSON response, or the raw body text.

    Never raises for a non-JSON body or a body without an 'error' member, so
    it is safe to call from error-reporting paths.
    """
    try:
        return resp.json()['error']
    except (ValueError, KeyError, TypeError):
        return resp.text


with open(Profile_List, newline='') as fp:
    lst = list(csv.reader(fp))

# Look up the profile ID stored next to the requested name. When the name is
# absent the script must stop: the previous loop fell through and silently
# used the LAST row's profile ID (or raised NameError on an empty list).
ProfileId = None
for row in lst:
    if Profile_Name in row and len(row) > 1:
        ProfileId = row[1]
        break

if ProfileId is None:
    print('The specified user is not registered.')
    sys.exit(1)

########### Create Enrollment #########################
ApiPath = f'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles/{ProfileId}/enrollments?api-version=2021-09-05'

headers = {
    # Request headers
    'Content-Type': 'application/octet-stream',
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

with open(WavFile, 'rb') as f:
    body = f.read()

r = requests.post(
    ApiPath,            # URL
    headers=headers,    # headers
    data=body,          # body (bytes of the audio file)
)

print(ProfileId)

response = r
print('response:', response.status_code)
if response.status_code != 202:
    # Anything other than 202 is treated as a rejected enrollment.
    print(_describe_error(response))
    sys.exit()

# On 202 the URL to poll is read from the Operation-Location header.
operation_url = response.headers.get('Operation-Location')
if operation_url is None:
    print('Operation-Location header is missing from the response.')
    sys.exit(1)
print(operation_url)
####################################
########### Get Operation Status #########################
url = operation_url

headers = {
    # Request headers
    'Ocp-Apim-Subscription-Key': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}

status = ''
while status != 'succeeded':

    r = requests.get(
        url,                # URL
        headers=headers,    # headers
    )

    response = r
    print('response:', response.status_code)
    if response.status_code != 200:
        print(_describe_error(response))
        sys.exit()

    try:
        operation = response.json()
        status = operation['status']
    except (ValueError, KeyError, TypeError):
        # Unexpected body shape: report it without raising again.
        print(_describe_error(response))
        sys.exit()

    print(f'現在の状態;{status}')
    if status == 'failed':
        message = operation.get('message')
        print(f'error:{message}')
        sys.exit()
    if status != 'succeeded':
        # Still in progress: wait before polling again.
        time.sleep(3)

# Values reported by the last (succeeded) poll response.
processing_result = operation['processingResult']
enrollmentStatus = processing_result['enrollmentStatus']
remainingEnrollmentSpeechTime = processing_result['remainingEnrollmentSpeechTime']
speechTime = processing_result['speechTime']

Is the Activation Phrase necessary for speaker identification?
Or is the source code wrong?

2

Answers


  1. Chosen as BEST ANSWER

    Thank you for answering my question. I will try it now.


  2. I created the profile ID using your CreateProfile.py code, and then modified the CreateEnrollment.py code below to convert speech to text using the profile ID and a .wav file.

    Code :

    CreateEnrollment.py:

    import azure.cognitiveservices.speech as speechsdk
    
    def recognize_speech(audio_file_path, subscription_key, region, profile_id):
        """Run a single recognize_once() call on an audio file and print the outcome.

        Args:
            audio_file_path: Path of the audio file passed to AudioConfig.
            subscription_key: Speech resource key passed to SpeechConfig.
            region: Speech resource region passed to SpeechConfig.
            profile_id: Speaker profile ID; it is only stored on the config
                object (see the review note below).

        Returns:
            None. The recognized text, a no-match notice, or the cancellation
            reason (plus error details for an error) is printed.
        """
        speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)

        # NOTE(review): `speech_property_id` does not appear to be a documented
        # SpeechConfig property, so this assignment presumably has no effect on
        # recognition (speech-to-text does not use speaker profiles) - confirm
        # against the SDK reference before relying on it.
        speech_config.speech_property_id = profile_id
        audio_input = speechsdk.audio.AudioConfig(filename=audio_file_path)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

        print("Recognizing speech from the audio file...")

        result = speech_recognizer.recognize_once()

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print("Recognized text:", result.text)
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized.")
        elif result.reason == speechsdk.ResultReason.Canceled:
            # The Python SDK exposes cancellation info as a property of the
            # result; `CancellationDetails.from_result(...)` is the C#/Java
            # pattern and is not used in the Python samples.
            cancellation = result.cancellation_details
            print("Cancellation reason:", cancellation.reason)
            if cancellation.reason == speechsdk.CancellationReason.Error:
                print("Error details:", cancellation.error_details)
    
    if __name__ == "__main__":
        # Placeholder settings; replace with real values before running.
        audio_file_path = "path/to/app/JohnDoe.wav"
        profile_id = "<profile_id>"
        region = "<speech_region>"
        subscription_key = "<speech_key>"

        # Probe the audio file by opening and immediately closing it; only a
        # missing file is handled here, any other error propagates.
        audio_file_found = True
        try:
            open(audio_file_path, "rb").close()
        except FileNotFoundError:
            audio_file_found = False

        if audio_file_found:
            recognize_speech(audio_file_path, subscription_key, region, profile_id)
        else:
            print("Audio file not found.")
    

    Output :

    The code ran successfully and converted speech to text using the profile ID.

    C:\Users\xxxxxxxx\Documents\xxxxxxxxx>python CreateEnrollment.py JohnDoe
    Recognizing speech from the audio file...
    Recognized text: Hi John Doe. Welcome to my world.
    

    enter image description here

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search