I would like to use Azure speech to text to perform speaker identification.
I have executed the following source code and successfully created a profile, but when I try to enroll the voice data into the created profile, I get a response: 400
{'code': 'InvalidRequest', 'message': 'Activation Phrase is not matched'} error.
According to this Microsoft page, the Activation Phrase is not required for speaker identification.
https://learn.microsoft.com/ja-jp/azure/ai-services/speech-service/get-started-speaker-recognition?tabs=script&pivots=programming-language-rest
CreateProfile.py
########### module #############
import sys
import requests
import json
import base64
import csv
########### Args & variable #########################
# CSV file that maps a profile name (column 0) to its profile ID (column 1).
Profile_List = 'app/Profile_List.csv'
ApiPath = 'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles?api-version=2021-09-05'
SubscriptionKey = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'


def load_profiles(path):
    """Return the rows of the profile list CSV at *path*.

    A missing file is treated as an empty list so that the very first
    profile can be created without preparing the CSV by hand.
    """
    try:
        with open(path, newline='') as fp:
            return list(csv.reader(fp))
    except FileNotFoundError:
        return []


def is_registered(rows, profile_name):
    """Return True when *profile_name* appears in the name column of *rows*.

    Only the first column is compared, so a name that happens to equal a
    stored profile ID is not reported as registered. Empty rows are skipped.
    """
    return any(row and row[0] == profile_name for row in rows)


def append_profile_row(fp, profile_name, profile_id):
    """Write one ``name,id`` record, terminated by a newline, to *fp*."""
    # The terminator must be the newline escape '\n'; a plain 'n' glues
    # every record onto a single line and corrupts the list.
    csv.writer(fp, lineterminator='\n').writerow([profile_name, profile_id])


########### Create Profile #########################
def main(argv):
    """Create a text-independent identification profile and record its ID.

    ``argv[1]`` is the profile name. The script exits without calling the
    service when that name is already present in the profile list, and
    exits after printing the HTTP status and response body when the
    service does not return a ``profileId``.
    """
    if len(argv) < 2:
        sys.exit('Usage: CreateProfile.py <profile name>')
    profile_name = argv[1]

    if is_registered(load_profiles(Profile_List), profile_name):
        print('The specified user is already registered.')
        sys.exit()

    headers = {
        # Request headers
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': SubscriptionKey,
    }
    body = {
        'locale': 'ja-JP',
    }
    r = requests.post(
        ApiPath,
        headers=headers,
        json=body,
    )

    # Decode the body once; an error response is not guaranteed to be JSON.
    try:
        payload = r.json()
    except ValueError:
        payload = None
    if not isinstance(payload, dict) or 'profileId' not in payload:
        print('Error:{}'.format(r.status_code))
        print(payload if payload is not None else r.text)
        sys.exit()

    profile_id = payload['profileId']
    print(profile_id)

    # The context manager flushes and closes the file even if writing fails.
    with open(Profile_List, 'a', newline='') as fp:
        append_profile_row(fp, profile_name, profile_id)


if __name__ == '__main__':
    main(sys.argv)
CreateEnrollment.py
########### module #############
import sys
import requests
import json
import base64
import csv
import time
########### Args & variable #########################
# CSV file that maps a profile name (column 0) to its profile ID (column 1).
Profile_List = 'app/Profile_List.csv'
ApiBase = 'https://eastasia.api.cognitive.microsoft.com/speaker-recognition/identification/text-independent/profiles'
ApiVersion = '2021-09-05'
SubscriptionKey = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXX'


def find_profile_id(rows, profile_name):
    """Return the profile ID stored for *profile_name*, or None if absent.

    Only rows that have both a name and an ID column are considered, and
    only the name column is compared. Returning None (instead of falling
    through to the last row) keeps an unknown name from being enrolled
    into somebody else's profile.
    """
    for row in rows:
        if len(row) >= 2 and row[0] == profile_name:
            return row[1]
    return None


def describe_error(response):
    """Return the most specific error detail available in *response*.

    Prefers the ``error`` member of a JSON body, then the whole JSON body,
    and finally the raw text when the body is not JSON at all.
    """
    try:
        payload = response.json()
    except ValueError:
        return response.text
    if isinstance(payload, dict) and 'error' in payload:
        return payload['error']
    return payload


def extract_processing_result(payload):
    """Return (enrollmentStatus, remainingEnrollmentSpeechTime, speechTime).

    Raises KeyError when *payload* has no ``processingResult`` member or
    one of the three fields is missing.
    """
    result = payload['processingResult']
    return (
        result['enrollmentStatus'],
        result['remainingEnrollmentSpeechTime'],
        result['speechTime'],
    )


def main(argv):
    """Enroll ``app/<name>.wav`` into the profile registered for ``argv[1]``.

    Posts the audio, then polls the returned operation URL every three
    seconds until the operation reports ``succeeded`` or ``failed``.
    """
    if len(argv) < 2:
        sys.exit('Usage: CreateEnrollment.py <profile name>')
    profile_name = argv[1]
    wav_file = f'app/{profile_name}.wav'

    with open(Profile_List, newline='') as fp:
        rows = list(csv.reader(fp))
    profile_id = find_profile_id(rows, profile_name)
    if profile_id is None:
        print(f'Profile "{profile_name}" is not registered in {Profile_List}.')
        sys.exit()

    ########### Create Enrollment #########################
    # NOTE(review): the service was reported to reject this request with
    # "Activation Phrase is not matched". With api-version 2021-09-05 the
    # first enrollment audio may have to contain one of the locale's
    # activation phrases -- confirm against the Speaker Recognition REST
    # reference before changing the request.
    api_path = f'{ApiBase}/{profile_id}/enrollments?api-version={ApiVersion}'
    headers = {
        # Request headers
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': SubscriptionKey,
    }
    with open(wav_file, 'rb') as f:
        body = f.read()
    r = requests.post(
        api_path,           # URL
        headers=headers,    # headers
        data=body,          # body
    )
    print(profile_id)
    print('response:', r.status_code)
    if r.status_code != 202:
        print(describe_error(r))
        sys.exit()
    operation_url = r.headers.get('Operation-Location')
    if not operation_url:
        print('Operation-Location header is missing from the response.')
        sys.exit()
    print(operation_url)

    ########### Get Operation Status #########################
    poll_headers = {
        # Request headers
        'Ocp-Apim-Subscription-Key': SubscriptionKey,
    }
    while True:
        r = requests.get(
            operation_url,          # URL
            headers=poll_headers,   # headers
        )
        print('response:', r.status_code)
        if r.status_code != 200:
            print(describe_error(r))
            sys.exit()
        try:
            payload = r.json()
            status = payload['status']
        except (ValueError, KeyError, TypeError):
            print(describe_error(r))
            sys.exit()
        print(f'現在の状態;{status}')
        if status == 'succeeded':
            break
        if status == 'failed':
            message = payload.get('message')
            print(f'error:{message}')
            sys.exit()
        # Still in progress: wait before asking again.
        time.sleep(3)

    enrollment_status, remaining_speech_time, speech_time = extract_processing_result(payload)
    print(f'enrollmentStatus:{enrollment_status}')
    print(f'remainingEnrollmentSpeechTime:{remaining_speech_time}')
    print(f'speechTime:{speech_time}')


if __name__ == '__main__':
    main(sys.argv)
Is the Activation Phrase necessary for speaker separation?
Or is the source code wrong?
2
Answers
Thank you for answering my question. I will try it now.
I created the profile ID using your CreateProfile.py code, and then modified the CreateEnrollment.py code below to convert speech to text using the profile ID and a .wav file.
Code :
CreateEnrollment.py:
Output :
The code ran successfully and converted speech to text using the profile ID.