A way to play Azure output format "audio-16khz-128kbitrate-mono-mp3" in Javascript

gOliveiraC
December 12, 2022
231 views
0 votes
2 Answers

I’m calling the Azure TTS REST API with the header X-Microsoft-OutputFormat set to audio-24khz-160kbitrate-mono-mp3, and I don’t know how to convert and play the audio from the response.
Does anyone know how to play the audio response when calling the Azure Cognitive Services REST API?

Tks.

I tried to convert it using a Blob:

 // Asker's attempt: wrap the HTTP response payload in a Blob tagged as MP3 audio.
 // NOTE(review): res.data is passed directly as the first argument, but the Blob
 // constructor expects an array (iterable) of parts, e.g. [res.data] — presumably
 // this is why the attempt fails. Confirm what type res.data actually holds.
 let wavFile = new Blob(res.data, { 
                                'type': 'audio/mp3' 
                              });

but without success.

Answers

A workaround would be to use the Azure Cognitive Services Speech SDK for JavaScript to convert the text to speech.
This generates a .wav file, which you can then play using the node-wav-player npm package.

Code for text to speech:

// Node.js script: prompts for a line of text on stdin, synthesizes it with the
// Azure Speech SDK and writes the resulting audio to a .wav file.
const sdk = require("microsoft-cognitiveservices-speech-sdk");
const readline = require("readline");

// Path of the .wav file the synthesized speech is written to.
const audioFile = "YourAudioFile.wav";

// This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION".
// Reading them from the environment keeps the subscription key out of the source.
const speechConfig = sdk.SpeechConfig.fromSubscription(
    process.env.SPEECH_KEY,
    process.env.SPEECH_REGION
);

// Route the synthesizer's output to the file instead of the default speaker.
const audioConfig = sdk.AudioConfig.fromAudioFileOutput(audioFile);

// Voice used for synthesis.
speechConfig.speechSynthesisVoiceName = "hi-IN-SwaraNeural";

// `let` because the reference is cleared once synthesis ends.
let synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

// Closes the synthesizer and drops the reference; shared by the success and
// error callbacks so both paths release it the same way.
function releaseSynthesizer() {
    synthesizer.close();
    synthesizer = null;
}

const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
});

rl.question("Enter some text that you want to speak >\n> ", (text) => {
    rl.close();
    synthesizer.speakTextAsync(
        text,
        (result) => {
            if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
                console.log("synthesis finished.");
            } else {
                console.error(
                    `Speech synthesis canceled, ${result.errorDetails}\nDid you set the speech resource key and region values?`
                );
            }
            releaseSynthesizer();
        },
        (err) => {
            console.trace("err - " + err);
            releaseSynthesizer();
        }
    );
    // Logged right after the request is issued; the callbacks above fire later.
    console.log("Now synthesizing to: " + audioFile);
});

The above code is from the Microsoft documentation on text to speech using JavaScript.

The following code will play the .wav file:

// Plays the .wav file produced by the synthesis script via node-wav-player.
const player = require('node-wav-player');

// Invoked when the promise returned by play() fulfills.
const onStarted = () => {
    console.log('audio has started');
};

// Invoked when play() rejects (or onStarted throws).
const onFailure = (err) => {
    console.error(err);
};

player
    .play({ path: './YourAudioFile.wav' })
    .then(onStarted)
    .catch(onFailure);

Please use fetch, and make sure to include at least the following headers and payload:

// Browser snippet: POST SSML to the text-to-speech endpoint and play the MP3
// audio that comes back.
const audio = document.createElement("audio");

// The SSML is single-quoted because the markup itself contains double quotes.
const ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US"><voice name="AdaptVoice">My SSML</voice></speak>';

fetch("{YourEndpointUrl}", {
  method: "POST",
  headers: {
    "content-type": "application/ssml+xml",
    "ocp-apim-subscription-key": "{YourSpeechKey}",
    "x-microsoft-outputformat": "audio-24khz-160kbitrate-mono-mp3"
  },
  body: ssml
})
  .then((resp) => {
    // fetch only rejects on network failure; without this check an HTTP error
    // body would be handed to the audio element as if it were MP3 data.
    if (!resp.ok) {
      throw new Error(`Speech synthesis request failed: ${resp.status} ${resp.statusText}`);
    }
    return resp.blob();
  })
  .then((blob) => {
    audio.src = URL.createObjectURL(blob);
    // play() returns a promise; returning it routes a playback rejection
    // to the catch handler below.
    return audio.play();
  })
  .catch((err) => {
    console.error(err);
  });

Or use async/await, which is more concise:

// Browser snippet (async/await form): POST SSML to the text-to-speech endpoint
// and play the MP3 audio that comes back. The top-level await requires a module
// or an enclosing async function.
const audio = document.createElement("audio");

// The SSML is single-quoted because the markup itself contains double quotes.
const ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang="en-US"><voice name="AdaptVoice">My SSML</voice></speak>';

const resp = await fetch("{YourEndpointUrl}", {
  method: "POST",
  headers: {
    "content-type": "application/ssml+xml",
    "ocp-apim-subscription-key": "{YourSpeechKey}",
    "x-microsoft-outputformat": "audio-24khz-160kbitrate-mono-mp3"
  },
  body: ssml
});

// fetch only rejects on network failure; without this check an HTTP error
// body would be handed to the audio element as if it were MP3 data.
if (!resp.ok) {
  throw new Error(`Speech synthesis request failed: ${resp.status} ${resp.statusText}`);
}

const blob = await resp.blob();
// URL.createObjectURL is synchronous and returns a string, so it is not awaited.
audio.src = URL.createObjectURL(blob);
// play() returns a promise; awaiting it surfaces a playback rejection here.
await audio.play();

Please signup or login to give your own answer.

Click here to cancel reply.