skip to Main Content

I want to use the YAMNet TensorFlow model, but it requires an input tensor whose values lie in the range [-1, 1].

How can I transform the audio samples I read from a WAV file into the range [-1, 1]?

Option #1

/**
 * Reads the WAV file at 'archive/dataset/1/1_484.wav', resamples it to
 * 16000 Hz and returns its samples.
 *
 * Relies on `fs` and `wavefile` being in scope in the enclosing module.
 *
 * @returns {Promise<Float32Array>} Samples as returned by
 *   `WaveFile#getSamples(false, Float32Array)`. NOTE(review): the values are
 *   whatever wavefile yields for the file's bit depth; they are not scaled
 *   to [-1, 1] here — confirm against the wavefile documentation.
 */
const readWavAudio = async () => {
  // readFileSync is synchronous, so there is nothing to await; the binding is
  // declared locally instead of leaking an implicit global.
  const wavBytes = fs.readFileSync('archive/dataset/1/1_484.wav');
  const wav = new wavefile.WaveFile();
  wav.fromBuffer(wavBytes);
  wav.toSampleRate(16000);
  return wav.getSamples(false, Float32Array);
};

Option #2

/**
 * Reads "teste.wav" and decodes its bytes as consecutive big-endian 32-bit
 * floats, starting at byte offset 0.
 *
 * NOTE(review): decoding starts at the very first byte of the file, so any
 * container header bytes are decoded as floats too — confirm this is intended.
 *
 * @returns {Promise<number[]>} One number per complete 4-byte group; trailing
 *   bytes that do not form a full float are ignored.
 */
async function readbuf_async() {
  const FLOAT_BYTES = 4;
  const data = fs.readFileSync("teste.wav");
  const buffer = [];

  // `<=` so that the final complete 4-byte group is decoded as well.
  for (let offset = 0; offset + FLOAT_BYTES <= data.length; offset += FLOAT_BYTES) {
    buffer.push(data.readFloatBE(offset));
  }

  return buffer;
}

2

Answers


  1. You can scale the vector to a range between [-1, 1] by utilizing linear interpolation:

    /**
     * Linearly rescales every element of `vector` so that the vector's own
     * minimum maps to -1 and its maximum maps to 1.
     *
     * @param {number[]} vector - Values to rescale.
     * @returns {number[]} Result of `vector.map(...)` with each value rescaled.
     */
    const normalizeVector = (vector) => {
      const bounds = vectorScale(vector);
      const sourceRange = [bounds.min, bounds.max];
      const targetRange = [-1, 1];
      return vector.map((sample) => scaleValue(sample, sourceRange, targetRange));
    };
    
    /**
     * Linearly maps `value` from `originalScale` onto `adjustedScale`.
     *
     * @param {number} value - Value to map.
     * @param {[number, number]} originalScale - `[min, max]` of the source range.
     * @param {[number, number]} adjustedScale - `[min, max]` of the target range.
     * @returns {number} The mapped value. When the source range has zero width,
     *   the midpoint of the target range is returned.
     */
    const scaleValue = (value, originalScale, adjustedScale) => {
      const [minO, maxO] = originalScale;
      const [minA, maxA] = adjustedScale;
      const spanO = maxO - minO;
      // A zero-width source range (e.g. a constant signal) would divide by
      // zero and produce NaN/Infinity, so map it to the centre of the target.
      if (spanO === 0) {
        return (minA + maxA) / 2;
      }
      return ((value - minO) * (maxA - minA)) / spanO + minA;
    };
    
    /**
     * Finds the smallest and largest values in `vector`.
     *
     * @param {Iterable<number>} vector - Values to scan.
     * @returns {{min: number, max: number}} The extrema. For an empty input
     *   this is `{ min: Infinity, max: -Infinity }`.
     */
    const vectorScale = (vector) => {
      // Start from +/-Infinity so values beyond the safe-integer range are
      // still picked up as the minimum or maximum.
      const scale = { min: Infinity, max: -Infinity };
      for (const value of vector) {
        if (value < scale.min) scale.min = value;
        if (value > scale.max) scale.max = value;
      }
      return scale;
    };
    

    Full example

    Here is a Node.js module that creates a JSON data file containing the desired values.

    import fs from "fs";
    import path from "path";
    import wavefile from "wavefile";
    
    /**
     * Script entry point: analyzes the sample WAV file, then logs completion.
     *
     * @returns {Promise<void>}
     */
    async function main() {
      await analyzeAudioFile("archive/dataset/1/1_484.wav");
      console.log("Finished...");
    }
    
    /**
     * Reads the samples of `filename`, normalizes them with `normalizeVector`
     * and writes the result as JSON to `<base name of filename>.json`.
     *
     * @param {string} filename - Path of the WAV file to analyze.
     * @returns {Promise<void>}
     */
    const analyzeAudioFile = async (filename) => {
      console.log("Analyzing audio...");
      const rawSamples = await readWavAudio(filename);
      // Copy into a plain array before normalizing and serializing.
      const normalized = normalizeVector(Array.from(rawSamples));
      const jsonPath = `${fileNameFromPath(filename)}.json`;
      fs.writeFileSync(jsonPath, JSON.stringify(normalized));
      console.log(`Wrote ${jsonPath}`);
    };
    
    /**
     * Loads a WAV file, resamples it to 16000 Hz and returns its samples.
     *
     * @param {string} wavFilename - Path of the WAV file to read.
     * @returns {Promise<Float32Array>} Result of
     *   `WaveFile#getSamples(false, Float32Array)`.
     */
    const readWavAudio = async (wavFilename) => {
      const fileBytes = fs.readFileSync(wavFilename);
      const decoder = new wavefile.WaveFile();
      decoder.fromBuffer(fileBytes);
      decoder.toSampleRate(16000);
      const samples = decoder.getSamples(false, Float32Array);
      return samples;
    };
    
    /**
     * Linearly rescales every element of `vector` so that the vector's own
     * minimum maps to -1 and its maximum maps to 1.
     *
     * @param {number[]} vector - Values to rescale.
     * @returns {number[]} Result of `vector.map(...)` with each value rescaled.
     */
    const normalizeVector = (vector) => {
      const bounds = vectorScale(vector);
      const sourceRange = [bounds.min, bounds.max];
      const targetRange = [-1, 1];
      return vector.map((sample) => scaleValue(sample, sourceRange, targetRange));
    };
    
    /**
     * Linearly maps `value` from `originalScale` onto `adjustedScale`.
     *
     * @param {number} value - Value to map.
     * @param {[number, number]} originalScale - `[min, max]` of the source range.
     * @param {[number, number]} adjustedScale - `[min, max]` of the target range.
     * @returns {number} The mapped value. When the source range has zero width,
     *   the midpoint of the target range is returned.
     */
    const scaleValue = (value, originalScale, adjustedScale) => {
      const [minO, maxO] = originalScale;
      const [minA, maxA] = adjustedScale;
      const spanO = maxO - minO;
      // A zero-width source range (e.g. a constant signal) would divide by
      // zero and produce NaN/Infinity, so map it to the centre of the target.
      if (spanO === 0) {
        return (minA + maxA) / 2;
      }
      return ((value - minO) * (maxA - minA)) / spanO + minA;
    };
    
    /**
     * Finds the smallest and largest values in `vector`.
     *
     * @param {Iterable<number>} vector - Values to scan.
     * @returns {{min: number, max: number}} The extrema. For an empty input
     *   this is `{ min: Infinity, max: -Infinity }`.
     */
    const vectorScale = (vector) => {
      // Start from +/-Infinity so values beyond the safe-integer range are
      // still picked up as the minimum or maximum.
      const scale = { min: Infinity, max: -Infinity };
      for (const value of vector) {
        if (value < scale.min) scale.min = value;
        if (value > scale.max) scale.max = value;
      }
      return scale;
    };
    
    /**
     * Returns the last path segment of `filename` with its extension removed.
     *
     * @param {string} filename - File path.
     * @returns {string} Base name without the extension reported by `path.extname`.
     */
    const fileNameFromPath = (filename) =>
      path.basename(filename, path.extname(filename));
    
    // Run the script; report any failure and signal it through the exit code
    // instead of leaving the returned promise unhandled.
    main().catch((error) => {
      console.error(error);
      process.exitCode = 1;
    });
    

    Output

    The Node.js module script will create a JSON file that contains an array of values between [-1, 1].

    Analyzing audio...
    Wrote 1_484.json
    Finished...
    

    Optimization

    Here is a slightly optimized version of normalizeVector:

    Note: The 2 is the width of the adjusted scale, i.e. max - min = 1 - (-1).

    /**
     * Rescales every element of `vector` into [-1, 1] using the vector's own
     * minimum and maximum, delegating the arithmetic to `lerp`.
     *
     * @param {number[]} vector - Values to rescale.
     * @returns {number[]} Result of `vector.map(...)` with each value rescaled.
     */
    const normalizeVector = (vector) => {
      const bounds = vectorScale(vector);
      const lowest = bounds.min;
      const width = bounds.max - lowest;
      // 2 is the width of the target range [-1, 1]; -1 is its lower bound.
      return vector.map((sample) => lerp(sample, width, 2, lowest, -1));
    };
    
    /**
     * Linearly maps `value` from a source range onto a target range.
     *
     * @param {number} value - Value to map.
     * @param {number} range - Width of the source range (max - min).
     * @param {number} newRange - Width of the target range.
     * @param {number} min - Lower bound of the source range.
     * @param {number} newMin - Lower bound of the target range.
     * @returns {number} The mapped value. When `range` is 0, the midpoint of
     *   the target range is returned.
     */
    const lerp = (value, range, newRange, min, newMin) => {
      // A zero-width source range would divide by zero and yield NaN/Infinity.
      if (range === 0) {
        return newMin + newRange / 2;
      }
      return ((value - min) * newRange) / range + newMin;
    };
    
    Login or Signup to reply.
  2. I was able to make use of wavefile for this. In my case, I was starting with base64-encoded mu-law audio from Twilio. Dividing by 32768 (2^15, the magnitude of the most negative 16-bit signed integer; the maximum is 32767) is what gets you values within [-1, 1].

    const WaveFile = require('wavefile').WaveFile;
    const tf = require('@tensorflow/tfjs');
    
    // Build a mono 8000 Hz WaveFile from the base64 payload using the '8m'
    // bit-depth code, decode it from mu-law, and resample to 16000 Hz.
    // `msg` is expected to be provided by the surrounding code.
    const wav = new WaveFile();
    wav.fromScratch(1, 8000, '8m', Buffer.from(msg.media.payload, 'base64'));
    wav.fromMuLaw();
    wav.toSampleRate(16000);
    // Dividing 16-bit integer samples by 2^15 keeps every value within [-1, 1].
    const PCM16_SCALE = 32768;
    const samples = Array.from(
      wav.getSamples(false, Int16Array),
      (sample) => sample / PCM16_SCALE
    );
    // Declared with const so the tensor does not leak as an implicit global.
    const waveform = tf.tensor(samples);
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search