skip to Main Content

I created the following code to extract numerical information from a user-provided string, which specifies the level or floor in a building. The goal is to accurately extract the numerical value from the input. However, the current implementation does not handle hyphenated numbers correctly. For instance, "twenty-third" is incorrectly resolved as 20 instead of 23.

/**
 * Extracts a building level/floor number from free-form text.
 *
 * The input is lower-cased and scanned for an optional lead-in word
 * ("level", "floor", "on", "at") followed by either digits (optionally with
 * an ordinal suffix such as "3rd") or a cardinal/ordinal number word from
 * one/first up to fifty/fiftieth. The first usable match wins.
 *
 * Known limitation (the subject of the question above): hyphenated compounds
 * such as "twenty-third" currently resolve to the tens value (20).
 *
 * @param {string} input - Text that may mention a level or floor.
 * @returns {number|null} The extracted level, or null when none is found.
 */
function extractLevelFromString(input) {
    // Normalize the input string
    const normalizedInput = input.toLowerCase();

    const wordToNumberMap = {
        "one": 1, "first": 1,
        "two": 2, "second": 2,
        "three": 3, "third": 3,
        "four": 4, "fourth": 4,
        "five": 5, "fifth": 5,
        "six": 6, "sixth": 6,
        "seven": 7, "seventh": 7,
        "eight": 8, "eighth": 8,
        "nine": 9, "ninth": 9,
        "ten": 10, "tenth": 10,
        "eleven": 11, "eleventh": 11,
        "twelve": 12, "twelfth": 12,
        "thirteen": 13, "thirteenth": 13,
        "fourteen": 14, "fourteenth": 14,
        "fifteen": 15, "fifteenth": 15,
        "sixteen": 16, "sixteenth": 16,
        "seventeen": 17, "seventeenth": 17,
        "eighteen": 18, "eighteenth": 18,
        "nineteen": 19, "nineteenth": 19,
        "twenty": 20, "twentieth": 20,
        "twenty-one": 21, "twenty-first": 21,
        "twenty-two": 22, "twenty-second": 22,
        "twenty-three": 23, "twenty-third": 23,
        "twenty-four": 24, "twenty-fourth": 24,
        "twenty-five": 25, "twenty-fifth": 25,
        "twenty-six": 26, "twenty-sixth": 26,
        "twenty-seven": 27, "twenty-seventh": 27,
        "twenty-eight": 28, "twenty-eighth": 28,
        "twenty-nine": 29, "twenty-ninth": 29,
        "thirty": 30, "thirtieth": 30,
        "thirty-one": 31, "thirty-first": 31,
        "thirty-two": 32, "thirty-second": 32,
        "thirty-three": 33, "thirty-third": 33,
        "thirty-four": 34, "thirty-fourth": 34,
        "thirty-five": 35, "thirty-fifth": 35,
        "thirty-six": 36, "thirty-sixth": 36,
        "thirty-seven": 37, "thirty-seventh": 37,
        "thirty-eight": 38, "thirty-eighth": 38,
        "thirty-nine": 39, "thirty-ninth": 39,
        "forty": 40, "fortieth": 40,
        "forty-one": 41, "forty-first": 41,
        "forty-two": 42, "forty-second": 42,
        "forty-three": 43, "forty-third": 43,
        "forty-four": 44, "forty-fourth": 44,
        "forty-five": 45, "forty-fifth": 45,
        "forty-six": 46, "forty-sixth": 46,
        "forty-seven": 47, "forty-seventh": 47,
        "forty-eight": 48, "forty-eighth": 48,
        "forty-nine": 49, "forty-ninth": 49,
        "fifty": 50, "fiftieth": 50
    };

    // Group 1: optional lead-in word. Group 2: digits or a number word.
    // The trailing non-capturing group swallows an ordinal suffix ("3rd").
    const levelRegex = /\b(level|floor|on|at)?\s*(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|fifty|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth)(?:st|nd|rd|th)?\b/gi;

    const matches = normalizedInput.matchAll(levelRegex);

    // Process matches
    for (const match of matches) {
        const levelCandidate = match[2]; // Get the potential level part

        // If numeric, return directly
        if (/^\d+$/.test(levelCandidate)) {
            return Number.parseInt(levelCandidate, 10);
        }

        // If word-based, map to a number
        const mappedLevel = wordToNumberMap[levelCandidate];
        if (mappedLevel !== undefined) {
            return mappedLevel;
        }
    }

    // Return null if no level found
    return null;
}

I tried this using regex pattern matching and was expecting the resolution of numbers from the input string.

3

Answers


  1. Issue

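    The regular expression tries its alternatives from left to right, so for an input such as "twenty-three" or "thirty-third" the bare "twenty" or "thirty" alternative matches first. The trailing word boundary is satisfied at the hyphen, so the rest of the word is never consumed.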

    Solution Suggestion

    Update the regular expression to include a negative lookahead to only match values like "twenty" and "thirty" when not immediately followed by a hyphen.

    Example:

    (twenty|thirty|forty|fifty)(?!-)
    
    // Bare tens only match when no hyphen follows; otherwise the
    // "<tens>-<unit>" alternative captures the whole compound in group 2.
    const levelRegex =
      /\b(level|floor|on|at)?\s*(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|(?:twenty|thirty|forty|fifty)(?!-)|(?:twenty|thirty|forty|fifty)-(?:one|first|two|second|three|third|four|fourth|five|fifth|six|sixth|seven|seventh|eight|eighth|nine|ninth)|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth)(?:st|nd|rd|th)?\b/gi;
    

    Here I’ve also grouped some values to reduce duplicate sub-strings.

    /**
     * Extracts a building level/floor number from free-form text.
     *
     * Accepts digits (optionally with an ordinal suffix, e.g. "3rd") and
     * cardinal/ordinal number words from one/first up to fifty/fiftieth,
     * including hyphenated compounds such as "twenty-third".
     *
     * @param {string} input - Text that may mention a level or floor.
     * @returns {number|null} The extracted level, or null when none is found.
     */
    function extractLevelFromString(input) {
      // Normalize the input string
      const normalizedInput = input.toLowerCase();

      const wordToNumberMap = {
        one: 1, first: 1,
        two: 2, second: 2,
        three: 3, third: 3,
        four: 4, fourth: 4,
        five: 5, fifth: 5,
        six: 6, sixth: 6,
        seven: 7, seventh: 7,
        eight: 8, eighth: 8,
        nine: 9, ninth: 9,
        ten: 10, tenth: 10,
        eleven: 11, eleventh: 11,
        twelve: 12, twelfth: 12,
        thirteen: 13, thirteenth: 13,
        fourteen: 14, fourteenth: 14,
        fifteen: 15, fifteenth: 15,
        sixteen: 16, sixteenth: 16,
        seventeen: 17, seventeenth: 17,
        eighteen: 18, eighteenth: 18,
        nineteen: 19, nineteenth: 19,
        twenty: 20, twentieth: 20,
        "twenty-one": 21, "twenty-first": 21,
        "twenty-two": 22, "twenty-second": 22,
        "twenty-three": 23, "twenty-third": 23,
        "twenty-four": 24, "twenty-fourth": 24,
        "twenty-five": 25, "twenty-fifth": 25,
        "twenty-six": 26, "twenty-sixth": 26,
        "twenty-seven": 27, "twenty-seventh": 27,
        "twenty-eight": 28, "twenty-eighth": 28,
        "twenty-nine": 29, "twenty-ninth": 29,
        thirty: 30, thirtieth: 30,
        "thirty-one": 31, "thirty-first": 31,
        "thirty-two": 32, "thirty-second": 32,
        "thirty-three": 33, "thirty-third": 33,
        "thirty-four": 34, "thirty-fourth": 34,
        "thirty-five": 35, "thirty-fifth": 35,
        "thirty-six": 36, "thirty-sixth": 36,
        "thirty-seven": 37, "thirty-seventh": 37,
        "thirty-eight": 38, "thirty-eighth": 38,
        "thirty-nine": 39, "thirty-ninth": 39,
        forty: 40, fortieth: 40,
        "forty-one": 41, "forty-first": 41,
        "forty-two": 42, "forty-second": 42,
        "forty-three": 43, "forty-third": 43,
        "forty-four": 44, "forty-fourth": 44,
        "forty-five": 45, "forty-fifth": 45,
        "forty-six": 46, "forty-sixth": 46,
        "forty-seven": 47, "forty-seventh": 47,
        "forty-eight": 48, "forty-eighth": 48,
        "forty-nine": 49, "forty-ninth": 49,
        fifty: 50, fiftieth: 50,
      };

      // Group 1: optional lead-in word. Group 2: digits or a number word.
      // Bare tens only match when no hyphen follows; otherwise the
      // "<tens>-<unit>" alternative captures the whole compound. A trailing
      // ordinal suffix ("th" in "twenty-fourth") is consumed outside group 2,
      // so group 2 is then the cardinal form, which the map also contains.
      const levelRegex =
        /\b(level|floor|on|at)?\s*(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|(?:twenty|thirty|forty|fifty)(?!-)|(?:twenty|thirty|forty|fifty)-(?:one|first|two|second|three|third|four|fourth|five|fifth|six|sixth|seven|seventh|eight|eighth|nine|ninth)|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth)(?:st|nd|rd|th)?\b/gi;

      const matches = normalizedInput.matchAll(levelRegex);

      // Process matches
      for (const match of matches) {
        const levelCandidate = match[2]; // Get the potential level part

        // If numeric, return directly
        if (/^\d+$/.test(levelCandidate)) {
          return Number.parseInt(levelCandidate, 10);
        }

        // If word-based, map to a number
        const mappedLevel = wordToNumberMap[levelCandidate];
        if (mappedLevel !== undefined) {
          return mappedLevel;
        }
      }

      // Return null if no level found
      return null;
    }
    
    // Quick demonstration of two hyphenated inputs.
    for (const sample of ["twenty-third", "thirty-four"]) {
      console.log(`${sample} ->`, extractLevelFromString(sample));
    }
    Login or Signup to reply.
  2. You can use the words-to-numbers package, which can be installed from npm:

    npm i words-to-numbers
    
    // Example calls to the third-party words-to-numbers package.
    // The trailing comments are the results as stated by the answer's author.
    import wordsToNumbers from 'words-to-numbers';
    
    wordsToNumbers('first'); // 1
    wordsToNumbers('one'); // 1
    
    wordsToNumbers('second'); // 2
    wordsToNumbers('two'); // 2
    
    wordsToNumbers('third'); // 3
    wordsToNumbers('three'); // 3
    
    wordsToNumbers('one-hundred'); // 100
    

    You can also set the fuzzy option to true so that misspelled number words are tolerated:

    // Fuzzy matching example; the result comment is as stated by the answer's author.
    import wordsToNumbers from 'words-to-numbers';
    
    wordsToNumbers('too thousant and fiev', {fuzzy: true}); // 2005
    
    Login or Signup to reply.
  3. One option is to place the short number words (one, two, …, ten, twenty, …) at the end of the alternation, after the hyphenated compounds, so that the longer forms are tried first. Your regular expression will then look like this:

    // Every bare tens word (twenty, thirty, forty, fifty) comes after its
    // hyphenated compounds so the longer form is tried first.
    const levelRegex = /\b(level|floor|on|at)?\s*(\d+|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth|one|two|three|four|five|six|seven|eight|nine|ten|twenty|thirty|forty|fifty)(?:st|nd|rd|th)?\b/gi;
    

    Then this will resolve the "twenty-third" to 23.

    /**
     * Extracts a building level/floor number from free-form text.
     *
     * Accepts digits (optionally with an ordinal suffix, e.g. "3rd") and
     * cardinal/ordinal number words from one/first up to fifty/fiftieth,
     * including hyphenated compounds such as "twenty-third".
     *
     * @param {string} input - Text that may mention a level or floor.
     * @returns {number|null} The extracted level, or null when none is found.
     */
    function extractLevelFromString(input) {
        // Normalize the input string
        const normalizedInput = input.toLowerCase();

        const wordToNumberMap = {
            "one": 1, "first": 1,
            "two": 2, "second": 2,
            "three": 3, "third": 3,
            "four": 4, "fourth": 4,
            "five": 5, "fifth": 5,
            "six": 6, "sixth": 6,
            "seven": 7, "seventh": 7,
            "eight": 8, "eighth": 8,
            "nine": 9, "ninth": 9,
            "ten": 10, "tenth": 10,
            "eleven": 11, "eleventh": 11,
            "twelve": 12, "twelfth": 12,
            "thirteen": 13, "thirteenth": 13,
            "fourteen": 14, "fourteenth": 14,
            "fifteen": 15, "fifteenth": 15,
            "sixteen": 16, "sixteenth": 16,
            "seventeen": 17, "seventeenth": 17,
            "eighteen": 18, "eighteenth": 18,
            "nineteen": 19, "nineteenth": 19,
            "twenty": 20, "twentieth": 20,
            "twenty-one": 21, "twenty-first": 21,
            "twenty-two": 22, "twenty-second": 22,
            "twenty-three": 23, "twenty-third": 23,
            "twenty-four": 24, "twenty-fourth": 24,
            "twenty-five": 25, "twenty-fifth": 25,
            "twenty-six": 26, "twenty-sixth": 26,
            "twenty-seven": 27, "twenty-seventh": 27,
            "twenty-eight": 28, "twenty-eighth": 28,
            "twenty-nine": 29, "twenty-ninth": 29,
            "thirty": 30, "thirtieth": 30,
            "thirty-one": 31, "thirty-first": 31,
            "thirty-two": 32, "thirty-second": 32,
            "thirty-three": 33, "thirty-third": 33,
            "thirty-four": 34, "thirty-fourth": 34,
            "thirty-five": 35, "thirty-fifth": 35,
            "thirty-six": 36, "thirty-sixth": 36,
            "thirty-seven": 37, "thirty-seventh": 37,
            "thirty-eight": 38, "thirty-eighth": 38,
            "thirty-nine": 39, "thirty-ninth": 39,
            "forty": 40, "fortieth": 40,
            "forty-one": 41, "forty-first": 41,
            "forty-two": 42, "forty-second": 42,
            "forty-three": 43, "forty-third": 43,
            "forty-four": 44, "forty-fourth": 44,
            "forty-five": 45, "forty-fifth": 45,
            "forty-six": 46, "forty-sixth": 46,
            "forty-seven": 47, "forty-seventh": 47,
            "forty-eight": 48, "forty-eighth": 48,
            "forty-nine": 49, "forty-ninth": 49,
            "fifty": 50, "fiftieth": 50
        };

        // Group 1: optional lead-in word. Group 2: digits or a number word.
        // Alternatives are tried left to right and \b succeeds at a hyphen,
        // so every bare tens word (twenty, thirty, forty, fifty) must come
        // after its hyphenated compounds or it would win the match.
        const levelRegex = /\b(level|floor|on|at)?\s*(\d+|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth|one|two|three|four|five|six|seven|eight|nine|ten|twenty|thirty|forty|fifty)(?:st|nd|rd|th)?\b/gi;

        const matches = normalizedInput.matchAll(levelRegex);

        // Process matches
        for (const match of matches) {
            const levelCandidate = match[2]; // Get the potential level part

            // If numeric, return directly
            if (/^\d+$/.test(levelCandidate)) {
                return Number.parseInt(levelCandidate, 10);
            }

            // If word-based, map to a number
            const mappedLevel = wordToNumberMap[levelCandidate];
            if (mappedLevel !== undefined) {
                return mappedLevel;
            }
        }

        // Return null if no level found
        return null;
    }
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search