skip to Main Content

I’m trying to write a function that parses strings like the following

GEYDQORRGM5D[C[M[A,I,Q,Y],NA],O,Q,S]

and turns it into an array like the following

[
  "GEYDQORRGM5DCMA",
  "GEYDQORRGM5DCMI",
  "GEYDQORRGM5DCMQ",
  "GEYDQORRGM5DCMY",
  "GEYDQORRGM5DCNA",
  "GEYDQORRGM5DO",
  "GEYDQORRGM5DQ",
  "GEYDQORRGM5DS",
]

I’ve tried the following code. It works for some of the strings, but for most of them it doesn’t!

/**
 * First attempt at expanding a bracket pattern into a flat list of strings.
 *
 * The input is scanned left to right for runs of `[A-Z0-9]`, each optionally
 * followed by a flat `[X,Y,...]` group of letters. The first piece found is
 * then prepended to every later piece.
 *
 * Limitation: every piece after the first is treated as a direct child of the
 * first one, so intermediate prefixes of deeper nesting levels are not
 * carried over (e.g. the `C` in `P[C[M[A,B]]]` is not prepended to `MA`).
 *
 * @param {string} input - Pattern such as `"P[A[X,Y],B]"`.
 * @returns {string[]} The first piece concatenated with each following piece.
 */
function splitAndAddString(input) {
    // The brackets must be escaped: unescaped, `[` opens a character class
    // and the literal no longer parses.
    const regex = /([A-Z0-9]+)(?:\[([A-Z,]+)\])?/g;
    const result = [];

    let match;
    while ((match = regex.exec(input)) !== null) {
        const prefix = match[1];
        // No bracket group after this run: keep the run itself as one piece.
        const suffixes = match[2] ? match[2].split(",") : [""];
        for (const suffix of suffixes) {
            result.push(prefix + suffix);
        }
    }

    // The first piece is the shared prefix; it is not part of the output itself.
    const [head, ...rest] = result;
    return rest.map((piece) => head + piece);
}

// Sample pattern from the question: run the attempt on it and print what it returns.
const input = 'GEYDQORRGM5D[C[M[A,I,Q,Y],NA],O,Q,S]';
const output = splitAndAddString(input);
console.log(output);

it returns

[
  "GEYDQORRGM5DC",
  "GEYDQORRGM5DMA",
  "GEYDQORRGM5DMI",
  "GEYDQORRGM5DMQ",
  "GEYDQORRGM5DMY",
  "GEYDQORRGM5DNA",
  "GEYDQORRGM5DO",
  "GEYDQORRGM5DQ",
  "GEYDQORRGM5DS",
]

when it’s supposed to return

[
  "GEYDQORRGM5DCMA",
  "GEYDQORRGM5DCMI",
  "GEYDQORRGM5DCMQ",
  "GEYDQORRGM5DCMY",
  "GEYDQORRGM5DCNA",
  "GEYDQORRGM5DO",
  "GEYDQORRGM5DQ",
  "GEYDQORRGM5DS",
]

2

Answers


  1. Your regex can only deal with one level of bracket nesting. Instead, handle the [ and ] tokens separately, so that you can make a recursive call when you encounter [ and unwind when you encounter ].

    This is also a nice candidate for a generator:

    /**
     * Expands a nested bracket pattern into every full string it describes.
     *
     * Text directly before a `[` is a prefix for each comma-separated
     * alternative inside the brackets; alternatives may themselves contain
     * bracket groups, to any depth.
     *
     * @param {string} s - Pattern such as `"AB[C[D,E],F]"`.
     * @returns {string[]} The expanded strings, in left-to-right order.
     */
    function getProductions(s) {
        // Split on "[", "," and "]" and keep them (capture group), so the
        // stream alternates: text, delimiter, text, delimiter, ...
        // The brackets must be escaped inside the character class.
        const tokens = s.split(/([\[,\]])/).values();

        // Yields every expansion of the bracket group currently being read.
        // All recursion levels consume the same shared token iterator.
        function* dfs() {
            while (true) {
                const {value} = tokens.next();
                // Running out of tokens is treated like a closing bracket.
                const token = tokens.next().value ?? "]";
                // A non-empty text followed by "," or "]" is a finished alternative.
                if ("],".includes(token) && value) yield value;
                if (token === "]") return;
                if (token === "[") {
                    // The text is a prefix for everything in the nested group.
                    for (const postfix of dfs()) {
                        yield value + postfix;
                    }
                }
            }
        }
        return [...dfs()];
    }
    
    // Demo run on your example: expand the sample pattern and print the resulting array.
    const s = "GEYDQORRGM5D[C[M[A,I,Q,Y],NA],O,Q,S]"
    console.log(getProductions(s));
    Login or Signup to reply.
  2. You need to create a function that can be called recursively with a subset of the string.

    I added comments in the code to explain what happens.

    /**
     * Recursively expands a bracket pattern into a flat list of strings.
     *
     * A string of the form `PREFIX[inner]tail` is handled by expanding
     * `inner` recursively, prepending `PREFIX` to each result, and then
     * appending the comma-separated items of `tail` unchanged. A string
     * without that shape is treated as a plain comma-separated list.
     *
     * Because `.*` is greedy, the bracket group spans from the first `[` to
     * the last `]` of the string, so each level is expected to contain a
     * single bracket group followed only by plain items.
     *
     * @param {string} str - Pattern such as `"AB[C[D,E],F]"`.
     * @returns {string[]} The expanded strings.
     */
    function splitAndAddString(str) {
      // Group 1: text before the first "[".
      // Group 2: everything between the first "[" and the last "]".
      // Group 3: plain comma list after the last "]".
      // The brackets must be escaped, otherwise "[(.*)]" is a character class.
      const regex = /^([A-Z0-9]+)\[(.*)\]([A-Z0-9,]*)$/;

      const match = regex.exec(str);
      if (!match) {
        // No bracket group: a plain comma list; drop empty entries.
        return str.split(',').filter((part) => part.length > 0);
      }

      const [, prefix, between, suffix] = match;

      // Expand the bracket contents recursively and prepend the current prefix.
      const nested = splitAndAddString(between).map((tail) => prefix + tail);

      // Items after the closing bracket sit outside the group, so they do
      // not receive the prefix; drop empty entries.
      const suffixes = suffix.split(',').filter((part) => part.length > 0);

      return [...nested, ...suffixes];
    }
    
    // Demo: expand the sample pattern from the question and print the resulting array.
    const input = 'GEYDQORRGM5D[C[M[A,I,Q,Y],NA],O,Q,S]';
    const output = splitAndAddString(input);
    console.log(output);
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search