skip to Main Content

Hello, I would like help creating a regex that removes all HTML tags, but inserts a space wherever a closing div tag is immediately followed by an opening div tag. For example, the first line below should become the second:

This <b>is</b> <div>a</div><div>test</div>

This is a test

What I currently have is the regex /(<([^>]+)>)/ig, which removes all HTML tags, but I'm wondering how I can also add a space whenever a closing div and an opening div are next to each other.

I tried using /(<([^>]+)>)/ig to strip the HTML, which works, but I need help with the spacing between divs when they are next to each other.

2

Answers


  1. JS has built-in support for HTML parsing. Use it instead:

    /**
     * Extracts the text of an HTML fragment, separating the text of each
     * top-level node with a single space.
     *
     * The fragment is parsed by assigning it to the `innerHTML` of a detached
     * <div>; each direct child's `textContent` is trimmed and empty results
     * are dropped before joining.
     *
     * @param {string} html - HTML fragment to convert.
     * @returns {string} The trimmed, non-empty top-level texts joined by ' '.
     */
    function getSpaceSeparatedText(html) {
      // A throwaway element acts as the HTML parser.
      const container = document.createElement('div');
      container.innerHTML = html;

      return Array.from(container.childNodes, (child) => child.textContent.trim())
        .filter((piece) => piece) // drop whitespace-only nodes
        .join(' ');
    }
    

    Try it:

    // NOTE(review): `console.config` is not a standard Console method; presumably it is
    // provided by the stack-snippet-console script referenced after this snippet — confirm
    // before running this code outside that environment.
    console.config({ maximize: true });
    
    /**
     * Returns the text content of an HTML fragment with the text of each
     * top-level node separated by one space.
     *
     * @param {string} html - HTML fragment to parse.
     * @returns {string} Non-empty, trimmed top-level texts joined with ' '.
     */
    function getSpaceSeparatedText(html) {
      // Let the DOM parse the markup via a detached <div>.
      const host = document.createElement('div');
      host.innerHTML = html;

      const pieces = [];
      host.childNodes.forEach((child) => {
        const trimmed = child.textContent.trim();
        // Whitespace-only nodes contribute nothing to the output.
        if (trimmed !== '') {
          pieces.push(trimmed);
        }
      });

      return pieces.join(' ');
    }
    
    // Sample markup: text with an inline <b> element, followed by two adjacent <div> elements.
    const html = `
    This <b>is</b> 
    <div>a</div><div>test</div>
    `;
    
    // Log the text that getSpaceSeparatedText extracts from the sample markup.
    console.log(getSpaceSeparatedText(html));
    <script src="https://gh-canon.github.io/stack-snippet-console/console.min.js"></script>
    Login or Signup to reply.
  2. This removes all html tags (https://regex101.com/r/t7WJex/1),
    but you need a callback to insert a space between a closing and open div.

    // Matches either an adjacent `</div><div>` pair or any single markup construct
    // (element tag, processing instruction, DOCTYPE/CDATA/comment/declaration).
    //
    // Capture groups — the numbering matters for the backreferences below:
    //   1: a closing div immediately followed by an opening div
    //   2: the name of an element whose whole body is removed (script, style, ...)
    //   3: that element's attribute run, captured inside a lookahead
    //
    // `(?=( ... ))\3` emulates an atomic group (JavaScript has none), and `<\/\2`
    // requires the closing tag to repeat the opening name from group 2.
    // Every `\s`, `\S`, `\w`, `\/`, `\?`, `\[`, `\]` escape is required; without
    // them the literal is not even valid syntax.
    const HTML_TAG_PATTERN = /(<\/div\s*><div\s*>)|<(?:(?:(?:(script|style|object|embed|applet|noframes|noscript|noembed)(?:\s+(?=((?:"[\S\s]*?"|'[\S\s]*?'|(?:(?!\/>)[^>])?)+))\3)?\s*>)[\S\s]*?<\/\2\s*(?=>))|(?:\/?[\w:]+\s*\/?)|(?:[\w:]+\s+(?:"[\S\s]*?"|'[\S\s]*?'|[^>]?)+\s*\/?)|\?[\S\s]*?\?|(?:!(?:(?:DOCTYPE[\S\s]*?)|(?:\[CDATA\[[\S\s]*?\]\])|(?:--[\S\s]*?--)|(?:ATTLIST[\S\s]*?)|(?:ENTITY[\S\s]*?)|(?:ELEMENT[\S\s]*?))))>/g;

    /**
     * Removes all markup from an HTML string, inserting a single space where a
     * closing div is directly followed by an opening div.
     *
     * @param {string} html - Markup to strip.
     * @returns {string} The text with tags removed.
     */
    function stripHtmlTags(html) {
      // Group 1 is only set for the `</div><div>` alternative; every other
      // match is a plain tag and is replaced by nothing.
      return html.replace(HTML_TAG_PATTERN, function (match, adjacentDivPair) {
        return adjacentDivPair ? " " : "";
      });
    }

    let text = "This <b>is</b> <div>a</div><div>test</div>";
    text = stripHtmlTags(text);

    console.log( text );
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search