skip to Main Content

I want to convert the HTML string below into the recommended WhatsApp message format using JavaScript:

// Sample input; the stray trailing double quote was removed so it no longer leaks into the output.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE';

into the format below:

*TEST*  BODY _*WITH *_ SAMPLE *_~FORMAT~_* HERE

2

Answers


  1. Chosen as BEST ANSWER

    Based on @trincot's solution, with some additional changes:

    /**
     * Convert an HTML fragment into WhatsApp-style inline formatting
     * (`*bold*`, `_italic_`, `~strikethrough~`).
     *
     * The markup is first normalised as a string:
     *   - `<i><b>…</b></i>` is reordered to `<b><i>…</i></b>`;
     *   - a `<strike>` carrying a `style` attribute is rewritten without
     *     attributes and wrapped in `<i>` / `<b>` when the style text mentions
     *     "italic" / "bold".
     * The result is parsed with the browser's `DOMParser` and walked depth-first.
     *
     * @param {string} htmlText - HTML markup to convert.
     * @returns {string} Text content with formatting markers inserted.
     */
    function htmlToFormat(htmlText) {
      const codes = { B: "*", I: "_", STRIKE: "~" };

      // Replacer functions are used throughout so that "$&", "$1", … occurring in
      // the user's text are copied verbatim instead of being expanded by replace().
      const reordered = htmlText.replace(
        /<i><b>(.*?)<\/b><\/i>/g,
        (whole, inner) => (inner ? `<b><i>${inner}</i></b>` : whole),
      );

      const normalized = reordered.replace(
        /<strike\b([^>]*)>(.*?)<\/strike>/g,
        (whole, attrs, inner) => {
          // Only the opening tag's attributes are inspected; the element's text
          // content is never touched.
          const style = /style="([^"]*)"/.exec(attrs);
          if (style === null) {
            // No inline style: nothing to translate, leave the element as is.
            return whole;
          }
          let wrapped = `<strike>${inner}</strike>`;
          if (style[1].includes("italic")) {
            wrapped = `<i>${wrapped}</i>`;
          }
          if (style[1].includes("bold")) {
            wrapped = `<b>${wrapped}</b>`;
          }
          return wrapped;
        },
      );

      const { body } = new DOMParser().parseFromString(normalized, "text/html");
      const dfs = ({ childNodes }) => Array.from(childNodes, (node) => {
        // nodeType 3 is a text node: emit its text unchanged.
        if (node.nodeType === 3) {
          return node.textContent;
        }
        // Anything that is not an element (nodeType 1), e.g. a comment, is dropped.
        if (node.nodeType !== 1) {
          return "";
        }
        const s = dfs(node);
        const code = codes[node.tagName];
        // Insert the marker after leading and before trailing whitespace so it
        // sits next to the first/last non-whitespace character. Content that is
        // empty or whitespace-only does not match and stays unmarked.
        return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
      }).join("");

      return dfs(body);
    }
    
    // Sample input; the stray trailing double quote was removed so it no longer leaks into the output.
    const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE';
    console.log(htmlToFormat(htmlText));
    

  2. I would advise using a DOM Parser to parse the HTML, and then iterate over the DOM you get from it. This way the resulting text will also have all HTML entities resolved to text, any HTML comments will have been removed, and it will not break when spacing in HTML tags or their attributes is different from expected.

    I would also make sure the formatting characters are put adjacent to the word they apply to, so leaving any surrounding white space out of it:

    /**
     * Convert an HTML fragment into WhatsApp-style inline formatting.
     *
     * The markup is parsed with the browser's `DOMParser` and walked depth-first.
     * `<b>`, `<i>` and `<strike>` elements become `*`, `_` and `~` pairs placed
     * directly next to the first and last non-whitespace character of their
     * content; all other elements contribute only their descendants' text.
     *
     * @param {string} html - HTML markup to convert.
     * @returns {string} Text content with formatting markers inserted.
     */
    function htmlToFormat(html) {
        const codes = { B: "*", I: "_", STRIKE: "~" };
        // Parse the argument itself, not a variable from the enclosing scope.
        const { body } = new DOMParser().parseFromString(html, "text/html");
        const dfs = ({ childNodes }) => Array.from(childNodes, (node) => {
            // nodeType 3 is a text node: emit its text unchanged.
            if (node.nodeType === 3) {
                return node.textContent;
            }
            // Anything that is not an element (nodeType 1), e.g. a comment, is dropped.
            if (node.nodeType !== 1) {
                return "";
            }
            const s = dfs(node);
            const code = codes[node.tagName];
            // Insert the marker after leading and before trailing whitespace so it
            // sits next to the first/last non-whitespace character. Content that is
            // empty or whitespace-only does not match and stays unmarked.
            return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
        }).join("");

        return dfs(body);
    }
    
    // Demo (the stray trailing double quote was removed from the sample input so it no longer leaks into the output)
    const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE';
    console.log(htmlToFormat(htmlText));
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search