skip to Main Content

Below is the output of alert( rd.innerHTML );:

 <ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」&lt;Name des Sprechers&gt;<br><ruby><rb>です</rb><rt><br>desu</rt></ruby>。

I would like to iterate over all of these nodes. I tried (among others):

// Alert the inner HTML of every descendant element of rd.
// getElementsByTagName("*") matches elements only, so text nodes are never visited.
for (const item of rd.getElementsByTagName("*")) {
    alert(item.innerHTML);
}

But with the loop above I get:

  • <rb>「わたし</rb><rt><br>watashi</rt>
  • 「わたし
  • n watashi
  • <rb>は</rb><rt><br>wa</rt>
  • n wa
  • <rb>です</rb><rt><br>desu</rt>
  • です
  • n desu

How can I loop over this HTML and also get the node 」&lt;Name des Sprechers&gt;<br>?

Ideally I would like to have all rb elements and the text node in this order:

  • <rb>「わたし</rb><rt><br>watashi</rt>
  • <rb>は</rb><rt><br>wa</rt>
  • 」&lt;Name des Sprechers&gt;<br>
  • <rb>です</rb><rt><br>desu</rt>

2

Answers


  1. Where did “Sprechers” disappear to in your output? You need to traverse nodes rather than elements: getElementsByTagName returns only elements, so text nodes are skipped. I’m re-using a function that extracts the text nodes under a node / element.

    // Sample markup from the question, kept verbatim in a template literal.
    const html = `<ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」&lt;Name des Sprechers&gt;<br><ruby><rb>です</rb><rt><br>desu</rt></ruby>`;

    // Detached <div> whose child nodes come from parsing the markup above.
    const rd = Object.assign(document.createElement("div"), { innerHTML: html });
    
    
    /**
     * Collects the text of every text node beneath `node`, in document order.
     *
     * Each text node found is also logged together with the name of its parent.
     *
     * @param {Node} node - Root whose descendants are searched; the root itself
     *   is not inspected.
     * @returns {string[]} The `textContent` of each descendant text node.
     */
    function textUnder(node) {
      const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
      let all = [];
      for (let child = node.firstChild; child; child = child.nextSibling) {
        if (child.nodeType === TEXT_NODE) {
          // Use parentNode/nodeName rather than parentElement/tagName:
          // parentElement is null when the parent is not an element (e.g. a
          // DocumentFragment root), which would throw here. For element
          // parents nodeName and tagName are the same string.
          console.log(`text node of ${child.parentNode.nodeName}: ${child.textContent}`);
          all.push(child.textContent);
        } else {
          // Recurse so nested text nodes are reported in document order.
          all = all.concat(textUnder(child));
        }
      }
      return all;
    }
    
    // Gather every text node under the container and print the resulting array.
    const texts = textUnder(rd);
    console.log(texts);
    /* Give elements with the as-console-wrapper class a minimum height of 100%.
       NOTE(review): presumably this targets the snippet runner's console panel; confirm. */
    .as-console-wrapper {
      min-height: 100%;
    }
    Login or Signup to reply.
  2. I think this is kind of a tricky question.

    If my guess is right, you’re trying to separate Japanese text from Western text…

    Try the snippet below. All names are self-explanatory.

    // Look up the demo container by id, split its child nodes, then log the result.
    const myContainer = document.getElementById('myContent');
    const separated = separateLanguages(myContainer);

    console.log('Language separation:', separated);
    
    /**
     * Splits the direct child nodes of a container into an ordered list of strings.
     *
     * - Text nodes contribute their `wholeText`.
     * - `RUBY` elements contribute their `innerHTML`.
     * - `BR` elements are appended to the previous entry so the line break
     *   stays attached to the text it follows; a `BR` with no previous entry
     *   becomes an entry of its own.
     * - Any other element contributes its `outerHTML`.
     * - Non-element, non-text nodes (e.g. comments) are skipped because they
     *   have no `outerHTML`.
     *
     * Only direct children are visited; nested elements are not descended into.
     *
     * @param {Node} myContent - Container whose `childNodes` are inspected.
     * @returns {string[]} One entry per separated piece, in document order.
     */
    function separateLanguages ( myContent )
    {
      const ELEMENT_NODE = 1; // numeric value of Node.ELEMENT_NODE
      const myTexts = [];

      for ( const myNode of myContent.childNodes )
      {
        // Separates Western (non-ruby) texts.
        if ( myNode.nodeName === '#text' )
        {
          myTexts.push(myNode.wholeText);
          continue;
        }

        // Comments and other non-element nodes have no outerHTML; skip them
        // instead of pushing undefined into the result.
        if ( myNode.nodeType !== ELEMENT_NODE )
        {
          continue;
        }

        // Separates Japanese texts together with their ruby annotation.
        // To include the <ruby> tag itself, push myNode instead of
        // myNode.innerHTML.
        if ( myNode.nodeName === 'RUBY' )
        {
          myTexts.push(myNode.innerHTML);
          continue;
        }

        // Keep the line break in the previous entry. The length guard matters:
        // with an empty array, index -1 would create a stray "-1" property
        // holding "undefined<br>" and the break would be lost.
        if ( myNode.nodeName === 'BR' && myTexts.length > 0 )
        {
          myTexts[myTexts.length - 1] += myNode.outerHTML;
          continue;
        }

        // Separates any other content (including a leading <br>).
        myTexts.push(myNode.outerHTML);
      }

      return myTexts;
    }
    <!-- Demo markup: the element with id "myContent" is the container the script looks up via getElementById('myContent'). -->
    <div id="myContent"><ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」&lt;Name des Sprechers&gt;<br><ruby><rb>です</rb><rt><br>desu</rt></ruby>。</div>

    NOTE: The function separateLanguages only iterates over the direct child nodes of the root element; it does not descend into nested elements.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search