skip to Main Content

Is there a way to open a site with Python, run a script in the browser's developer-tools (Inspect) console, and get the HTML of that site? (It is not possible to get the site's HTML with a plain GET request.)

I have this script and have tried it manually: I can download the site's HTML with it, but I want to do this automatically.

/**
 * Serializes the current document to an HTML string and offers it to the
 * user as a download named "dom".
 *
 * The top-level children of `document` (doctype, comments, the root element,
 * etc.) are visited in order and rendered back to markup. The result is
 * wrapped in a `text/html` Blob and saved either through
 * `navigator.msSaveOrOpenBlob` (when present) or by clicking a temporary
 * `<a download>` element pointing at an object URL.
 *
 * Takes no arguments and returns nothing; its effect is the download.
 */
function myFunction() {
  // Declared locally: the previous bare assignment leaked an implicit global
  // and throws a ReferenceError in strict mode / ES modules.
  // `var` is kept (instead of let/const) because of the IE10+ code path below.
  var filename = "dom";
  var html = '';
  var node = document.firstChild;

  while (node) {
    switch (node.nodeType) {
      case Node.ELEMENT_NODE:
        html += node.outerHTML;
        break;
      case Node.TEXT_NODE:
        html += node.nodeValue;
        break;
      case Node.CDATA_SECTION_NODE:
        html += '<![CDATA[' + node.nodeValue + ']]>';
        break;
      case Node.COMMENT_NODE:
        html += '<!--' + node.nodeValue + '-->';
        break;
      case Node.DOCUMENT_TYPE_NODE:
        // (X)HTML documents are identified by public identifiers
        html +=
          '<!DOCTYPE ' +
          node.name +
          (node.publicId ? ' PUBLIC "' + node.publicId + '"' : '') +
          (!node.publicId && node.systemId ? ' SYSTEM' : '') +
          (node.systemId ? ' "' + node.systemId + '"' : '') +
          // Newline after the doctype (was the literal text '>n').
          '>\n';
        break;
    }
    node = node.nextSibling;
  }

  var file = new Blob([html], {
    type: 'text/html'
  });

  if (window.navigator.msSaveOrOpenBlob) { // IE10+
    window.navigator.msSaveOrOpenBlob(file, filename);
  } else { // Others
    var a = document.createElement("a");
    var url = window.URL.createObjectURL(file);
    a.href = url;
    a.download = filename;
    // The anchor is attached to the document before being clicked.
    document.body.appendChild(a);
    a.click();
    // Clean up on the next tick so the click has been dispatched first.
    setTimeout(function () {
      document.body.removeChild(a);
      window.URL.revokeObjectURL(url);
    }, 0);
  }
}
// Schedule myFunction to run repeatedly, once every 5000 ms (5 seconds).
// The timer ID returned by setInterval is discarded, so nothing here cancels it.
setInterval(myFunction, 5000);

2

Answers


  1. You should use a web scraper such as Beautiful Soup.

    Login or Signup to reply.
  2. You can use this tutorial for Beautiful Soup.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search