skip to Main Content

I am trying to write a TamperMonkey userscript that extracts URLs that match a certain pattern, visits each of these pages, extracts a piece of information from it, and then updates the link text for each URL with the extracted text. I am having problems with the regex matching/extracting.

I have tested my regex and know it matches but it’s not matching when I run the script and I’m not sure how to extract the group. Grateful for any help.

// ==UserScript==
// @name         Pull through titles
// @namespace    http://tampermonkey.net/
// @version      0.2
// @description  ---
// @author       You
// @match        https://xxx/curriculum-overview/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=tampermonkey.net
// @grant        none
// ==/UserScript==

// Userscript body: reveals every "js-descriptors" element, then, for each link
// whose href contains "portfolio-item/review", requests the linked page and
// tries to pull a title out of the response text with regexPattern.
(function() {
    'use strict';

    // Capture group 1 is the text between the quotes of a value="..." attribute
    // that follows the portfolio-item__text-input-wide class on the same line.
    const regexPattern = /portfolio-item__text-input-wide" .*value="(.+)"/;
    // Shared across all loop iterations; starts out as an empty string.
    var evidence = "";
    var title = "";

    //Show descriptors by default
    var x = document.getElementsByClassName("js-descriptors");
    for (var i = 0, max = x.length; i < max; i++) {
        x[i].style.display = "block";
    }

    //Find all URLs that link to evidence
    var urls = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //For each URL, visit the page and extract the title of the evidence and update the innerText with that title
    for (i = 0; i < urls.length; i++){

        const xhr = new XMLHttpRequest();
        // Third argument true makes the request asynchronous: send() returns
        // immediately and onload fires later, once the response has arrived.
        xhr.open("GET", urls[i], true);
        xhr.responseType = "text";
        xhr.onload = () => {
            if (xhr.readyState === xhr.DONE) {
                if (xhr.status === 200) {
                    evidence = xhr.responseText;
                }
            }
        };

        xhr.send(null);
        // NOTE(review): these two lines run before onload has fired, so on the
        // first pass evidence is still "" and match() returns null, which makes
        // title.toString() throw a TypeError and ends the loop.
        title = evidence.match(regexPattern); //extract matching regex pattern
        alert(title.toString()); //once I know the string is extracted will append to
        //urls[i].innerText = urls[i].toString(); //this line tests that the innerText can be changed to the URL; will change to title variable once working
    }
})
();

2

Answers


  1. Chosen as BEST ANSWER

    After a bit of debugging with a friend, we realised that urls[i] could not be reached correctly from inside onload: by the time the callback runs, the loop has already finished and i no longer points at the link that started the request. I think this is what @Fraser was alluding to, and it is why we were struggling to get the script to update innerText. We came up with a slightly different solution. Posting here in case it helps anyone.

    // Userscript body: reveals every "js-descriptors" element, then replaces the
    // text of each evidence link with the title found on the page it points to.
    (function() {
        'use strict';

        // Capture group 1 is the value attribute that follows the
        // portfolio-item__text-input-wide class. The lazy ".*?" stops at the
        // nearest value= and "[^"]+" stops at the attribute's closing quote, so
        // later attributes on the same line are not swallowed into the title.
        const regexPattern = /portfolio-item__text-input-wide" .*?value="([^"]+)"/;

        //Show descriptors by default
        for (const descriptor of document.getElementsByClassName("js-descriptors")) {
            descriptor.style.display = "block";
        }

        //Find all links that point to evidence
        const links = document.querySelectorAll('a[href*="portfolio-item/review"]');

        //For each link, fetch the page it points to and use the evidence title as the link text
        for (const link of links) {
            // `link` and `xhr` are fresh bindings on every iteration, so the
            // callbacks below still see the right link when the response
            // arrives, long after the loop itself has finished.
            const xhr = new XMLHttpRequest();
            xhr.open("GET", link.href, true);
            xhr.responseType = "text";

            xhr.onload = () => {
                if (xhr.status !== 200) {
                    console.warn(`Pull through titles: ${link.href} returned status ${xhr.status}`);
                    return;
                }

                const match = xhr.responseText.match(regexPattern);
                if (match === null) {
                    // Leave the original link text alone rather than throwing.
                    console.warn(`Pull through titles: no title found at ${link.href}`);
                    return;
                }

                link.innerText = match[1]; //updating the innerText for the hyperlink
            };

            xhr.onerror = () => {
                console.warn(`Pull through titles: request to ${link.href} failed`);
            };

            xhr.send(null);
        }
    })();
    

  2. You need to do your regex, etc., within your xhr.onload – once the data has been fetched. This is because the request is asynchronous (xhr.open is called with async set to true, so xhr.send returns immediately) – meaning that your title = evidence.match(regexPattern); runs before any response has arrived and will never work, because evidence will still be an empty string.

    e.g.

    // Do the matching inside onload, i.e. only once the response has arrived.
    xhr.onload = () => {
      if (xhr.readyState !== xhr.DONE || xhr.status !== 200) {
        return;
      }

      evidence = xhr.responseText;
      title = evidence.match(regexPattern); //extract matching regex pattern

      // match() returns null when the pattern is not found; calling
      // toString() on null would throw, so report it instead.
      if (title === null) {
        console.warn("No match for regexPattern in the response");
        return;
      }

      alert(title.toString());
    };
    

    Alternatively you could create a function, and call that from within your onload, e.g.

    /**
     * Runs regexPattern (taken from the enclosing scope) against the given
     * response text and logs the match result.
     *
     * @param {string} text - Response body to search for the title.
     */
    function getTitle(text) {
       // Search the text that was passed in, not an outer variable.
       const mytitle = text.match(regexPattern);
       console.log(mytitle); // or whatever...
    }
    
    // then...
    
    // Hand the response text to getTitle once the request has completed with a 200.
    xhr.onload = () => {
      const finishedOk = xhr.readyState === xhr.DONE && xhr.status === 200;
      if (finishedOk) {
        getTitle(xhr.responseText);
      }
    };
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search