Javascript - Extract matching group using regex in TamperMonkey

medic_dev
September 1, 2023
178 views
0 votes
2 Answers

I am trying to write a TamperMonkey userscript that extracts URLs matching a certain pattern, visits each of those pages, extracts a piece of information from it, and then updates the link text for each URL with the extracted text. I am having problems with the regex matching/extracting.

I have tested my regex and know it matches but it’s not matching when I run the script and I’m not sure how to extract the group. Grateful for any help.

// ==UserScript==
// @name         Pull through titles
// @namespace    http://tampermonkey.net/
// @version      0.2
// @description  ---
// @author       You
// @match        https://xxx/curriculum-overview/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=tampermonkey.net
// @grant        none
// ==/UserScript==

// Userscript body as posted in the question: un-hides the "js-descriptors"
// elements, then requests every "portfolio-item/review" link and tries to pull
// a title out of each response with regexPattern.
(function() {
    'use strict';

    // Capture group 1 is the text inside value="..." that follows the
    // portfolio-item__text-input-wide class name on the same line ('.' does
    // not match newlines). Both quantifiers are greedy, so the capture runs
    // up to the last double quote on that line.
    const regexPattern = /portfolio-item__text-input-wide" .*value="(.+)"/;
    var evidence = "";
    var title = "";

    //Show descriptors by default
    var x = document.getElementsByClassName("js-descriptors");
    for (var i = 0, max = x.length; i < max; i++) {
        x[i].style.display = "block";
    }

    //Find all URLs that link to evidence
    var urls = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //For each URL, visit the page and extract the title of the evidence and update the innerText with that title
    // (reuses the function-scoped `i` declared with `var` in the loop above)
    for (i = 0; i < urls.length; i++){

        const xhr = new XMLHttpRequest();
        // Third argument `true` makes the request asynchronous. The anchor
        // element is passed as the URL and is converted to its href string.
        xhr.open("GET", urls[i], true);
        xhr.responseType = "text";
        // Runs later, once the response has arrived; only then is `evidence` set.
        xhr.onload = () => {
            if (xhr.readyState === xhr.DONE) {
                if (xhr.status === 200) {
                    evidence = xhr.responseText;
                }
            }
        };

        xhr.send(null);
        // NOTE(review): send() returns immediately for an asynchronous request,
        // so onload has not run yet and `evidence` is still "" at this point.
        // match() therefore returns null and title.toString() throws a TypeError.
        title = evidence.match(regexPattern); //extract matching regex pattern
        alert(title.toString()); //once I know the string is extracted will append to
        //urls[i].innerText = urls[i].toString(); //this line tests that the innerText can be changed to the URL; will change to title variable once working
    }
})
();

Tags: javascript tampermonkey

Answers

Chosen as BEST ANSWER

After a bit of debugging with a friend, we realised that urls[i] couldn't be used from inside onload – by the time the callback runs, the loop has already finished, so i no longer refers to the right link – which I think is what @Fraser was alluding to, and is why we were struggling to get it to update innerText. We came up with a slightly different solution. Posting here in case it helps anyone.

// Userscript body: un-hides the "js-descriptors" elements, then fetches every
// "portfolio-item/review" link on the page and replaces the link text with the
// title found in the fetched page.
(function() {
    'use strict';

    // Capture group 1 is the contents of the value="..." attribute inside the
    // tag that carries the portfolio-item__text-input-wide class.
    // [^>]* keeps the search inside that one tag, and [^"]+ stops at the
    // closing quote so attributes following value="..." are not captured.
    // NOTE(review): assumes the class name is the last token of the class
    // attribute and that value comes after it in the tag - confirm against
    // the real page markup. The captured text is raw HTML source, so entities
    // such as &amp; are not decoded.
    const regexPattern = /portfolio-item__text-input-wide"\s[^>]*value="([^"]+)"/;

    //Show descriptors by default
    for (const descriptor of document.getElementsByClassName("js-descriptors")) {
        descriptor.style.display = "block";
    }

    //Find all links that point to evidence
    const links = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //For each link, fetch the page, extract the title of the evidence and use it as the link text
    for (const link of links) {
        // `link` and `xhr` are block-scoped, so every onload callback closes
        // over its own pair; no extra property on xhr is needed to remember
        // which link a response belongs to.
        const xhr = new XMLHttpRequest();
        xhr.open("GET", link.href, true);
        xhr.responseType = "text";

        xhr.onload = () => {
            if (xhr.status !== 200) {
                console.warn(`Pull through titles: ${link.href} returned HTTP ${xhr.status}`);
                return;
            }

            // match() returns null when the pattern is not found; leave the
            // link text untouched in that case instead of throwing.
            const match = xhr.responseText.match(regexPattern);
            if (match === null) {
                console.warn(`Pull through titles: no title found in ${link.href}`);
                return;
            }

            link.textContent = match[1];
        };

        xhr.onerror = () => {
            console.warn(`Pull through titles: request to ${link.href} failed`);
        };

        xhr.send();
    }
})();

(Edit)

- Fraser
- September 1, 2023 at 6:29 pm
- 0 votes
0
You need to do your regex, etc, within your xhr.onload – once the data has been fetched. This is because the request is asynchronous (xhr.open is called with async set to true, so xhr.send returns immediately) – meaning that your title = evidence.match(regexPattern); runs before the response has arrived, when evidence is still an empty string.

e.g.
```
xhr.onload = () => {
  // Only handle a finished request that came back with HTTP 200.
  const finished = xhr.readyState === xhr.DONE;
  if (!finished || xhr.status !== 200) {
    return;
  }

  // The response is available here, so the match can run on real data.
  evidence = xhr.responseText;
  title = evidence.match(regexPattern); //extract matching regex pattern
  alert(title.toString());
};
```
Alternatively you could create a function, and call that from within your onload, e.g.
```
/**
 * Runs regexPattern against the fetched page text and logs the match result.
 *
 * @param {string} text - Response body to search.
 */
function getTitle(text) {
   // Search the text that was passed in rather than the outer `evidence`
   // variable, which the onload handler calling this function never assigns.
   var mytitle = text.match(regexPattern);
   console.log(mytitle); // or whatever...
}

// then...

xhr.onload = () => {
  // Hand the response body to getTitle once the request has completed with HTTP 200.
  const succeeded = xhr.readyState === xhr.DONE && xhr.status === 200;
  if (succeeded) {
    getTitle(xhr.responseText);
  }
};
```
Login or Signup to reply.

Please signup or login to give your own answer.

Click here to cancel reply.

Javascript – Extract matching group using regex in TamperMonkey

Answers