user2918160 user2918160 - 22 days ago 8
Javascript Question

Change matching words in a webpage's text to buttons

I am trying to make a Chrome extension that parses a website looking for keywords and then replaces those keywords with buttons. However, when I change the text, the image paths become corrupted.

// This is a content script (isolated environment)
// It will have partial access to the chrome API
// TODO
// Consider adding a "run_at": "document_end" in the manifest...
// don't want to run before full load
// Might also be able to do this via the chrome API
// Log once so the page's console shows that the content script was injected.
console.log("Scraper Running");
// Words to look for in the page; runScraper() below matches them case-insensitively.
var keywords = ["sword", "gold", "yellow", "blue", "green", "china", "civil", "state"];

// This will match the keywords with the page text
// Will also create the necessary buttons
(function() {
// For each keyword: read the HTML of the selected element, wrap every
// case-insensitive occurrence of the keyword in <button> tags, and write the
// resulting markup back ($ is presumably jQuery -- confirm it is loaded before this script).
// NOTE(review): the replacement runs over the serialized markup, not only the
// visible text, so a keyword that appears inside a tag or an attribute value
// (for example an image path) is rewritten as well.
// NOTE(review): the :not(...) filters in the selector are attached to body itself,
// not to its descendants -- confirm that this is what was intended.
function runScraper() {
console.log($('body'));
for(var i = 0; i < keywords.length; i++){
$("body:not([href]):not(:image)").html($("body:not([href]):not(:image)").html()
.replace(new RegExp(keywords[i], "ig"),"<button> " + keywords[i] + " </button>"));
console.log("Ran it " + i);
}
}
// Placeholder: not called anywhere in this script and currently always returns null.
function createResourceButton() {
// Programmatically create a button here

// Really want to return the button
return null;
}
// Placeholder: empty stub, not called anywhere in this script.
function createActionButton() {
}
// Run the keyword replacement once, as soon as this script is evaluated.
runScraper();
})();
// TODO create the functions that the buttons will call
// These will pass data to the chrome extension (see message passing)
// Or we can consider a hack like this:
// "Building a Chrome Extension - Inject code in a page using a Content script"
// http://stackoverflow.com/questions/9515704


Image of current results:

Wikipedia images will not load

Answer

Your approach to this problem is wrong. To do this, you need to walk through the document and change only the text nodes, not the HTML of all nodes.

Modifying the code from this other answer of mine, the following complete extension changes all matching words on the page to buttons.

The extension in action:

button-izing matching words in Wikipedia page used in image by OP

manifest.json

{
    "description": "Upon action button click, make all matching words buttons.",
    "manifest_version": 2,
    "name": "Button all matching words",
    "version": "0.1",

    "permissions": [
        "activeTab"
    ],

    "background": {
        "scripts": [
            "background.js"
        ]
    },

    "browser_action": {
        "default_icon": {
            "32": "myIcon.png"
        },
        "default_title": "Make Buttons of specified words"
    }
}

background.js:

//When the browser action button is clicked, inject contentScript.js into the tab the
//  click came from (the tab object is supplied by the onClicked event).
chrome.browserAction.onClicked.addListener(function(tab) {
    //Inject the script to change the text of all matching words into buttons.
    chrome.tabs.executeScript(tab.id, {file: 'contentScript.js'}, function() {
        //The injection can be refused (e.g. on pages the extension is not allowed to
        //  script). Reading chrome.runtime.lastError in the callback is how such a
        //  failure is detected; report it instead of leaving it unhandled.
        if(chrome.runtime.lastError) {
            console.error('Could not inject contentScript.js: ' + chrome.runtime.lastError.message);
        }
    });
});

contentScript.js:

//Content script: wraps every whole-word, case-insensitive occurrence of a keyword found
//  in the text nodes under document.body in a <button> element.
(function(){
    const keywords = ["sword", "gold", "yellow", "blue", "green", "china", "civil", "state"];
    //Escape RegExp metacharacters so that every keyword is matched literally, even if a
    //  keyword containing characters such as '.' or '+' is added to the list later.
    const escapedKeywords = keywords.map(function(word) {
        return word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    });
    //Build the RegExp once. Doing it for every replace is inefficient.
    //  Build one RegExp that matches all of the words instead of multiple RegExp.
    //  The capturing group makes String.prototype.split() return the matched words
    //  in between the unmatched pieces of text.
    const myRegExp = new RegExp('\\b(' + escapedKeywords.join('|') + ')\\b', 'gi');
    //Parents whose text must stay a plain text node: replacing it with a <span> would
    //  drop the CSS of a <style>, the code of a <script> or the value of a <textarea>.
    const skippedParents = new Set(['SCRIPT', 'STYLE', 'TEXTAREA']);

    /**
     * Replace one text node by a <span> in which every keyword is a <button>.
     * Nodes that are not text nodes, that have a skipped parent, or that contain
     * no keyword are left untouched.
     * @param {Node} textNode - the node to inspect.
     */
    function handleTextNode(textNode) {
        if(textNode.nodeName !== '#text') {
            //Don't do anything except on text nodes.
            return;
        }
        const parent = textNode.parentNode;
        if(!parent || skippedParents.has(parent.nodeName.toUpperCase())) {
            return;
        }
        //With one capturing group, split() yields [text, match, text, match, ..., text]:
        //  even indexes are unmatched text, odd indexes are the matched keywords.
        //  split() does not depend on myRegExp.lastIndex, so no reset is needed.
        const pieces = textNode.textContent.split(myRegExp);
        //Only change the DOM if at least one keyword was found in the text.
        if(pieces.length === 1) {
            return;
        }
        //Build the replacement from DOM nodes instead of assigning innerHTML, so that
        //  page text containing '<' or '&' is never re-interpreted as markup.
        const newSpan = document.createElement('span');
        pieces.forEach(function(piece, index) {
            if(piece === '') {
                //A match at the very start/end of the text leaves an empty piece.
                return;
            }
            if(index % 2 === 1) {
                const button = document.createElement('button');
                button.textContent = piece;
                newSpan.appendChild(button);
            } else {
                newSpan.appendChild(document.createTextNode(piece));
            }
        });
        parent.replaceChild(newSpan, textNode);
    }

    //This assumes that you want all matching words in the document changed, without
    //  limiting it to only certain sub portions of the document (i.e. not 'not(a)').
    //Create a NodeIterator to get the text node descendants of the body.
    const nodeIter = document.createNodeIterator(document.body, NodeFilter.SHOW_TEXT);
    //Collect the text nodes first and change them afterwards, so that the DOM is not
    //  modified while it is still being iterated.
    const textNodes = [];
    let currentNode;
    while((currentNode = nodeIter.nextNode())) {
        textNodes.push(currentNode);
    }
    //Process each text node
    textNodes.forEach(function(el){
        handleTextNode(el);
    });
})();

myIcon.png:

Icojam-Weathy-24-tornado.png

The code in handleTextNode to make changes to text nodes was modified from code in another answer of mine.

Comments