pugnator pugnator - 3 months ago 10
Javascript Question

Replacing a lot of text in browser's addon

I'm trying to develop a Firefox add-on that transliterates the text on any page into a specific language. Under the hood it's just a set of 2D arrays that I iterate over, using this code:

/**
 * Escape the regular-expression special characters in a string so that the
 * result can be handed to `new RegExp()` and match the input literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each character from the class below prefixed
 *     by a backslash.
 */
function escapeRegExp(str) {
    const specialChars = /([.*+?^=!:${}()|\[\]\/\\])/g;
    // "$1" re-inserts the matched character right after the added backslash.
    return str.replace(specialChars, "\\$1");
}

/**
 * Build a copy of the page body's HTML in which every literal occurrence of
 * `find` has been replaced by `replace`.
 *
 * This only reads `document.body.innerHTML`; nothing is written back here.
 *
 * @param {string} find - Text to look for (escaped before it becomes a RegExp).
 * @param {string} replace - Replacement passed to `String.prototype.replace`.
 * @returns {string} The body HTML after the substitution.
 */
function replaceAll(find, replace) {
    const bodyHtml = document.body.innerHTML;
    const literalPattern = new RegExp(escapeRegExp(find), 'g');
    return bodyHtml.replace(literalPattern, replace);
}

/**
 * Apply every substitution in `Table` to the page.
 *
 * For each row, the second element is the text to find and the first element
 * is its replacement. `document.body.innerHTML` is reassigned once per row.
 */
function convert2latin() {
    for (const [replacement, target] of Table) {
        document.body.innerHTML = replaceAll(target, replacement);
    }
}


It works, and I can ignore HTML tags because they can only be in English, but the problem is performance: of course, it's very, very poor. As I have no experience in JS, I searched online and found that maybe a DocumentFragment can help.

Or maybe I should use a different approach altogether?

Answer

Based on your comments, you appear to have already been told that the most expensive thing is the DOM rebuild that happens when you completely replace the entire contents of the page (i.e. when you assign to document.body.innerHTML). You are currently doing that for each substitution. This results in Firefox re-rendering the entire page for each substitution you are making. You only need to assign to document.body.innerHTML once, after you have made all of the substitutions.

The following should provide a first pass at making it faster:

/**
 * Backslash-escape the RegExp special characters in `str`, so the returned
 * text can be compiled with `new RegExp()` and matched literally.
 *
 * @param {string} str - Raw text that may contain RegExp special characters.
 * @returns {string} The escaped text.
 */
function escapeRegExp(str) {
    const SPECIAL_CHARS = /([.*+?^=!:${}()|\[\]\/\\])/g;
    const escaped = str.replace(SPECIAL_CHARS, "\\$1");
    return escaped;
}

/**
 * Transliterate the whole page: apply every substitution in `Table` to a
 * string copy of the body's HTML, then write the result back exactly once.
 *
 * Each row of `Table` is read as `[replacement, textToFind]`. The text to
 * find is escaped and compiled into a global RegExp so that all occurrences
 * are replaced.
 */
function convert2latin() {
    // Declared locally: a bare assignment would create an implicit global
    // and throws a ReferenceError in strict mode / ES modules.
    let newInnerHTML = document.body.innerHTML;
    for (const [replacement, target] of Table) {
        const pattern = new RegExp(escapeRegExp(target), 'g');
        newInnerHTML = newInnerHTML.replace(pattern, replacement);
    }
    // Single assignment, so the DOM is rebuilt once instead of once per row.
    document.body.innerHTML = newInnerHTML;
}

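You mention in comments that there is no real need to use a RegExp for the match, so a plain-string version like the following would be even faster. (Keep in mind that String.prototype.replace() with a string pattern replaces only the first occurrence, so a plain-string version still has to replace every occurrence of each string.)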

/**
 * Transliterate the whole page using plain-string substitutions (no RegExp):
 * apply every row of `Table` to a string copy of the body's HTML, then write
 * the result back exactly once.
 *
 * Each row of `Table` is read as `[replacement, textToFind]`.
 */
function convert2latin() {
    // Declared locally: a bare assignment would create an implicit global
    // and throws a ReferenceError in strict mode / ES modules.
    let newInnerHTML = document.body.innerHTML;
    for (let i = 0; i < Table.length; i++) {
        const target = Table[i][1];
        // An empty search string would match between every character.
        if (target === "") {
            continue;
        }
        // String.prototype.replace() with a string pattern only replaces the
        // first occurrence; split/join replaces every occurrence literally.
        newInnerHTML = newInnerHTML.split(target).join(Table[i][0]);
    }
    // Single assignment, so the DOM is rebuilt once instead of once per row.
    document.body.innerHTML = newInnerHTML;
}

If you really need to use a RegExp for the match, and you are going to perform these exact substitutions multiple times, you are better off creating all of the RegExp objects before the first use (e.g. when Table is created or changed) and storing them (e.g. in Table[i][2]).

However, assigning to document.body.innerHTML is a bad way to do this:

As the8472 mentioned, replacing the entire content of document.body.innerHTML is a very heavy-handed way to perform this task. It has some significant disadvantages, including probably breaking the functionality of other JavaScript in the page and potential security issues. A better solution would be to change only the textContent of the text nodes. However, doing so may incur the performance hit of a partial page re-layout for each node that is changed (if you perform other actions, in addition to making just those changes, in the loop for each node). The actual performance difference will depend on the content of the page and what else you do, if anything.

One method of doing this is to use a TreeWalker. The code to do so could be something like:

/**
 * Transliterate a piece of text by applying every substitution in `Table`.
 *
 * Each row of `Table` is read as `[replacement, textToFind]`; rows are
 * applied in order, each one to the output of the previous row.
 *
 * @param {string} text - Text to transliterate.
 * @returns {string} The text with all occurrences of every row's search
 *     string replaced.
 */
function convert2latin(text) {
    let result = text;
    for (let i = 0; i < Table.length; i++) {
        const target = Table[i][1];
        // An empty search string would match between every character.
        if (target === "") {
            continue;
        }
        // String.prototype.replace() with a string pattern only replaces the
        // first occurrence; split/join replaces every occurrence literally.
        result = result.split(target).join(Table[i][0]);
    }
    return result;
}

//Create the TreeWalker over the text nodes inside document.body
let treeWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, {
    acceptNode: function(node) {
        // Empty text nodes have nothing to transliterate.
        if (node.textContent.length === 0) {
            return NodeFilter.FILTER_SKIP;
        }
        return NodeFilter.FILTER_ACCEPT;
    }
}, false);
//Iterate over all text nodes, changing the textContent of the text nodes
while (treeWalker.nextNode()) {
    const node = treeWalker.currentNode;
    const originalText = node.textContent;
    const convertedText = convert2latin(originalText);
    // Only write to the DOM when the text actually changed.
    if (convertedText !== originalText) {
        node.textContent = convertedText;
    }
}