Ammar Ammar - 4 months ago 22
HTML Question

Getting the actual HTML after a transition occurs in a YouTube page (Chrome extension)

I'm working on a Chrome extension that should run a script on every YouTube watch page (e.g., https://www.youtube.com/watch?v=YisbVr69r7U)

In that script I want to get the itag's of the video (which I can get from a script that is in every youtube video page just by parsing "url_encoded_fmt_stream_map" property which is in yt.config)

The problem is that I can't find that property by parsing the (document.body.innerHTML) of some pages.

here's my manifest.json:

{
"manifest_version": 2,

"name" : "Test Extension",
"version" : "0.0",

"background": {
"scripts": ["background.js"]
},

"permissions": [
"https://www.youtube.com/*", "tabs", "webNavigation"
]
}


I know that YouTube uses transitions between pages (for example, if you click on a video to watch, a red bar appears at the top of the page and then the video page appears). I use the webNavigation onHistoryStateUpdated event to execute a script in the page after the transition ends.

background.js:

// Pattern for YouTube watch-page URLs; each history-state update is tested against it.
const r = /https:\/\/www\.youtube\.com\/watch\?v=(.*?)(&.*)?/;

// Inject script.js into the tab whenever a history-state update lands on a watch URL.
chrome.webNavigation.onHistoryStateUpdated.addListener(({ url, tabId }) => {
    if (!r.test(url)) {
        return;
    }
    chrome.tabs.executeScript(tabId, { file: "script.js" });
});


and script.js:

// Returns the raw "url_encoded_fmt_stream_map" value captured from the given HTML text.
// Throws a TypeError when the property is not present (the match result is null).
function getURLMap(bodyHTML) {
    return bodyHTML.match(/"url_encoded_fmt_stream_map":"(.*?)"/)[1];
}

/**
 * Collects the itag value from each stream descriptor string.
 *
 * @param {string[]} fmts_info - Stream descriptors (the stream map split on ",").
 * @returns {string[]} The itag captured from every descriptor that contains one,
 *     in input order; descriptors without a match are skipped.
 */
function getTags(fmts_info) {
    var tags = [];
    // Declared locally: a bare `r = ...` would create an implicit global and
    // throws a ReferenceError in strict mode.
    // The pattern requires a literal "\u" right after the value
    // (presumably the "\u0026" escape found in the page's embedded JSON).
    var r = /itag=(.*?)\\u/;
    console.log(fmts_info[0]);
    for (var i = 0; i < fmts_info.length; i++) {
        var matches = fmts_info[i].match(r);
        // match() returns null when the descriptor has no itag; skip it
        // instead of crashing on matches[1].
        if (matches) {
            tags.push(matches[1]);
        }
    }
    return tags;
}

// Log the itags of the current page: body HTML -> stream map -> comma-separated entries -> itags.
// The block keeps the intermediate names out of the script's top-level scope.
{
    const streamMap = getURLMap(document.body.innerHTML);
    const streamEntries = streamMap.split(',');
    console.log(getTags(streamEntries));
}


The extension works well when I go to a YouTube watch page directly (opening a new tab in Chrome and going directly to, say, https://www.youtube.com/watch?v=YisbVr69r7U); it shows the itags of that video correctly in the console.
The problem comes when I come to a youtube watch page by a transition (for example from the youtube index page to a video page by clicking on a video), in this case I have this error in console:

Uncaught TypeError: Cannot read property '1' of null script.js:4


When I let my script.js log document.body to the console, I can't find "url_encoded_fmt_stream_map" there.

It seems like the problem is with how I deal with the transitions in the page.

I searched a lot to solve my problem but nothing worked for me.

I tried using content scripts, but it seems that content scripts are inserted into the page when it's loaded, and not when a transition occurs.

I want to get the actual HTML of the page, that has the itag's in it!

EDIT:

This is not a duplicate of this

Tried this manifest.json:

{
"manifest_version": 2,

"name" : "Test Extension",
"version" : "0.0",

"content_scripts": [{
"matches": [ "*://*.youtube.com/*" ],
"js": [ "script.js" ],
"run_at": "document_start"
}]
}


script.js:

// Run process() on both events: "spfdone" (presumably fired when an in-site
// transition finishes) and the initial "DOMContentLoaded".
["spfdone", "DOMContentLoaded"].forEach(function (eventName) {
    document.addEventListener(eventName, process);
});

/**
 * Returns the raw "url_encoded_fmt_stream_map" value captured from HTML text.
 *
 * @param {string} bodyHTML - HTML source to search.
 * @returns {string} The text between the quotes following the property name.
 * @throws {TypeError} When the property is not present (the match result is null).
 */
function getURLMap(bodyHTML) {
    var streamMapPattern = /"url_encoded_fmt_stream_map":"(.*?)"/;
    return bodyHTML.match(streamMapPattern)[1];
}

/**
 * Collects the itag value from each stream descriptor string.
 *
 * @param {string[]} fmts_info - Stream descriptors (the stream map split on ",").
 * @returns {string[]} The itag captured from every descriptor that contains one,
 *     in input order; descriptors without a match are skipped.
 */
function getTags(fmts_info) {
    var tags = [];
    // Declared locally: a bare `r = ...` would create an implicit global and
    // throws a ReferenceError in strict mode.
    // The pattern requires a literal "\u" right after the value
    // (presumably the "\u0026" escape found in the page's embedded JSON).
    var r = /itag=(.*?)\\u/;
    for (var i = 0; i < fmts_info.length; i++) {
        var matches = fmts_info[i].match(r);
        // match() returns null when the descriptor has no itag; skip it
        // instead of crashing on matches[1].
        if (matches) {
            tags.push(matches[1]);
        }
    }
    return tags;
}

/**
 * Event handler: on a "/watch" page, logs the itags parsed from the body HTML.
 * Does nothing on any other path.
 */
function process() {
    var onWatchPage = location.pathname === "/watch";
    if (!onWatchPage) {
        return;
    }
    var streamEntries = getURLMap(document.body.innerHTML).split(',');
    console.log(getTags(streamEntries));
}


but the problem is not solved!

Answer

If you debug your script you will see that url_encoded_fmt_stream_map isn't added anywhere in the document after in-site navigation. Hacking the site JS shows that ytplayer.config variable is updated directly in such cases.

We'll have to inject our script into the page itself.

Declare a content script that runs on all of youtube in manifest.json:

"content_scripts": [{
  "matches": [ "*://*.youtube.com/*" ],
  "js": [ "content.js" ],
  "run_at": "document_start"
}]

content.js:

/**
 * Code meant to run inside the page itself, where the page's `ytplayer`
 * variable is readable. Only the body of this function is used: it is
 * extracted as text and injected into the page through a <script> element.
 *
 * Registers `process` for "spfdone" and "DOMContentLoaded"; on watch pages
 * `process` logs the itags found in ytplayer.config.args.url_encoded_fmt_stream_map.
 */
function injectedCode() {
    document.addEventListener("spfdone", process);
    document.addEventListener("DOMContentLoaded", process);

    function process() {
        // Kept nested so the injected text declares no additional names
        // beside `process`.
        function getTags(fmts_info) {
            var tags = [];
            // Declared with `var`: a bare `r = ...` would leak a global `r`
            // into the page (and throws in strict mode).
            var r = /itag=(\d+)/;
            for (var i = 0; i < fmts_info.length; i++) {
                var matches = fmts_info[i].match(r);
                if (matches)
                    tags.push(matches[1]);
            }
            return tags;
        }
        if (location.href.indexOf('watch?') < 0) {
            return;
        }
        // Any link of ytplayer.config.args may be absent when the event fires;
        // bail out quietly instead of throwing inside the event handler.
        var args = typeof ytplayer !== 'undefined' && ytplayer &&
            ytplayer.config && ytplayer.config.args;
        var streamMap = args && args.url_encoded_fmt_stream_map;
        if (typeof streamMap !== 'string') {
            return;
        }
        var tags = getTags(streamMap.split(','));
        console.log(tags);
    }
}

/**
 * Returns the source text of a function's body, braces included.
 *
 * @param {Function} f - Function whose source is read through toString().
 * @returns {string} Everything from the first "{" of the source to its final "}".
 * @throws {TypeError} When the source has no such span (the match result is null).
 */
function getFunctionText(f) {
    var source = f.toString();
    var bodyMatch = source.match(/\{[\s\S]*\}$/);
    return bodyMatch[0];
}

// Inject the body of injectedCode into the page: append an empty <script>
// element to the document root, then assign the extracted source as its text.
{
    const pageScript = document.createElement("script");
    document.documentElement.appendChild(pageScript);
    pageScript.text = getFunctionText(injectedCode);
}

To pass the results back to content script use custom events, or externally_connectable to send data directly to extension's background page script.

Comments