I need to get some statistics from a site that does not have an API. After checking the page source, I saw that the data I need is used to build a statistical chart.
I was able to get the script tag using Request & Cheerio:
// Fetch the page and print the inline <script> that creates the Dygraph chart.
request(nodeUrl, function(error, res, body) {
  // A failed request leaves no usable body to parse, so report it and stop.
  if (error) {
    console.error(error);
    return;
  }
  var $ = cheerio.load(body);
  // Keep only the <script> elements whose source contains the Dygraph constructor call.
  var scripts = $('script').filter(function() {
    // html() may return null for an element without content; treat that as empty text.
    var source = $(this).html() || '';
    return source.indexOf('Dygraph(document') > -1;
  });
  // Only proceed when exactly one script matched.
  if (scripts.length === 1) {
    var text = $(scripts[0]).html();
    console.log(text);
  }
});
The data I need (run through a JS formatter for readability, with all the surrounding scripts removed):
// Excerpt of the scraped page's inline script (reformatted): each row is a [Date, value] pair.
d = new Dygraph(document.getElementById("container"), [
[new Date("2017/08/01"), 0.0654],
[new Date("2017/08/02"), 0.257],
[new Date("2017/08/03"), 0.245],
[new Date("2017/08/04"), 0.15],
[new Date("2017/08/05"), 0.107],
[new Date("2017/08/06"), 0.109],
[new Date("2017/08/07"), 0.143],
[new Date("2017/08/08"), 0.222],
[new Date("2017/08/09"), 0.166],
[new Date("2017/08/10"), 0.156],
[new Date("2017/08/11"), 0.143],
[new Date("2017/08/12"), 0.199]
]);
I only need all the entries of the form: [new Date("2017/08/12"), 0.199]
Any suggestions will be great. Thanks in advance.
You can use a regular expression to parse the data.
// Matches one `new Date("YYYY/MM/DD"), value` pair. Whitespace after the comma is
// optional so the raw (unformatted) script matches too, and the value may be an
// integer or a negative number as well as a decimal.
var re = /new Date\("([0-9]{4}\/[0-9]{2}\/[0-9]{2})"\),\s*(-?[0-9]+(?:\.[0-9]+)?)/g;
// Read the script text once instead of on every iteration.
var scriptText = $(scripts[0]).html();
// scraped data: one [Date, Number] row per match
var data = [];
var m;
// exec() on a /g regex resumes after the previous match and returns null once
// there are no more; checking before the body runs keeps m non-null inside it.
while ((m = re.exec(scriptText)) !== null) {
  data.push([new Date(m[1]), Number(m[2])]);
}