I'm using nightmarejs to scrape a website. First, i make a request to fetch some links, which lead to another page with more information that i also want. I've separated it in two functions:
const { csvFormat } = require('d3-dsv');
const Nightmare = require('nightmare');
const { writeFileSync } = require('fs');
const url = 'https://lojaonline.claro.com.br/celular';
function getUrls (){
console.log('Extraindo Links...');
const nightmare = new Nightmare({show: true});
var p1 = '51030';
var p2 = '560';
try{
nightmare.goto(url).wait('input[id="edit-cep-part1"]')
.type('input[id="edit-cep-part1"]', p1)
.wait('input[id="edit-cep-part2"]')
.type('input[id="edit-cep-part2"]', p2)
.click('input[value="Confirmar"]')
.wait('#products-container .products-list').evaluate(function(){
return Array.from(document.querySelectorAll('.offer')).map(element => element.href);
}).end()
.then(function(result){
var listaUrls = Object.values(result);
return listaUrls;
})
.then(function(listaUrls){
listaUrls.forEach(function(link){
console.log('Pegando preços de ' + link);
getPrecos(link);
});
});
}catch(e){
console.error(e);
}
};
function getPrecos(endereco) {
console.log('Extraindo preços...');
const nightmare = new Nightmare({gotoTimeout: 999999999});
var p1 = '51030';
var p2 = '560';
try{
nightmare.goto(endereco).wait('input[id="edit-cep-part1"]')
.type('input[id="edit-cep-part1"]', p1)
.wait('input[id="edit-cep-part2"]')
.type('input[id="edit-cep-part2"]', p2)
.click('input[value="Confirmar"]')
.wait('#plans-tab').evaluate(function(){
return Array.from(document.querySelectorAll('tr.body')).map(element => element.innerText);
}).end()
.then(function(result){
var listaPrecos = Object.values(result);
console.log(listaPrecos);
});
}catch(e){
console.error(e);
}
};
getUrls();
It works for the most part. Some requests are successful and i'm able to get the information but some requests are timed out after 30seconds:
UnhandledPromiseRejectionWarning: Error: .wait() for #plans-tab timed out after 30000msec.
I have to wait, input and click because this particular website asks for a zip code before displaying the data. And if i do show : true inside the getPrecos function, 20 instances of electron will pop up. What am i doing wrong here?
Is there a way to only fire a request after the previous one is finished?
You are using forEach to loop thru the list, if you want them to grab data one by one, then you should use for...of
and async await
or some promise library with concurrency support.
listaUrls.forEach(function(link) {
console.log("Pegando preços de " + link);
getPrecos(link);
});
The above snippet can be turned with async await and for loop like below.
// other part of code
.then(async function(listaUrls) { // <-- async function
for(const link of listaUrls){
console.log("Pegando preços de " + link);
await getPrecos(link); // <-- go thru the link one by one
}
});