rads89 rads89 - 4 months ago 15
Javascript Question

How can I access data from an array that is being populated inside a request method in node.js?

I'm new to Node.js and I'm trying to scrape some data by looping through an array of 3 URLs. The scraped data will then be stored in a MongoDB collection.

Right now, I am looping through the array of urls and using node's request module inside the for loop for each url and storing data dynamically in an array called products.

My issue is that when I try to print products.length to the console outside of the request method, the value is 0, indicating an empty array. Here's a part of my code:

//these arrays collect the raw product names and prices scraped from the webpages
var prodList = [];
var priceList = [];

//this array holds the Prod objects built from the scraped names and prices
var products = [];

//store scraped data as an object
/**
 * Constructor for one scraped product record.
 *
 * @param {*} prodName - value stored on the instance as `prodName`
 * @param {*} price - value stored on the instance as `price`
 */
function Prod(prodName, price) {
  var record = this;
  record.prodName = prodName;
  record.price = price;
}

/**
 * Empties the module-level prodList, priceList and products arrays, then
 * issues one request() per URL in nyxLinks and fills the arrays from each
 * response inside the request callback.
 *
 * Takes no arguments and returns nothing. The request callbacks run
 * asynchronously, so the console.log at the end of this function executes
 * before any product has been pushed.
 */
var populateArray = function() {

//urls to scrape
var nyxLinks = [
"http://www.nyxcosmetics.ca/en_CA/face?sz=999&viewall=1",
"http://www.nyxcosmetics.ca/en_CA/lips?sz=999&viewall=1",
"http://www.nyxcosmetics.ca/en_CA/eyes?sz=999&viewall=1"
];

//empty all arrays
prodList = [];
priceList = [];
products = [];

for(var i = 0; i < nyxLinks.length; i++) {

//define url to download
var url = nyxLinks[i];
console.log(url);

//the callback passed here is only registered now; it runs later, once the response for this url has arrived
request(url, function(error, response, body) {
if(!error) {

//load page into cheerio
var $ = cheerio.load(body);

//for each product on the page store its name and price in the respective arrays
$(".product_tile_wrapper").each(function(i, elem) {
prodList.push($(this).find($(".product_name")).attr("title"));
priceList.push($(this).find($(".product_price")).attr("data-pricevalue"));
});

//this `i` is local to the request callback and is separate from the outer loop's `i`
//NOTE(review): prodList and priceList are shared by all responses and are not cleared
//between them, so this loop restarts at index 0 for every response and pushes the
//entries from earlier responses into products again - confirm whether that is intended
for(var i = 0; i < prodList.length; i++) {
//store product info as an object

products.push(new Prod(prodList[i], priceList[i]));
}
} else {
console.log("We've encountered an error!")
}
//NOTE(review): this handler assumes the "end" event passes (err, data) - confirm against the request documentation
}).on("end", function(err, data) {
if(!err) {
console.log("products length " + products.length);
} else {
console.log(err);
}
});
}
//runs as soon as the loop has issued the requests, not after they have completed
console.log("products length " + products.length);
}

//connect to the makeupdb database on the local MongoDB server
mongoose.connect('mongodb://127.0.0.1:27017/makeupdb');

var db = mongoose.connection;
//log connection errors to the console with a 'Connection Error:' prefix
db.on('error', console.error.bind(console, 'Connection Error:'));
//run the scrape the first time the connection emits 'open'
db.once('open', function() {
// we're connected

//populateArray() returns before its request callbacks have run,
//so products is still empty when the next line logs its length
populateArray();
console.log("number of products in products array " + products.length);

//clear the current collection - db.remove({})

//insert data in mongodb - db.insert(products)

});


The console output from this code is:

Server running at http://127.0.0.01:1337/
http://www.nyxcosmetics.ca/en_CA/face?sz=999&viewall=1
http://www.nyxcosmetics.ca/en_CA/lips?sz=999&viewall=1
http://www.nyxcosmetics.ca/en_CA/eyes?sz=999&viewall=1
products length 0
number of products in products array 0
products length 0
products length 31
products length 119


I believe I need to use a callback to be able to access the products array, but I am not sure where I would need to use this callback. Any help will be much appreciated.

Thanks,

Radha

Answer

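This is because of JavaScript's asynchronous model. request() returns immediately, and its callbacks (including the .on('end') handler) only run later, once a response has arrived. The engine won't wait for them before moving on to the next iteration of your for loop, or to the console.log calls that follow the loop, so products is still empty when those lines run.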

You could have a counter variable that is incremented each time a callback is called, and when the counter reaches the number of requests made, call your final function. Do something like this:

// Number of requests whose callback has fired so far (success or failure).
var numRequestsFinished = 0;
var products = [];

// Called exactly once, after every request has completed.
var finalCallback = function() {
  console.log('Final Products:', products);
};

// With no URLs, no request callback would ever fire, so finish right away.
if (nyxLinks.length === 0) {
  finalCallback();
}

for (var i = 0; i < nyxLinks.length; i++) {
  // The request callback receives (error, response, body).
  request(nyxLinks[i], function(err, response, body) {
    numRequestsFinished++;

    if (err) {
      // A failed request still counts as finished; it just contributes no data.
      console.log(err);
    } else {
      // Parse `body` here (e.g. with cheerio) and push whatever you extract.
      products.push(body);
    }

    // Responses can arrive in any order, so count completed callbacks
    // instead of relying on the loop index.
    if (numRequestsFinished === nyxLinks.length) {
      finalCallback();
    }
  });
}

Alternatively, you could take a look at a Promise library such as Bluebird and the Promise.all API. This would allow you to define a function to be called when all promises in an array have completed.