Split Evo Split Evo - 3 months ago 48
Node.js Question

Error: read ECONNRESET, ENOTFOUND and socket hang up

I am having some trouble with my crawler in Node.js: I get errors when I launch it against the Vinted website.
These errors occur many times.
First I get this:

Error: getaddrinfo ENOTFOUND www.vinted.fr www.vinted.fr:443


Then

...
Error: read ECONNRESET
Error: read ECONNRESET
Error: read ECONNRESET
Error: read ECONNRESET
Error: read ECONNRESET
...


and sometimes

Error: socket hang up
Error: socket hang up
Error: socket hang up


My crawler does work and returns correct results for some products, but it stops after about 10 minutes.
I think it is because I send too many requests, but I need to send them... so it is probably a network issue.

I am completely stuck with all these errors. Is it possible to fix them?

Thanks for your help.

Here is my code:

fs.readFile(__dirname +'/link.json', 'utf8', function (err, data) {
var obj;
if (err) throw err;
obj = JSON.parse(data);
urlp = obj.link;
console.log(colors.yellow("Products:"+urlp.length));
for(i = 1; i < urlp.length-1; i++){

url = 'https://www.vinted.fr'+urlp[i-1];
request(url, function(error, response, html){

if(!error){
var $ = cheerio.load(html);
var link = [];
var json = { link : ""};
var price = $('span[itemprop=price]').text();
var format_price = price.replace(/\n|\r/g,"");
var format_price2 = format_price.replace(/ /g,"");
var res1 = $('.details-list--details');
var meta = $("link[rel='canonical']").attr('href');
var images = []; // tableau img


$('img[itemprop=image]').filter(function(){
var img = $(this).attr('data-src');;
images.push(img);
})
// var imageshow = console.log(colors.rainbow(images .join(", ")));

var brand = $('.inverse > [itemprop=name]').text();
var state = $('div[itemprop=itemCondition]').text();
var color = $('div[itemprop=color]').text();
console.log(urlp[i]);
var token_vendu = $('.state-bar').text();
if(token_vendu != ""){
console.log(colors.red('PRODUIT VENDU'));
var vendu = 1;
}else{
vendu = 0;
}
console.log(colors.blue("CallBack Vente "+vendu));

var discount_price = $('.old-price').text();
console.log("Discount: " + discount_price);
try{
if(brand == ""){

var size = res1.children().parent().text();
var format_size = size.replace(/ /g,"");
var format_size2 = format_size.replace(/[\n]/gi, " " );
var split_size1 = format_size2.split(" ");
var split_size2 = split_size1[0].split(" ");
var split4 = split_size2[4];
var formatsize = split4;

}else{

var size = res1.children().parent().children().text();
var format_size = size.replace(/ /g,"");
var format_size2 = format_size.replace(/[\n]/gi, " " );
var split_size = format_size2.split(" ");
console.log("split: "+split_size[1] )
var split3 = split_size[1].split(" ");
formatsize = split3[1];


}

} catch (e) {
split_size[1] = "N/A";
console.log(e.message);
}
console.log("Size : " + formatsize);
console.log("Brand : "+brand);
console.log(meta);
console.log("color : " + color);
console.log("state : " + state);

//Save to database
connection.query('INSERT INTO `vinted` VALUES ( NULL , ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP())',
[brand,
color,
format_price2,
discount_price,
state,
formatsize,
vendu,
images.join(", "),
meta
]
, function (err, result) {
if (err) {console.error('error inserting into database : ' + err.stack); return;}
});
}else{console.log(colors.red(error));} // here the error ECONNRESET/ENOTFOUND ...

Answer

My suspicion is that your crawler may be overwhelming the site you are trying to crawl, hence the ECONNRESET. I had a similar experience writing a node.js crawler myself. I had to self-throttle by sending my requests at timed intervals, to give the server some breathing space.

Example:

var request = require('request');
var EventEmitter = require('events').EventEmitter;

// Emitter used to chain crawl steps: every finished request triggers the next one.
var emitter = new EventEmitter();

/**
 * Performs one throttled crawl step.
 *
 * Waits 60 seconds, requests `url`, and emits 'fetchNext' once the request
 * has completed so that the next step gets scheduled. `request` and `url`
 * are expected to be provided by the surrounding script.
 */
function doCrawl() {
  setTimeout(function () {
    //do crawling operation, e.g.
    request(url, function (err, resp, html) {
      if (err) {
        // Report the failure but keep the chain alive: without the emit
        // below, a single ECONNRESET would silently stop the whole crawl.
        console.error(err);
      } else {
        //do all you want with the response
      }
      // Trigger another one, whether this request succeeded or not.
      emitter.emit('fetchNext');
    });
  }, 60000); // 60-second pause to give the server some breathing space
}

emitter.on('fetchNext', doCrawl);

You may also want to consider doing this

EDIT>> Using your sample code

var urlStack = []; //an array that holds the list of urls you want to visit
var emitter = new EventEmitter();
emitter.on('fetchNext', delayedCrawl); //this is triggered after any item is saved

// Read the product paths from link.json, queue them as absolute URLs,
// then kick off the crawl by emitting the first 'fetchNext'.
fs.readFile(__dirname + '/link.json', 'utf8', function(err, data) {
  if (err) throw err;
  var obj = JSON.parse(data);
  var urlp = obj.link;
  console.log(colors.yellow("Products:" + urlp.length));

  // NOTE(review): these bounds are kept from the original snippet; they leave
  // the last two entries of urlp unqueued — confirm whether that is intended.
  for (var i = 1; i < urlp.length - 1; i++) {
    urlStack.push('https://www.vinted.fr' + urlp[i - 1]);
  }
  emitter.emit('fetchNext');
});

/**
 * Schedules the next crawl step (doCrawl) after a 5-second pause, so that
 * requests are spaced out instead of being fired all at once.
 */
function delayedCrawl() {
  setTimeout(doCrawl, 5000); //5-second delay
}

    function doCrawl() {
      var url = urlStack.pop();
      if(!url) return;
      request(url, function(error, response, html) {

            if (!error) {
              var $ = cheerio.load(html);
              var link = [];
              var json = {
                link: ""
              };
              var price = $('span[itemprop=price]').text();
              var format_price = price.replace(/\n|\r/g, "");
              var format_price2 = format_price.replace(/ /g, "");
              var res1 = $('.details-list--details');
              var meta = $("link[rel='canonical']").attr('href');
              var images = []; // tableau img


              $('img[itemprop=image]').filter(function() {
                  var img = $(this).attr('data-src');;
                  images.push(img);
                })
                //  var imageshow = console.log(colors.rainbow(images .join(", ")));

              var brand = $('.inverse > [itemprop=name]').text();
              var state = $('div[itemprop=itemCondition]').text();
              var color = $('div[itemprop=color]').text();
              console.log(url);
              var token_vendu = $('.state-bar').text();
              if (token_vendu != "") {
                console.log(colors.red('PRODUIT VENDU'));
                var vendu = 1;
              } else {
                vendu = 0;
              }
              console.log(colors.blue("CallBack Vente " + vendu));

              var discount_price = $('.old-price').text();
              console.log("Discount: " + discount_price);
              try {
                if (brand == "") {

                  var size = res1.children().parent().text();
                  var format_size = size.replace(/ /g, "");
                  var format_size2 = format_size.replace(/[\n]/gi, " ");
                  var split_size1 = format_size2.split("    ");
                  var split_size2 = split_size1[0].split(" ");
                  var split4 = split_size2[4];
                  var formatsize = split4;

                } else {

                  var size = res1.children().parent().children().text();
                  var format_size = size.replace(/ /g, "");
                  var format_size2 = format_size.replace(/[\n]/gi, " ");
                  var split_size = format_size2.split("         ");
                  console.log("split: " + split_size[1])
                  var split3 = split_size[1].split(" ");
                  formatsize = split3[1];


                }

              } catch (e) {
                split_size[1] = "N/A";
                console.log(e.message);
              }
              console.log("Size : " + formatsize);
              console.log("Brand : " + brand);
              console.log(meta);
              console.log("color : " + color);
              console.log("state : " + state);

              //Save to database
              connection.query('INSERT INTO `vinted` VALUES ( NULL , ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP())', [brand,
                color,
                format_price2,
                discount_price,
                state,
                formatsize,
                vendu,
                images.join(", "),
                meta
              ], function(err, result) {
                emitter.emit('fetchNext');
                if (err) {
                  console.error('error inserting into database : ' + err.stack);
                  return;
                }
              });
            } else {
              console.log(colors.red(error));
            } // here the error ECONNRESET/ENOTFOUND ...