CoffeePeddlerIntern CoffeePeddlerIntern - 4 months ago 17
Node.js Question

Throttling requests to 3rd part Apis with Node

So I am building a space map and dumping the relations into a mongodb. In thiscenario I have regions, constellations, and solar system. Where 1 region can have many constellations and each constellation can have many solar systems. I have an api that allows crawl some of this but it requires an api call per item. Bringing up the problem where I am making ~6000 api calls.

Heres the 3rd party's api criteria:


  • General Rate Limit: 150 requests per second

  • Burst Size: 400

  • Concurrent Connections: 20



Here is my db models:

Region Model:

var mongoose = require('mongoose');
var Schema = mongoose.Schema;

//Schema Definition
var regionSchema = new Schema({
_id: Number,
name: String,
description: String,
href: String,
constellations: [{
type: Schema.ObjectId,
ref: 'Constellation'
}]//Reference
});

//Expose (export) the model
module.exports = mongoose.model('Region', regionSchema);


Constellation Model:

var mongoose = require('mongoose');
var Schema = mongoose.Schema;

//Schema Definition
var constellationSchema = new Schema({
_id: Number,
name: String,
href: String,
solarSystems: [{
type: Schema.ObjectId,
ref: 'SolarSystem'
}]
});

//Expose (export) the model
module.exports = mongoose.model('Constellation', constellationSchema);


Solar System Model:

var mongoose = require('mongoose');
var Schema = mongoose.Schema;

//Schema Definition
var solarSystemSchema = new Schema({
_id: Number,
name: String,
imgUrl: String
});

//Expose (export) the model
module.exports = mongoose.model('SolarSystem', solarSystemSchema);


I am also trying to save them in the proper order so the references are populated for the relations.

Here is my code:

function getAllRegions(req, res){
getAllRegionsHrefs().then(function (hrefs){
var newRegions = [];
for(var href in hrefs){
var options = {
uri: hrefs[href],
json: true
};
RequestPromise(options).then(function (responseItem){
var constellationObjects = [];
for(var item in responseItem.constellations){
var newConstellation = constellationModel({
_id: responseItem.constellations[item].id,
href: 'https://getspaceInfoHere.com/constellations/'+responseItem.constellations[item].id+'/'
});
newConstellation.save();
constellationObjects.push(newConstellation);
}
var newRegion = regionModel({
_id: responseItem.id,
name: responseItem.name,
description: responseItem.description,
href: 'https://getspaceInfoHere.com/regions/'+responseItem.id+'/',
constellations: constellationObjects
});
newRegion.save();
newRegions.push(newRegion);
console.log(newRegion);
});
}
});
}

function getAllRegionsHrefs(){
var options = {
uri: universeInfoEndpoint,
json: true
};
return RequestPromise(options).then(function (responseItems){
var regionHrefs = [];
for(var item in responseItems.items){
regionHrefs.push(responseItems.items[item].href);
}
return regionHrefs;
});
}


Now I am not even trying to get the detailed constellation info which provides the system info (which then provides an href to get detailed system info) and I'm running into my max. What are the best ways to throttle this so I can stay within the parameters?

UPDATE



function getAllRegions(req, res){
getAllRegionsHrefs().then(function (hrefs){
var chunks = _.chunk(hrefs, 25);
return Promise.map(chunks, function(chunk) {
return Promise.map(chunk, getRegion).then(function (getRegionResults){
for(var item in getRegionResults) {
Promise.map(getRegionResults[item].constellations, getConstellationInfo).then(function (constellationInfo) {
var chunks = _.chunk(constellationInfo, 150);
return Promise.map(chunks, function (chunk) {
return Promise.map(chunk, getSystem).delay(20000);
})
}).delay(20000);
}
}).delay(200000);
});
});
}

function getSystem(systems){
for(var updateSystem in systems){
var options = {
uri: systems[updateSystem].href,
json: true
};
RequestPromise(options).then(function (responseItem){
//Grab the system in the db and update it with its info
systemModel.findOne({ _id: systems[updateSystem]._id }, function (err, doc){
doc.name = responseItem.name;
doc.save();
});

});
}
}

function getConstellationInfo(constellation) {
var options = {
uri: constellation.href,
json: true
};
return RequestPromise(options).then(function (responseItem){
var arrayOfSystems = [];
for(var system in responseItem.systems){
var newSystem = new systemModel({
_id: responseItem.systems[system].id,
href: responseItem.systems[system].href
});
newSystem.save();
arrayOfSystems.push(newSystem);
}
//find the constellation and update it with its info
constellationModel.findOne({ _id: constellation._id }, function (err, doc){
doc.name = responseItem.name;
doc.solarSystems = arrayOfSystems;
doc.save();
});
return arrayOfSystems;
});
}


function getRegion(href) {
var options = {
uri: href,
json: true
};
return RequestPromise(options).then(function (responseItem){
var constellationObjects = [];
for(var item in responseItem.constellations){
var newConstellation = constellationModel({
_id: responseItem.constellations[item].id,
href: eveConstellationCrestEndpoint + responseItem.constellations[item].id+'/'
});
newConstellation.save();
constellationObjects.push(newConstellation);
}
var newRegion = regionModel({
_id: responseItem.id,
name: responseItem.name,
description: responseItem.description,
href: universeEndpoint + responseItem.id+'/',
constellations: constellationObjects
});
newRegion.save();
return newRegion;
});
}

function getAllRegionsHrefs(){
var options = {
uri: universeEndpoint,
json: true
};
return RequestPromise(options).then(function (responseItems){
var regionHrefs = [];
for(var item in responseItems.items){
regionHrefs.push(responseItems.items[item].href);
}
return regionHrefs;
});
}


Right now this is working for the entire chain(gets region, constellation, and system info) but the timeouts are working and starts to refuse connections at the system level. Any advice?

Answer

You can achieve this by chunking your hrefs into sets of 20 and setting a delay after each chunk, you'll probably want to play with those parameters:

Using lodash's _.chunk and Bluebird's Promise.delay with Promise.map:

function getAllRegions(req, res){
   getAllRegionsHrefs().then(function (hrefs){
       var chunks = _.chunk(hrefs, 20);
       return Promise.map(chunks, function(chunk) {
         // tune the delay to what you need it to be
         // it will wait the delay (in ms) before starting the next chunk of requests
         return Promise.map(chunk, getRegion).delay(150000);
       });
   });
}

function getRegion(href) {
    var options = {
        uri: hrefs[href],
        json: true
    };
    return RequestPromise(options).then(function (responseItem){
        var constellationObjects = [];
        for(var item in responseItem.constellations){
            var newConstellation = constellationModel({
                _id: responseItem.constellations[item].id,
                href: 'https://getspaceInfoHere.com/constellations/'+responseItem.constellations[item].id+'/'
            });
            newConstellation.save();
            constellationObjects.push(newConstellation);
        }
        var newRegion = regionModel({
            _id: responseItem.id,
            name: responseItem.name,
            description: responseItem.description,
            href: 'https://getspaceInfoHere.com/regions/'+responseItem.id+'/',
            constellations: constellationObjects
        });
        newRegion.save();
        console.log(newRegion);
        return newRegion;
    });
}