jcollum jcollum - 4 months ago 8
Node.js Question

how can I reliably process thousands of HTTP requests when some may error?

I have run into this problem before for a few HTTP transactions (like a hundred or so posts). Today I'm trying to do 7k HTTP requests. This seems silly but it's the only way to interact with the target system. The best I've been able to do will stall out at about 96% of the requests done. It will then just stop and never complete the last few requests.

Perhaps I'm using OiBackoff incorrectly. It seems to be working fine, but that last 3% of the GETs never finish. I've let it sit for 5 minutes with no requests coming back, even though the highest retry interval in the log was 40 seconds.

I wonder if I should do like 100 requests at a time with OiBackoff there to make sure they are all complete.

The goal here is to hit a url that has a number like CFD1234, CFD1236, CFD1238 at the end and push the result (small chunk of xml) into an array. Here's the code, the closest I have to working. Perhaps I need to try a different library? I've tried this with a promises queue and couldn't get it to run. It will work if I create an array of function closures and fire them off in sequence but it takes forever, far longer than it should.

// Compiled-from-CoffeeScript script: issue one HTTP GET per C-number and
// collect the XML responses. All vars are hoisted here (CoffeeScript output).
var cnum, cnums, complete, ld, logger, oibackoff, opt, processHttpGet, request, responses, total, yamljs, _fn, _i, _len;

yamljs = require('yamljs');

request = require('request');

// Retry wrapper: up to 10 attempts per request, delay growing by delayRatio.
oibackoff = require('oibackoff').backoff({
maxTries: 10,
delayRatio: 10
});

// List of C-number IDs (e.g. CFD1234) to query, one GET each.
cnums = yamljs.load('./etc/cnumbers.yaml');

// Accumulates the XML bodies of successful responses.
responses = [];

// Minimal logger shim: everything goes to stdout.
logger = {
debug: console.log,
error: console.log
};

ld = require('lodash');

// Drop duplicate IDs so the completion count matches unique requests.
cnums = ld.uniq(cnums);

logger.debug("cnums len: " + cnums.length);

// GET `url` with request options `opt` and report through the node-style
// callback `cb`. A transport error, or any HTTP status >= 400 (passed as the
// bare status code), becomes the error argument; otherwise cb gets
// (null, body).
processHttpGet = function(url, opt, cb) {
  return request.get(url, opt, function(err, res, payload) {
    if (err != null) {
      return cb(err, null);
    }
    if (res.statusCode >= 400) {
      return cb(res.statusCode, null);
    }
    return cb(null, payload);
  });
};

// No extra request options are needed; oibackoff passes this through
// to processHttpGet unchanged.
opt = null;

// Progress accounting: `total` requests expected, `complete` finished so far.
total = cnums.length;

complete = 0;

// Fire one backoff-wrapped GET for a single C-number. Successful bodies are
// pushed onto `responses`; every request — success or final failure — now
// advances `complete`, so the run can always reach the exit condition
// instead of stalling when a URL never succeeds.
_fn = function(CNumber) {
  var intermediate, url;
  url = "http://abc:def@abc.def.com/xyz/def/abc.asmx/GetValueByID?ID=" + CNumber;
  logger.debug("getting " + url);
  // Invoked by oibackoff between attempts; returning false cancels further
  // retries. (The useless `(function(_this){...})(this)` wrapper was removed —
  // nothing here touches `this`.)
  intermediate = function(err, tries, delay) {
    if (err != null) {
      logger.debug("GET failed for " + url + ":", err);
      logger.debug("tries: %d, delay: %d", tries, delay);
    }
    if (tries > 10) {
      // FIX: "/n/n" was a broken escape — literal slashes, not newlines.
      logger.debug("\n\n Failed max tries.");
      // FIX: a max-tries abort is a failure, so exit non-zero.
      process.exit(1);
      return false;
    }
  };
  return oibackoff(processHttpGet, url, opt, intermediate, function(error, response) {
    if (error) {
      // FIX: previously the error was swallowed (`return false`) without
      // advancing `complete`, so one permanently failing URL stalled the
      // whole run just short of 100%. Log it and still count the request
      // as finished.
      logger.error("giving up on " + url + ":", error);
      ++complete;
    } else {
      ++complete;
      responses.push(response);
      // Sample every 100th response so the log stays readable.
      if (complete % 100 === 0) {
        console.dir({
          url: url,
          response: response
        });
      }
      logger.debug("success; responses complete: " + complete + ", total: " + total + ", percentage: " + (ld.round(complete / total, 2) * 100) + "%");
    }
    if (complete >= total) {
      logger.debug(responses);
      return process.exit(0);
    }
  });
};
// Kick off every GET at once; _fn handles per-request retry and completion.
cnums.forEach(function(id) {
  _fn(id);
});

Answer

The answer was to use Bluebird's Promise.map with a concurrency limit, combined with a retry/backoff library, so only a bounded number of requests are in flight at once and each one gets its own retry budget.

# coffee 
# exports is an array of buffers  
# retry(n, fn): re-invoke fn until its promise resolves, up to n attempts.
retry = (require 'u-promised').retry
Promise = require("bluebird")

# build array of buffers to post

# Post every buffer, keeping at most `concurrency` requests in flight at a
# time (Bluebird's Promise.map concurrency option); each post carries its own
# retry budget so one flaky endpoint cannot stall the whole batch.
Promise.map(exports, (buffer) ->
  f = -> postToEndpoint(buffer)
  retry(5, f) # post with up to 5 retries
, {concurrency: config.export.concurrency}) # 40 for my app 
.then (result) ->
  # Sum the per-post results for the final progress message.
  # NOTE(review): presumably postToEndpoint resolves to a count — confirm.
  c = 0
  ld.map(result, (x) -> c += x)
  msg = "Complete. #{c} posts completed."
  logger.info msg
.catch (reason) ->
  logger.error reason