Pono Pono - 2 months ago 8
Node.js Question

Advanced data aggregation: counting average in MongoDB collections

I have a collection of documents like:

{
"browser" : "firefox",
"version" : "4.0.1"
}

{
"browser" : "firefox",
"version" : "3.6.2"
}

{
"browser" : "ie",
"version" : "8.0"
}


How can I compute the percentage share of each browser, so that the results would be:

global firefox: 66%
global ie: 33%

precise firefox:
4.0.1: 50%
3.6.2: 50%


The tricky part is that I don't want to provide all available Firefox versions in an array. The MongoDB query should find all distinct versions in the collection and compute the percentage for each of them.

Thanks in advance!

Answer

Here is a solution that produces your statistics with pure numbers (e.g. 0.5 instead of 50%):

/**
 * Map step: invoked with `this` bound to one document of the collection.
 *
 * Emits two pairs per document:
 *   - key 'global' with the browser name, and
 *   - key 'local' with a [browser, version] pair.
 *
 * `emit` is not defined in this snippet; it is expected to be supplied by
 * the environment that runs the map function.
 */
var m = function() {
  var browser = this.browser;
  var browserAndVersion = [browser, this.version];

  emit('global', browser);
  emit('local', browserAndVersion);
};

/**
 * Reduce step: turns the values collected under one key into ratios.
 *
 * @param {string} key    Either 'global' or 'local'.
 * @param {Array}  values For 'global': browser names.
 *                        For 'local': [browser, version] pairs.
 * @returns {Object|undefined}
 *   - 'global': { browser: share of all values }.
 *   - 'local':  { browser: { version: share within that browser } }.
 *   - any other key: undefined.
 *
 * NOTE(review): this function returns ratios, not values shaped like the
 * ones it receives. MongoDB's map-reduce documentation requires reduce to
 * be re-runnable on its own output and skips reduce for keys with a single
 * value, so this presumably only works when each key is reduced in one
 * pass over at least two values. Confirm for larger collections; the usual
 * remedy is to emit/sum counts and compute ratios in a `finalize` function.
 */
var r = function(key, values) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var total = 0;
  var global, local, versions, browser, version;

  if (key === 'global') {
    global = {};
    values.forEach(function(v) {
      // Own-property check: a name such as "constructor" or "toString" must
      // not pick up the inherited Object.prototype member as its count.
      global[v] = (hasOwn.call(global, v) ? global[v] : 0) + 1;
      total += 1;
    });
    for (browser in global) { global[browser] = global[browser] / total; }
    return global;
  }

  if (key === 'local') {
    local = {};
    values.forEach(function(v) {
      // Same own-property guard for both the browser and the version level;
      // a truthiness test would reuse an inherited built-in as the bucket.
      if (!hasOwn.call(local, v[0])) { local[v[0]] = {}; }
      versions = local[v[0]];
      versions[v[1]] = (hasOwn.call(versions, v[1]) ? versions[v[1]] : 0) + 1;
    });
    for (browser in local) {
      // Each browser is normalised against its own total.
      total = 0;
      versions = local[browser];
      for (version in versions) { total += versions[version]; }
      for (version in versions) { versions[version] = versions[version] / total; }
    }
    return local;
  }
};

// Run the map-reduce job over the `browsers` collection with the map
// function `m` and the reduce function `r`; results go to collection `bout`.
db.browsers.mapReduce(m, r, {out:'bout'});
// Read the results back. The two lines below are the output shown for the
// three sample documents: one result document per emitted key.
db.bout.find();
// => { "_id" : "global", "value" : { "firefox" : 0.6666666666666666, "ie" : 0.3333333333333333 } }
// => { "_id" : "local", "value" : { "firefox" : { "4.0.1" : 0.5, "3.6.2" : 0.5 }, "ie" : { "8.0" : 1 } } }
Comments