user3561335 - 3 months ago
Node.js Question

Is this a safe way to insert and update an array of objects in MongoDB?

The following code updates an array of specified objects, or inserts them if they are not yet in the database. It works fine, but I'm new to MongoDB and I'm not sure whether this is a safe or fast way to do it.

Maybe I should use updateMany? I tried it, but I couldn't get the same behaviour as the code below.

mongodb.connect(mongo_url, function(err, db) {
    if (err) console.log(err)
    else {
        var mongo_products_collection = db.collection("products")

        mongoUpsert(mongo_products_collection, data_products, function() {
            db.close()
        })
    }
})

function mongoUpsert(collection, data_array, cb) {
    var data_length = data_array.length

    for (var i = 0; i < data_length; i++) {
        collection.update(
            { product_id: data_array[i].product_id },
            data_array[i],
            { upsert: true }
        )
    }

    return cb(false)
}

Answer

Using the bulkWrite API to carry out the updates handles this better: the whole batch of updates is sent to the server in a single request, and the post_modified check in each filter skips documents that have not changed. (updateMany would not help here, since it applies one update document to every match of a single filter; it cannot apply a different update per product.)

mongodb.connect(mongo_url, function(err, db) {
    if(err) console.log(err)
    else {
        var mongo_products_collection = db.collection("products")

        mongoUpsert(mongo_products_collection, data_products, function() {
            db.close()
        })
    }
})

function mongoUpsert(collection, data_array, cb) {

    // Map each product to an updateOne op; the post_modified $ne check
    // leaves documents that haven't changed untouched
    var bulkUpdateOps = data_array.map(function(data) {
        return {
            "updateOne": {
                "filter": { 
                    "product_id": data.product_id,
                    "post_modified": { "$ne": data.post_modified }
                },
                "update": { "$set": data },
                "upsert": true
            }
        };
    });

    // Call back only after the server has acknowledged the whole batch
    collection.bulkWrite(bulkUpdateOps, function(err, r) {
        if (err) return cb(err);
        cb(null, r);
    });
}
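
The r passed to the callback is the driver's BulkWriteResult. As a minimal sketch of a caller that inspects it (the count properties are part of the documented result; the logging is just illustrative), the inner call in the connect callback above becomes:

    mongoUpsert(mongo_products_collection, data_products, function(err, r) {
        if (err) return console.log(err)

        // matchedCount/modifiedCount cover existing documents,
        // upsertedCount covers the ones that were newly inserted
        console.log("matched: " + r.matchedCount)
        console.log("modified: " + r.modifiedCount)
        console.log("upserted: " + r.upsertedCount)
        db.close()
    })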

If you're dealing with larger arrays, i.e. more than 1000 items, consider sending the writes to the server in batches of 500. This gives better performance because you are not making a round trip to the server for every operation, just one for every 500 operations.

For bulk operations, MongoDB imposes a default internal limit of 1000 operations per batch, so choosing 500 documents is good in the sense that you keep some control over the batch size rather than letting MongoDB impose the default on operations larger than 1000 documents. For the case above, the first approach could simply write the whole array at once since it is small; the 500-document batching is for larger arrays.

var ops = [],
    counter = 0;

data_array.forEach(function(data) {
    ops.push({
        "updateOne": {
            "filter": { 
                "product_id": data.product_id, 
                "post_modified": { "$ne": data.post_modified } 
            },
            "update": { "$set": data },
            "upsert": true
        }
    });
    counter++;

    // Once 500 operations have accumulated, send them as one batch
    if (counter % 500 == 0) {
        collection.bulkWrite(ops, function(err, r) {
            // do something with result
        });
        ops = [];
    }
});

// Send any remaining operations that did not fill a complete batch
if (counter % 500 != 0) {
    collection.bulkWrite(ops, function(err, r) {
        // do something with result
    });
}
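
Note that the version above fires each bulkWrite without waiting for the previous batch to be acknowledged. If your driver version returns promises when no callback is passed (the Node.js driver does this from 2.x onward), a sketch like the following (mongoUpsertBatched is a hypothetical helper name) awaits each batch in turn:

    async function mongoUpsertBatched(collection, data_array, batchSize) {
        for (var i = 0; i < data_array.length; i += batchSize) {
            // Build the ops for this slice of the array
            var ops = data_array.slice(i, i + batchSize).map(function(data) {
                return {
                    "updateOne": {
                        "filter": {
                            "product_id": data.product_id,
                            "post_modified": { "$ne": data.post_modified }
                        },
                        "update": { "$set": data },
                        "upsert": true
                    }
                };
            });

            // Await each batch so only one bulkWrite is in flight at a time
            await collection.bulkWrite(ops);
        }
    }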