Gerard Gerard - 6 months ago 20
Node.js Question

program fails because readFile is asynchronous?

I'm trying to combine two lists. List1 has 681 french verbs, List2 has the 681 translations.
I'm using javascript, and node.js to read the files.
Here's my attempt:

// Original attempt from the question, kept exactly as posted. It starts two
// asynchronous file reads and then immediately tries to combine their results.
var frenchWords, englishWords, combinedList;
// NOTE: `fs` is assigned without var/let/const, so it becomes an implicit global.
fs = require('fs')

// 1. read the french file

// The callback below only runs later, once the file has been read; until then
// frenchWords stays undefined.
fs.readFile('frenchVerbsList.txt', 'utf8', function (err,data) {
if (err) {
return console.log("ERROR here!: " + err);
}
frenchWords = data.split('\n');
});

//read the english file

// Same pattern: englishWords is only assigned when this callback eventually runs.
fs.readFile('englishVerbsList.txt', 'utf8', function (err,data2) {
if (err) {
return console.log("ERROR here!: " + err);
}
englishWords = data2.split('\n');
});

// 2. combine the lists
//*** it fails here, I'm guessing it's because the readFile operation hasn't finished yet.

// NOTE: this code runs before either readFile callback has been called, so
// frenchWords is still undefined and reading frenchWords.length throws a TypeError.
// NOTE: combinedList is declared but never initialised to [], so the indexed
// assignment in the loop would fail as well. The loop index `i` is also undeclared.
var combinedList;
for(i=0; i<frenchWords.length; i++){
combinedList[i] = frenchWords[i] + ",,," + englishWords[i];
}
// 3. check the result

// Prints the first 10 combined entries.
for(i=0; i<10; i++){
console.log(combinedList[i]);
}


Thanks very much for any help, I'm doing this to keep my mind active :-)

Answer

You are correct that the asynchronous nature of the fs.readFile() callbacks is causing your issue.

Those callbacks are called at an indeterminate time in the future while the rest of your code continues to run. Because of the event driven design of node.js, the callbacks won't be called until the rest of your code finishes executing. Therefore, it is guaranteed that you will be trying to use the englishWords and frenchWords variables before they have any results in them.

You have a bunch of different options:

Switch to fs.readFileSync (not recommended in most cases)

You can switch to using fs.readFileSync(). This is the simplest change because your current flow of control will work. But, this is generally not recommended for node.js development because it's inefficient for server usage. If this code was in a server process that could/should maintain the ability to do other things while waiting for a file to be read, then fs.readFileSync() would kill scalability. If this is just a one-off script (not a loaded server), then fs.readFileSync() might work just fine. But, you should probably learn the better "node.js-style" way of coding properly with async operations (see the following options).

Serialize Operations by Continuing Flow Inside the Callback

You can serialize your async operations with nesting. This involves continuing your processing logic only inside the async callbacks. That way, you know that the result you need is available when you continue processing. That would look like this:

const fs = require('fs')

// Serialized approach: the English read is started only after the French read
// has finished, and the lists are combined inside the innermost callback, where
// both results are known to be available.

// 1. read the french file
fs.readFile('frenchVerbsList.txt', 'utf8', function (err, data) {
    if (err) {
        return console.log("ERROR here!: " + err);
    }
    // Accept both Unix (\n) and Windows (\r\n) line endings.
    const frenchWords = data.split(/\r?\n/);

    // read the english file
    fs.readFile('englishVerbsList.txt', 'utf8', function (err, data2) {
        if (err) {
            return console.log("ERROR here!: " + err);
        }
        const englishWords = data2.split(/\r?\n/);

        // 2. combine the lists
        // The loop index is declared with `let`; a bare `i = 0` would create
        // an implicit global and throws in strict mode / ES modules.
        const combinedList = [];
        for (let i = 0; i < frenchWords.length; i++) {
            combinedList[i] = frenchWords[i] + ",,," + englishWords[i];
        }

        // 3. check the result: print at most the first 10 entries, so that
        // shorter lists do not produce trailing "undefined" lines
        const previewCount = Math.min(10, combinedList.length);
        for (let i = 0; i < previewCount; i++) {
            console.log(combinedList[i]);
        }
    });
});

Manually code a Check for When Both Async Operations are Done

The serialize option above has a disadvantage in that it waits for the first async operation to be done before it starts the next async operation. That is less than ideal because both async operations could be running in parallel (faster end-result). All of the following options will be different ways of running the async operations in parallel and monitoring when they are both done so you can trigger the final processing. This is the manual monitoring option. In the completion callback for both the loading of the french and english words, you check to see if the other is also done. If it is, then you call a function to process the results. Since only one can complete at a time, as long as there are no errors, one of them will complete second and will call your function to process the results:

// Parallel approach with manual bookkeeping: both reads are started right away,
// and whichever callback finishes second triggers processResults().
let frenchWords;
let englishWords;
// Declared with const: a bare `fs = ...` assignment creates an implicit global
// and throws in strict mode / ES modules.
const fs = require('fs');

// read the french file
fs.readFile('frenchVerbsList.txt', 'utf8', function (err, data) {
    if (err) {
        return console.log("ERROR here!: " + err);
    }
    // Accept both Unix (\n) and Windows (\r\n) line endings.
    frenchWords = data.split(/\r?\n/);
    // Only continue once the other read has delivered its result as well.
    if (frenchWords && englishWords) {
        processResults();
    }
});

// read the english file
fs.readFile('englishVerbsList.txt', 'utf8', function (err, data2) {
    if (err) {
        return console.log("ERROR here!: " + err);
    }
    englishWords = data2.split(/\r?\n/);
    // Only continue once the other read has delivered its result as well.
    if (frenchWords && englishWords) {
        processResults();
    }
});

/**
 * Combines the shared `frenchWords` and `englishWords` arrays position by
 * position into "<french>,,,<english>" strings and prints a short preview.
 *
 * Reads both arrays from the enclosing scope, so it must only be called once
 * both of them have been filled in.
 */
function processResults() {
    // combine the lists
    const combinedList = [];
    for (let i = 0; i < frenchWords.length; i++) {
        combinedList[i] = frenchWords[i] + ",,," + englishWords[i];
    }

    // check the result: print at most the first 10 entries, so that shorter
    // lists do not produce trailing "undefined" lines
    const previewCount = Math.min(10, combinedList.length);
    for (let i = 0; i < previewCount; i++) {
        console.log(combinedList[i]);
    }
}

Use ES6 Promises to Monitor Your Async Operations

With ES6, promises have now become a standard part of the Javascript specification and they are an excellent way to coordinate multiple asynchronous operations and they also make proper error handling (especially in complex situations) a lot more straightforward. To use promises here, you would first want to create a "promisified" version of fs.readFile(). This would be a wrapper function that uses promises instead of a plain callback. Then, you can use Promise.all() to coordinate when the two async operations are done.

var fs = require('fs');
/**
 * Promise-returning counterpart of fs.readFile().
 *
 * @param {string} file - path of the file to read
 * @param {string} encoding - encoding handed to fs.readFile(), e.g. 'utf8'
 * @returns {Promise} fulfilled with the file contents, or rejected with the
 *     error reported by fs.readFile()
 */
fs.readFileAsync = (file, encoding) => new Promise((resolve, reject) => {
    fs.readFile(file, encoding, (readError, contents) => {
        if (readError) {
            reject(readError);
        } else {
            resolve(contents);
        }
    });
});

/**
 * Common helper function: reads a text file and splits it into lines.
 *
 * @param {string} file - path of the word list to read
 * @returns {Promise} promise fulfilled with an array holding one entry per line
 */
function readFileSplitWords(file) {
    return fs.readFileAsync(file, 'utf8').then(function(data) {
        // make split words be the fulfilled value of the promise;
        // accept both Unix (\n) and Windows (\r\n) line endings so that no
        // stray "\r" is left at the end of each word
        return data.split(/\r?\n/);
    });
}

// Start both reads right away so they run in parallel; Promise.all() fulfills
// once both word lists are available and rejects as soon as one read fails.
// Both word lists use the ".txt" extension.
const frenchPromise = readFileSplitWords('frenchVerbsList.txt');
const englishPromise = readFileSplitWords('englishVerbsList.txt');
Promise.all([frenchPromise, englishPromise]).then(function(results) {
    // combine the lists
    const frenchWords = results[0];
    const englishWords = results[1];
    const combinedList = [];
    // The loop index is declared with `let`; a bare `i = 0` would create an
    // implicit global and throws in strict mode / ES modules.
    for (let i = 0; i < frenchWords.length; i++) {
        combinedList[i] = frenchWords[i] + ",,," + englishWords[i];
    }

    // check the result: print at most the first 10 entries, so that shorter
    // lists do not produce trailing "undefined" lines
    const previewCount = Math.min(10, combinedList.length);
    for (let i = 0; i < previewCount; i++) {
        console.log(combinedList[i]);
    }
}).catch(function(err) {
    // Report the failure instead of silently dropping it. Attached with
    // .catch() so it also covers exceptions thrown in the handler above.
    console.log("ERROR here!: " + err);
});

Use a Promise Library for Extended Promise Functionality

ES6 Promises are very capable, but there are very helpful features when using promises that some 3rd party libraries have added. I personally use the Bluebird library. Here's how the previous option would look using the Bluebird library:

const Promise = require('bluebird');
const fs = Promise.promisifyAll(require('fs'));

/**
 * Common helper function: reads a text file and splits it into lines.
 *
 * @param {string} file - path of the word list to read
 * @returns {Promise} promise fulfilled with an array holding one entry per line
 */
function readFileSplitWords(file) {
    return fs.readFileAsync(file, 'utf8').then(function(data) {
        // make split words be the fulfilled value of the promise;
        // accept both Unix (\n) and Windows (\r\n) line endings so that no
        // stray "\r" is left at the end of each word
        return data.split(/\r?\n/);
    });
}

// Start both reads right away so they run in parallel; .spread() hands the two
// resulting word lists to the handler as separate named arguments.
// Both word lists use the ".txt" extension.
const frenchPromise = readFileSplitWords('frenchVerbsList.txt');
const englishPromise = readFileSplitWords('englishVerbsList.txt');
Promise.all([frenchPromise, englishPromise]).spread(function(frenchWords, englishWords) {
    // combine the lists
    const combinedList = [];
    // The loop index is declared with `let`; a bare `i = 0` would create an
    // implicit global and throws in strict mode / ES modules.
    for (let i = 0; i < frenchWords.length; i++) {
        combinedList[i] = frenchWords[i] + ",,," + englishWords[i];
    }

    // check the result: print at most the first 10 entries, so that shorter
    // lists do not produce trailing "undefined" lines
    const previewCount = Math.min(10, combinedList.length);
    for (let i = 0; i < previewCount; i++) {
        console.log(combinedList[i]);
    }
}).catch(function(err) {
    // Report the failure instead of silently dropping it. Attached with
    // .catch() rather than as a second .spread() argument, so it also covers
    // exceptions thrown in the handler above.
    console.log("ERROR here!: " + err);
});

This uses Bluebird's Promise.promisifyAll() to automatically make promisified versions of all the methods in the fs library (very useful). And, it uses the .spread() method instead of .then() to automatically separate out the two results into their named arguments.

Use More Extended Features to Process Arbitrary Array of Filenames

You can also use more extended Bluebird features such as Promise.map(), which processes an array and then does Promise.all() on the resulting promises (something the above code did manually). This then allows the filenames to be an arbitrary list, one file for each language you want, so the code becomes more generic in that regard:

const Promise = require('bluebird');
const fs = Promise.promisifyAll(require('fs'));

/**
 * Common helper function: reads a text file and splits it into lines.
 *
 * @param {string} file - path of the word list to read
 * @returns {Promise} promise fulfilled with an array holding one entry per line
 */
function readFileSplitWords(file) {
    return fs.readFileAsync(file, 'utf8').then(function(data) {
        // make split words be the fulfilled value of the promise;
        // accept both Unix (\n) and Windows (\r\n) line endings so that no
        // stray "\r" is left at the end of each word
        return data.split(/\r?\n/);
    });
}

// One word-list file per language; add more filenames to combine more languages.
// All word lists use the ".txt" extension.
const files = ['frenchVerbsList.txt', 'englishVerbsList.txt'];
Promise.map(files, readFileSplitWords).then(function(results) {
    // results is an array of arrays where each sub-array is a language list of words
    // combine the lists (assumes all word lists have the same length)
    const combinedList = [];
    const len = results[0].length;
    // for each word in the first array
    for (let i = 0; i < len; i++) {
        // get the words at the same array position in every language list
        const words = [];
        for (let j = 0; j < results.length; j++) {
            words.push(results[j][i]);
        }
        combinedList.push(words.join(',,,'));
    }

    // check the result: print at most the first 10 entries, so that shorter
    // lists do not produce trailing "undefined" lines
    const previewCount = Math.min(10, combinedList.length);
    for (let i = 0; i < previewCount; i++) {
        console.log(combinedList[i]);
    }
}).catch(function(err) {
    // Report the failure instead of silently dropping it. Attached with
    // .catch() so it also covers exceptions thrown in the handler above.
    console.log("ERROR here!: " + err);
});
Comments