aXul aXul - 19 days ago 12
Javascript Question

Can't process multiple pdf files using pdf2json for nodejs

I'm using https://github.com/modesty/pdf2json to parse multiple PDF files. It works with a single file, but when I try to load multiple files, the `pdfParser_dataReady` event always seems to fire with the same file.
This is what I've tried:

// Question's attempt: a single PDFParser instance is created once and every
// PDF is pushed through it, one after another.
// NOTE(review): `fileFolder` and `res` are used below but are not defined in
// this snippet; presumably they come from the surrounding request handler.
var PDFParser = require('pdf2json');
var pdfParser = new PDFParser();
var fs = require('fs');
// Names of the directory entries that contain '.pdf'.
var fileNames = [];
// Index into fileNames of the file currently being parsed.
var fileCont = 0;

// Walk the directory listing from last to first and keep the PDF entries.
fs.readdir(fileFolder, function(err, files){
for (var i = files.length - 1; i >= 0; i--) {
if (files[i].indexOf('.pdf') !== -1){
fileNames.push(files[i]);
}
// NOTE(review): the closing brace of the `for` loop appears to be missing here.

// Start with the first collected file.
// NOTE(review): unlike the later loadPDF call, fileFolder is not prepended here.
pdfParser.loadPDF(fileNames[fileCont]);
});

// Handler on the shared parser; each run advances to the next file.
pdfParser.on('pdfParser_dataReady', function(data){
//Do all my stuff and insert in db...

fileCont++;

// NOTE(review): `If` and `lenght` look like typos for `if` and `length`;
// as written this line is not valid JavaScript.
If (fileCont === fileNames.lenght){
// All files handled: delete them from fileFolder (fs.unlink is called
// without a callback) and answer the request.
for (var i = fileNames.length - 1; i >= 0; i--) {
fs.unlink(fileFolder + fileNames[i]);
}
return res.json({
data: 'ok '
});
}

// Otherwise feed the next file to the same parser instance.
pdfParser.loadPDF(fileFolder + fileNames[fileCont]);
});

Answer

I managed to make pdf2json work with multiple files by creating a new PDFParser instance for each file. This is not a very "pretty" way to handle multiple PDF files (the library should offer an easier way of doing it), but it works!

var PDFParser = require('pdf2json');
var fs = require('fs');
// Folder that is scanned for PDFs; the trailing slash lets it be prepended
// directly to a file name.
var fileFolder = 'myFolder/';
// Names of the PDF files found in fileFolder, filled in by the readdir callback.
var fileNames = [];
// Index into fileNames of the file currently being parsed.
var fileCont = 0;

/**
 * Parses the files listed in `fileNames` one at a time, using a fresh
 * PDFParser instance per file, and advances `fileCont` after each one.
 *
 * When `fileCont` reaches `fileNames.length` there is nothing left to parse
 * and the function returns after the (user-supplied) final step.
 *
 * @param {string} filePath - Path of the PDF to parse now (folder + file name).
 */
var loadPDF = function(filePath){
  if(fileNames.length === fileCont){
    //Insert in db and add any FINAL code, then return;
    return;
  }

  // Guards against moving on twice should a parser emit more than one
  // terminal event for the same file.
  var settled = false;

  // Move on to the next file; shared by the success and the error path so a
  // single unreadable PDF cannot stall the whole batch.
  var loadNext = function(){
    if (settled) {
      return;
    }
    settled = true;
    fileCont++; //increase the file counter
    loadPDF(fileFolder + fileNames[fileCont]); //parse the next file
  };

  // A new parser per file: the question reports that reusing one instance
  // keeps delivering the same file's data.
  var pdfParser = new PDFParser();

  // Listeners are attached before loadPDF() so no event can be missed.
  pdfParser.on('pdfParser_dataError', function(err){
    //Handle pdfParser error
    console.error('Failed to parse ' + filePath, err);
    loadNext();
  });

  pdfParser.on('pdfParser_dataReady', function(data){
    //Get the pdf data and process it
    loadNext();
  });

  pdfParser.loadPDF(filePath);
};

// List fileFolder, queue every entry whose name contains '.pdf' (walking the
// listing from last to first), then start parsing the first queued file.
fs.readdir(fileFolder, function(err, files){
  if (err) {
    // Without this guard `files` is undefined and the loop below would throw.
    console.error('Could not read ' + fileFolder, err);
    return;
  }

  for (var i = files.length - 1; i >= 0; i--) {
    if (files[i].indexOf('.pdf') !== -1){
      fileNames.push(files[i]);
    }
  }

  // With no PDFs queued, loadPDF sees fileNames.length === fileCont and goes
  // straight to its final step.
  loadPDF(fileFolder + fileNames[fileCont]);
});
Comments