Dennis Bauszus Dennis Bauszus - 6 days ago 5
Node.js Question

Running multiple phantom process for multiple requests asynchronous in Node.js

I am writing a node.js application which allows the user to generate a multi-page pdf in node.js.

I create the pages one by one and pass the individual pages as an array with the request from function to function until all pages are created at which time I use pdf merge to put a multi-page document together.

function pdfExport_map(site, siteData, req){
phantom.pdf_export_map(site, siteData, req, pdfExport_census);
}

function pdfExport_census(site, siteData, req){
phantom.pdf_export_census(site, siteData, req, pdfExport_ue);
}

function pdfExport_ue(site, siteData, req){
phantom.pdf_export_ue(site, siteData, req, pdfExport_decay);
}

function pdfExport_decay(site, siteData, req){
phantom.pdf_export_decay(site, siteData, req, processSites);
}

function processSites(req) {
var pdfMerge = new PDFMerge(req.fileStack, 'C:\\Program Files (x86)\\PDFtk\\bin\\pdftk.exe');
var d = Date.now();
var filename = 'r' + d.toString() + '.pdf';
pdfMerge.asNewFile(process.env.FILEPATH + filename).merge(function (error, file) {
console.log(file);
});
}


This works well enough if one user runs the process. If multiple user run the process at the same time only one user gets a report. The process fails with the phantom promises. When I set break points in the following function I see that the data for site B is received (Breakpoint 1) before site A has been exported as PDF (Breakpoint 2). The process for site B will never pick up at this stage.

function pdf_export_map(site, siteData, req, _pdfExport_census) {
phantom.create()
.then(instance => {
ph = instance;
return instance.createPage();
})
.then(p => {
page = p;
page.open('about:blank');
})
.then(function () {
var renderTimeout,
c = 0;

function doRender() {
console.log('page loaded');
var d = Date.now();
var filename = 'm' + d.toString() + '.pdf';
page.render('tmp/' + filename)
.then(function () {


//breakpoint 2 page PDF export complete
ph.exit();
req.fileStack.push(process.env.FILEPATH + filename);
_pdfExport_census(site, siteData, req)
});
}

page.on('onResourceRequested', function (r) {
c++;
console.log(('000' + r.id).slice(-4) + ' | ' + r.url);
clearTimeout(renderTimeout);
});

page.on('onResourceReceived', function (r) {
if (!r.stage || r.stage === 'end') {
c--;
console.log(('000' + r.id).slice(-4) + ' | ' + r.status);
if (c === 0) {
renderTimeout = setTimeout(doRender, 5000);
}
}
});

if (process.platform == 'win32') {
page.property('paperSize', {
width: '11in',
height: '8.5in'
});
} else {
page.property('paperSize', {
width: '1056px',
height: '816px'
});
}

page.property('viewportSize', {
width: 1056,
height: 816
});

page.property('clipRect', {
top: 0,
left: 0,
width: 1056,
height: 816
});


//breakpoint 1 data received, start render
ejs.renderFile('views/phantommap.ejs', {
site: site,
features: siteData.features,
subdirectory: process.env.SUBDIRECTORY,
page: req.fileStack.length + 1
}, function (err, html) {
page.setContent(html, 'http://' + process.env.HOST + '/amazon/phantommap');
});

})
.catch(err => {
console.log(err);
ph.exit();
});
}


I have a poor understanding about phantom and promises. Is it possible to run this synchronous, e.g. finish the page for one site and then start the page for the second report?

Edit: Doing a few tests I have commented ph.exit(). I see I receive the data for Site A and for Site B. But the page for Site A is exported twice.

Edit: Following the recommendation from Amir I got this working by declaring the ph and page inside the function like so.

phantom.create()
.then(instance => {
_ph = instance;
return instance.createPage();
})
.then(p => {
_page = p;
_page.open('about:blank');
})
.then(function () {
var renderTimeout,
c = 0,
ph = _ph,
page = _page;


I still need to look into using async and await.

Answer

I suspect that your code is not thread-safe. I see you have a variable called ph. Is this a global var that is shared across multiple requests? If so, there is your problem. You are are sharing instances. Instead, you should not use any global variables and close the phantom instance for only that request.

There was a similar issue reported on phantom at https://github.com/amir20/phantomjs-node/issues/583. But the problem was that it was reusing the instance of the phantom process.

A couple of recommendations for your code. Use async and await instead. This callback hell is going to make you go crazy. Don't use any global variables that are declared outside of your session for each request. Sharing data between requests will have unexpected results.

Disclaimer: I am the author of PhantomJs-Node.

Comments