Francis Bongiovanni Francis Bongiovanni - 2 months ago 60x
Javascript Question

NightmareJS - Web Crawler needs to iterate over JSON data

I'm building a web crawler that will cover 200+ sites. My current code reads from an external JSON file I've built that lists a dozen sites. Sample:

[
  {
    "company": "My Company",
    "url": "",
    "query": "div.job-listings>dt a",
    "link": "div.job-listings>dt a"
  },
  {
    "company": "Another Company",
    "url": "",
    "query": ".clearfix~ .col-sm-6+ .col-sm-6 a , .col-sm-6:nth-child(4) a",
    "link": ".clearfix~ .col-sm-6+ .col-sm-6 a , .col-sm-6:nth-child(4) a"
  }
]

When I tried async.each, it logged all of the original objects at the top of the function before attempting to enter the Nightmare instance, and then returned the error `Nothing responds to "goto"`. Then I tried async.eachSeries, which prints out the correct result but stops after the first iteration.

var async = require ('async');
var data = require('./input.json')
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: false })

/**
 * Crawl one site described by an entry of input.json and log the job
 * positions found on it.
 *
 * @param {Object} data - One site entry: `company`, `url`, `query` (CSS
 *   selector for the position titles) and `link` (CSS selector for the
 *   per-position anchors).
 * @param {Function} cb - Completion callback supplied by async.
 *
 * NOTE: `cb` is never invoked in this version. async.eachSeries waits for it
 * before starting the next item, which is why the crawl stops after the first
 * site — this is the behavior the question is asking about.
 */
function crawl(data, cb) {
  console.log(data) // When async.each will iterate all items then error
  var nightmare = new Nightmare()
  nightmare
    .goto(data.url) // go to JSON specified url
    .wait(data.query) // wait until CSS selector loads
    .evaluate(function (data) {
      // Runs inside the page; only the `data` argument passed below is
      // available here, and only the returned value crosses back to Node.
      var positionsArr = []
      var obj = {}
      var query = document.querySelectorAll(data.query)
      var link = document.querySelectorAll(data.link)
      /* Set query and link equal to all elements with selector
      iterate through appending text (innerText) from each element
      with job url to obj */
      var i;
      for (i = 0; i < query.length; i++) {
        var positionsObj = {}
        positionsObj.title = query[i].innerText.trim()
        // if each position has individual page
        if (data.link !== null && link[i]) {
          positionsObj.url = link[i].href
        } else {
          // fall back to the listing page itself
          positionsObj.url = data.url
        }
        positionsArr.push(positionsObj)
      }
      obj.positions = positionsArr
      return obj
    }, data)
    .end() // release the Electron process once the queue has run
    .then(function (obj) {
      console.log(obj)
    })
    .catch(function (error) {
      console.error('error', error);
    });
}

async.eachSeries(data, crawl, function (err) {
  if (err) {
    console.error('error', err);
  }
});

How can I have this work without having to write an individual file for each? Or is there a better way of crawling this amount of sites?

Source code


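You have to call the callback (cb) when each crawl finishes: async.eachSeries only starts the second item, and every item after it, once the callback for the previous one has been invoked: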

.then(function (obj) {
.catch(function (error) {
    console.error('error', error);