Amir Amir - 1 year ago 95
Node.js Question

cheerio: trouble selecting meta property

I want to extract some metadata from HTML meta tags. The following meta tags are present in the fetched HTML.

<meta property="og:type" content="offer"/>
<meta property="og:title" content='خانه ذرت با کورن داگ لذیذ و خوشمزه در تهران' />

I have written the sample code below to get the content of both the og:type and og:title properties from the meta tags:

var request = require('request');
var cheerio = require('cheerio');

var a='خانه-ذرت-با-کورن-داگ-لذیذ-و-خوشمزه/';

/**
 * Fetches a page and logs the content of its og:title and og:type meta tags.
 *
 * The URL is passed through encodeURI before the request is made, so
 * non-ASCII path segments are percent-encoded.
 *
 * @param {string} url - Address of the page to fetch.
 */
function getDealInfo(url){
    var options = {
        url: encodeURI(url),
        headers: {
            // NOTE(review): the pasted snippet appears truncated after this
            // header; add any further request headers here.
            'Accept' : '*/*',
        }
    };
    request(options, function(error, response, html){
        if (!error && response.statusCode === 200) {
            var $ = cheerio.load(html);
            var title = $('meta[property="og:title"]').attr('content');
            console.log('title: ' + title);
            var type = $('meta[property="og:type"]').attr('content');
            console.log('type: ' + type);
        } else {
            // `response` is undefined when the request fails before any HTTP
            // response arrives, so guard the status lookup instead of throwing.
            var status = response ? response.statusCode : 'no response';
            console.log('Error accessing Deal:' + status + '\n' + error);
        }
    });
}

I get the correct content for og:type but undefined for og:title, even though both properties are present in the fetched HTML.

Can someone help me figure out why I cannot get the og:title property content?

Answer Source

Note that the HTML response returned to request doesn't contain the og:title meta tag.

You can see this by using:

// Debug aid: dump the markup that request actually received to ./index.html
// so it can be inspected for the missing og:title tag.
request(options, function(error, response, html){
    // fs.writeFile is asynchronous and needs a completion callback;
    // surface any write failure instead of dropping it.
    fs.writeFile('./index.html', html, function(writeErr){
        if (writeErr) throw writeErr
    })
})

But you can use the needle package instead of request:

var needle = require('needle')
// Records extracted from the page; serialized to ./data.json after parsing.
var results = []
needle.get(encodeURI(url), function(err, res) {
    if (err) throw err
    var $ = cheerio.load(res.body)
    var title = $('meta[property="og:title"]').attr('content')
    // The pasted snippet lost the push() wrapper around this property;
    // without it `title: title` is only a labeled statement and results stays empty.
    results.push({
        title: title
    })
    // fs.writeFile is asynchronous and needs a completion callback.
    fs.writeFile('./data.json', JSON.stringify(results), function(writeErr) {
        if (writeErr) throw writeErr
    })
})

The output data.json file contains the og:title content:

        "title": "خانه ذرت با کورن داگ لذیذ و خوشمزه  در تهران"
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download