Giles Hunt Giles Hunt - 2 months ago 18
Node.js Question

Node js Request - Empty body in response

I am using node js request to retrieve the HTML from the following URL but the body is returning empty.

var request = require("request");

var url = 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20';

// Fetch the product page, print its body, then print either the failing
// status code or the full response object for inspection.
request({
  uri: url
}, function (error, response, body) {
  // When the request itself fails, the failure is reported through `error`
  // and there is no response to inspect; reading response.statusCode in that
  // case would throw a TypeError, so report the error and stop here.
  if (error) {
    console.log('fail');
    console.log(error);
    return;
  }

  console.log(body);

  // statusCode is a number, so compare it strictly against 200 rather than
  // relying on loose equality with the string '200'.
  if (response.statusCode !== 200) {
    console.log('fail');
    console.log(response.statusCode + ' # ' + error);
  } else {
    console.log(response.statusCode);
    console.log('############');
    console.log(response);
  }
});


On closer inspection I can see this in the response:

_header: 'GET /webapp/wcs/stores/servlet/CatalogNavigationSearchResultCmd?langId=-1&storeId=12556&catalogId=33057&beginIndex=1&viewAllFlag=false&pageSize=20&searchTermScope=3&searchTermOperator=LIKE&searchType=ALL&sort_field=Relevance&searchTerm=TS19M11KRED&x=25&y=11&geoip=search HTTP/1.1\r\nreferer: http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod\r\nhost: www.topshop.com\r\nConnection: close\r\n\r\n',
_headers:
{ referer: 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod',
host: 'www.topshop.com' },


Which I assume means that there has been a redirect, even though it has returned a 200 OK instead of a 302 redirect?

I'm not sure of the best way to retrieve the body from the redirect. Do I need to make another request to the URL in the header? And shouldn't the response code be a 302 in this case instead of a 200?

Any help appreciated.

rsp rsp
Answer

What you show seems to be something that happened after a redirect - note that the referer is set to your original URL.

Maybe you should set more headers, like User-Agent because some servers don't respond without it.

For example, see the code that I wrote for this answer:

'use strict';
// Example: request a GitHub user profile with the `request` library, sending
// an explicit User-Agent header, and print the profile's html_url.
var request = require('request');
var url = 'https://api.github.com/users/rsp';
request.get({
    url: url,
    // json: true makes `request` hand the callback an already-parsed body
    json: true,
    // explicit User-Agent; this header is the only thing that differs from
    // the variant of this example that gets a 403
    headers: {'User-Agent': 'request'}
  }, (err, res, data) => {
    if (err) {
      // the request failed and the error was passed to the callback
      console.log('Error:', err);
    } else if (res.statusCode !== 200) {
      // a response arrived, but with a status other than 200
      console.log('Status:', res.statusCode);
    } else {
      // data is already parsed as JSON:
      console.log(data.html_url);
    }
});

It returns:

  • https://github.com/rsp

Note that it doesn't work without the User-Agent header:

'use strict';
// Counter-example: the same GitHub request, but with no User-Agent header,
// to show that the server rejects it.
var request = require('request');
var url = 'https://api.github.com/users/rsp';
request.get({
    url: url,
    // json: true makes `request` hand the callback an already-parsed body;
    // note that no `headers` option is passed here, so no User-Agent is set
    json: true,
  }, (err, res, data) => {
    if (err) {
      // the request failed and the error was passed to the callback
      console.log('Error:', err);
    } else if (res.statusCode !== 200) {
      // this is the branch taken in this example: the status printed is 403
      console.log('Status:', res.statusCode);
    } else {
      // data is already parsed as JSON:
      console.log(data.html_url);
    }
});

It returns:

  • Status: 403

The same URL, the same code - the only difference is the User-Agent header.