phizzy phizzy - 6 months ago 97
Node.js Question

Query Instagram posts by hashtag and time range

I'm trying to query posts from Instagram by providing the hashtag and the time range (since and until dates).
I use the recent tags endpoint.


https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN



My code is written in Node.js using the
instagram-node
library (see the inline comments):

// Load the project configuration (the access token is read from it below)
var config = require('../config.js');

// Load the instagram-node library, then build a client instance from it
var instagramNode = require('instagram-node');
var ig = instagramNode.instagram();

// Hand the configured access token to the client
var credentials = { access_token: config.instagram.access_token };
ig.use(credentials);

// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {

// Create the posts array (will be concated with new posts from pagination responses)
var posts = [];

// Convert the date objects into timestamps (seconds)
var sinceTime = Math.floor(minDate.getTime() / 1000);
var untilTime = Math.floor(maxDate.getTime() / 1000);

// Fetch the IG posts page by page
ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {

// Handle error
if (err) {
return callback(err);
}

// Manually filter by time
var filteredByTime = medias.filter(function (currentPost) {
// Convert the created_time string into number (seconds timestamp)
var createdTime = +currentPost.created_time;

// Check if it's after since date and before until date
return createdTime >= sinceTime && createdTime <= untilTime;
});

// Get the last post on this page
var lastPost = medias[medias.length - 1] || {};

// ...and its timestamp
var lastPostTimeStamp = +(lastPost.created_time || -1);

// ...and its timestamp date object
var lastPostDate = new Date(lastPostTimeStamp * 1000);

// Concat the new [filtered] posts to the big array
posts = posts.concat(filteredByTime);

// Show some output
console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);


// Check if the last post is BEFORE until date and there are no new posts in the provided range
if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
// ...if so, we can callback!
return callback(null, posts);
}

// Navigate to the next page
pagination.next(fetchPosts);
});
};


This fetches the posts from the most recent to the least recent and manually filters them by `created_time`.
This works, but it is very inefficient: if we want, for example, to get the posts from one year ago, we have to iterate through the pages all the way back to that time, which uses a lot of requests (probably more than the rate limit of 5k per hour).

Is there a better way to make this query? How to get the Instagram posts by providing the hashtag and the time range?

Answer

I think this is the basic idea you're looking for. I'm not overly familiar with Node.js, so this is all in plain JavaScript. You'll have to modify it to suit your needs and probably wrap it in a function.

The idea is to convert an Instagram id (1116307519311125603 in this example) to a date and vice versa, so that you can jump straight to a specific point in time rather than backtracking through all results until you find your desired timestamp. The portion of the id after the underscore '_' should be trimmed off, as that refers, in some way, to the user IIRC. There are 4 functions in the example that I hope will help you out.

Happy hacking!

//static: durations expressed in seconds
var epoch_hour = 60 * 60;
var epoch_day = 24 * 60 * 60;
var epoch_month = 30 * 24 * 60 * 60;      // a 30-day month
var epoch_year = 365.25 * 24 * 60 * 60;   // a 365.25-day year

//you'll need to set this part up/integrate it with your code
//NOTE: the dataId literal is larger than Number.MAX_SAFE_INTEGER, so it is
//stored as the nearest representable double, not the exact digits written here.
var dataId = 1116307519311125603,
    range = 2 * epoch_hour,   //width of the search window, in seconds
    count = 1,                //sent as the `count` query parameter
    tagName = 'cars',
    access = prompt('Enter access token:'),
    //the tag name and the user-supplied token are percent-encoded so that
    //characters such as '&', '#' or '/' cannot alter the structure of the URL
    baseUrl = 'https://api.instagram.com/v1/tags/' + 
              encodeURIComponent(tagName) + '/media/recent?access_token=' +
              encodeURIComponent(access);

//date && id utilities
/**
 * Maps an Instagram id to a Unix timestamp in whole seconds, using the
 * fixed scale and offset constants below (the inverse of epochToId's
 * arithmetic, up to rounding).
 *
 * @param {number} n - the numeric id
 * @returns {number} the rounded epoch time in seconds
 */
function idToEpoch(n){
  var scaledId = n / 1000000000000;
  var shiftedId = scaledId + 11024476.5839159095;
  return Math.round(shiftedId / 0.008388608);
}

/**
 * Maps a Unix timestamp in seconds to an Instagram-style numeric id, using
 * the same scale and offset constants as idToEpoch in the opposite direction.
 * For timestamps like the example's, the result is far above
 * Number.MAX_SAFE_INTEGER, so its lowest digits are only approximate.
 *
 * @param {number} n - epoch time in seconds
 * @returns {number} the rounded id
 */
function epochToId(n){
  var scaledTime = n * 0.008388608;
  var shiftedTime = scaledTime - 11024476.5839159095;
  return Math.round(shiftedTime * 1000000000000);
}

/**
 * Builds a Date that lies n seconds after 1970-01-01T00:00:00Z.
 * A fractional part of n is dropped (truncated toward zero).
 *
 * @param {number} n - epoch time in seconds
 * @returns {Date} the corresponding Date (invalid if n is not a finite number)
 */
function newDateFromEpoch(n){
  // Passing n as the seconds field of the epoch instant offsets it by n seconds
  return new Date(Date.UTC(1970, 0, 1, 0, 0, n));
}

/**
 * Converts a Date to epoch time in seconds, discarding the date's
 * millisecond component before dividing.
 *
 * @param {Date} d - the date to convert
 * @returns {number} epoch time in seconds
 */
function dateToEpoch(d){
  var totalMs = d.getTime();
  var wholeSecondMs = totalMs - d.getMilliseconds();
  return wholeSecondMs / 1000;
}

//derive the search window from the starting id and the range
var epoch_time = idToEpoch(dataId);
var minDate = newDateFromEpoch(epoch_time);
var maxDate = newDateFromEpoch(epoch_time + range);

//ids bounding the window (the misspelt name minumumId is kept as-is)
var minumumId = epochToId(epoch_time);
var maximumId = epochToId(epoch_time + range);

//append the query parameters to the base request url, one at a time
var newUrl = baseUrl;
newUrl += '&count=' + count;
newUrl += '&min_tag_id=' + minumumId;
newUrl += '&max_tag_id=' + maximumId;


//used for testing
/*alert('Start: ' + minDate + ' (' + epoch_time + 
        ')\nEnd: ' + maxDate + ' (' + (epoch_time +
        range) + ')');
window.location = newUrl;*/