Thomas Thomas - 1 month ago 6
PHP Question

PHP: cURL and keep track of all redirections

I'm looking to cURL a URL and keep track of each individual URL it passes through along the way. For some reason I am unable to accomplish this without making recursive cURL calls, which is not ideal. Perhaps I am missing some easy option. Thoughts?

// Request a URL with cURL's built-in redirect following enabled, then dump
// the transfer statistics that cURL reports for the request.
$url = "some url with redirects";
$ch = curl_init($url);

// Option table, applied in order below.
$options = array(
    CURLOPT_RETURNTRANSFER => 1,      // return the response instead of printing it
    CURLOPT_FOLLOWLOCATION => true,   // let cURL chase redirects by itself
    CURLOPT_HEADER         => true,   // keep response headers in the returned string
    CURLOPT_NOBODY         => false,  // fetch the body too
    CURLOPT_TIMEOUT        => 10,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_USERAGENT      => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0",
);
foreach ($options as $option => $value) {
    curl_setopt($ch, $option, $value);
}

$html = curl_exec($ch);

// Only collect and print the transfer info when cURL reported no error.
$info = array();
if (curl_errno($ch) === 0) {
    $info = curl_getinfo($ch);
    echo "<pre>" . print_r($info, true) . "</pre>";
}


And I get a response like this:

Array
(
[url] => THE LAST URL THAT WAS HIT
[content_type] => text/html; charset=utf-8
[http_code] => 200
[header_size] => 1942
[request_size] => 1047
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 2 <---- I WANT THESE
[total_time] => 0.799589
[namelookup_time] => 0.000741
[connect_time] => 0.104206
[pretransfer_time] => 0.104306
[size_upload] => 0
[size_download] => 49460
[speed_download] => 61856
[speed_upload] => 0
[download_content_length] => 49460
[upload_content_length] => 0
[starttransfer_time] => 0.280781
[redirect_time] => 0.400723
)

Answer

You have

curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

This means that cURL follows the redirects internally and returns only the final page; curl_getinfo() then reports just the last URL that was hit, not the intermediate ones.

To follow the redirects manually, and get a callback for every hop:

/**
 * Fetch a URL, following HTTP redirects one hop at a time so each hop can be observed.
 *
 * cURL's automatic redirect following is disabled; after every response the
 * redirect target is read from cURL itself (CURLINFO_REDIRECT_URL), which is
 * already resolved to an absolute URL and is not affected by header-name case.
 * A single handle is reused for all hops, so no extra handles pile up on long chains.
 *
 * @param string        $url              First URL to request.
 * @param callable|null $redirectcallback Optional. Called as ($newUrl, $currentUrl)
 *                                        just before each redirect is followed.
 * @param int           $maxRedirects     Maximum number of redirects to follow
 *                                        (default 10). Guards against redirect loops.
 *
 * @return string|false Raw response (headers followed by body) of the last request
 *                      made, or false if the handle could not be created or the
 *                      transfer failed. When the redirect limit is hit, or a
 *                      redirect response carries no usable target, that redirect
 *                      response itself is returned.
 */
function getWebPage($url, $redirectcallback = null, $maxRedirects = 10){
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_NOBODY, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20061024 BonEcho/2.0");

    // Status codes that redirect to the URL given in the Location header.
    $redirectCodes = array(301, 302, 303, 307, 308);

    $html = false;
    for ($followed = 0; ; $followed++) {
        curl_setopt($ch, CURLOPT_URL, $url);
        $html = curl_exec($ch);
        if ($html === false) {
            break; // transfer failed; nothing to inspect
        }

        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if (!in_array($http_code, $redirectCodes, true)) {
            break; // final, non-redirect response
        }

        // With FOLLOWLOCATION off, cURL reports where it would have gone next.
        $nurl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        if (!is_string($nurl) || $nurl === '') {
            break; // redirect status without a usable target: keep this response
        }

        if ($followed >= $maxRedirects) {
            break; // redirect limit reached (e.g. a redirect loop)
        }

        if ($redirectcallback) { // callback
            $redirectcallback($nurl, $url);
        }
        $url = $nurl;
    }

    return $html;
}

/**
 * Redirect callback: print one "from ---> to" line for a single redirect hop.
 *
 * @param string $newUrl     URL the redirect points to.
 * @param string $currentUrl URL that issued the redirect.
 */
function trackAllLocations($newUrl, $currentUrl){
    echo $currentUrl, ' ---> ', $newUrl, "\r\n";
}

// Example usage: fetch the URL, passing trackAllLocations as the redirect callback; the returned page is discarded here.
getWebPage('some url with redirects', 'trackAllLocations');