Elfy Elfy - 1 month ago 15
HTTP Question

Check if URL is a zip

How can I determine whether a URL points to a ZIP file without downloading the whole thing first, since it may be too large? Can I somehow fetch just a few bytes and check for a ZIP header?

Answer

I adapted my code from this answer to instead read 4 bytes from the response (either by sending a Range header or by aborting the transfer once 4 bytes have been read) and then check whether those 4 bytes match the zip magic header.

Give it a try and let me know the results. You'll probably want to add some error checking so you can tell when the file type could not be determined because the curl request failed for one reason or another.

<?php

/**
 * Try to determine whether a remote file is a zip archive without downloading it.
 *
 * Only the first 4 bytes are needed: they are requested with an HTTP Range
 * header and, for servers that ignore Range, the transfer is aborted from the
 * write callback as soon as 4 bytes have arrived. Those bytes are compared
 * against the zip signatures "PK\x03\x04" (local file header), "PK\x05\x06"
 * (end of central directory, i.e. an empty archive) and "PK\x07\x08"
 * (spanned archive marker).
 *
 * @param string $url URL of the file to probe
 *
 * @return bool true if the response starts with a zip signature; false
 *              otherwise, including when the request fails or yields fewer
 *              than 4 bytes
 */
function isRemoteFileZip($url)
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    $headers = array(
        'Range: bytes=0-3', // byte ranges are inclusive: 0-3 is exactly 4 bytes
        'Connection: close',
    );

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2450.0 Iron/46.0.2450.0');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    // NOTE(review): TLS certificate/host verification is switched off here, so
    // the answer can be spoofed by a man-in-the-middle. Kept as in the original
    // to avoid changing behaviour for self-signed hosts; enable for untrusted use.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_VERBOSE, 0); // set to 1 to debug
    // php://output is a write-only stream, so it is opened for writing.
    curl_setopt($ch, CURLOPT_STDERR, fopen('php://output', 'w'));

    $header = '';

    // Accumulate body data; once 4 bytes are available, return a count that
    // differs from strlen($data) so that curl aborts the transfer.
    curl_setopt($ch, CURLOPT_WRITEFUNCTION, function ($curl, $data) use (&$header) {
        $header .= $data;

        return strlen($header) < 4 ? strlen($data) : 0;
    });

    // The return value is deliberately ignored: aborting from the write
    // callback makes curl_exec() report failure even though the bytes we
    // need have already been captured in $header.
    curl_exec($ch);

    // preg_match() yields 1, 0 or false; compare strictly so callers get a bool.
    return preg_match('/^PK(?:\x03\x04|\x05\x06|\x07\x08)/', $header) === 1;
}

// Demo: probe one zip URL and one non-zip URL, dumping the verdict for each.
foreach (array('https://example.com/file.zip', 'https://example.com/logo.png') as $candidateUrl) {
    var_dump(isRemoteFileZip($candidateUrl));
}
Comments