Unix Man Unix Man - 1 month ago 9
PHP Question

Reading a large CSV file in PHP

I have a very large CSV file. 51427 lines to be exact.

Is there a way to read only the required lines into an array? That would speed things up significantly.

Answer

You might want to look at streaming the CSV file. Send the file location, the start position, and the number of bytes to read as GET parameters to a ProgressiveReader.php script:

/**
 * Signals that a requested file does not exist.
 *
 * The exception message is expected to hold the file name that was looked up;
 * casting the exception to a string yields an HTML error banner.
 */
class NoFileFoundException extends Exception {
    /**
     * Render the error as an HTML heading.
     *
     * The message is HTML-escaped before being embedded, because the file
     * name may come from user-supplied input and must not be able to inject
     * markup into the page.
     *
     * @return string HTML fragment naming the file that could not be found.
     */
    public function __toString() {
        return '<h1><b>ERROR:</b> could not find ('
                    .htmlspecialchars($this->getMessage(), ENT_QUOTES, 'UTF-8').
                    ') please check your settings.</h1>';
    }
}

/**
 * Signals that a file exists in configuration but could not be opened.
 *
 * The exception message is expected to hold the file name that failed to
 * open; casting the exception to a string yields an HTML error banner.
 */
class NoFileOpenException extends Exception {
    /**
     * Render the error as an HTML heading.
     *
     * The message is HTML-escaped before being embedded, because the file
     * name may come from user-supplied input and must not be able to inject
     * markup into the page.
     *
     * @return string HTML fragment naming the file that could not be opened.
     */
    public function __toString() {
        return '<h1><b>ERROR:</b> could not open ('
                    .htmlspecialchars($this->getMessage(), ENT_QUOTES, 'UTF-8').
                    ') please check your settings.</h1>';
    }
}

/**
 * Contract for a file reader that can start at a byte offset and return
 * the content in packets of a configurable size.
 */
interface Reader {
    /**
     * Select the file to read.
     *
     * @param string $fName Path of the file.
     */
    public function setFileName($fName);

    /**
     * Open the selected file for reading.
     */
    public function open();

    /**
     * Set the position reading should start from.
     *
     * @param int $offset Start position.
     */
    public function setBufferOffset($offset);

    /**
     * Report the size of the readable buffer.
     */
    public function bufferSize();

    /**
     * Tell whether a start offset is in effect.
     */
    public function isOffset();

    /**
     * Set how much data a single read() should request.
     *
     * @param int $size Packet size.
     */
    public function setPacketSize($size);

    /**
     * Read the next packet.
     */
    public function read();

    /**
     * Tell whether the end of the file has been reached.
     */
    public function isEOF();

    /**
     * Release the underlying file.
     */
    public function close();

    /**
     * Read the content in one call instead of packet by packet.
     */
    public function readAll();
}

/**
 * Reads a file starting at a configurable byte offset, either packet by
 * packet or in one go.
 *
 * Typical use: setFileName(), optionally setBufferOffset() and
 * setPacketSize(), then open(), read() until isEOF() (or readAll()), and
 * finally close().
 */
class ProgressiveReader implements Reader {
    /** @var string Path of the file to read. */
    private $fName;
    /** @var resource|null Stream handle set by open(); null when not open. */
    private $fileHandler;
    /** @var int Byte position the stream is moved to by open(). */
    private $offset = 0;
    /** @var int Number of bytes requested by each read() call. */
    private $packetSize = 0;

    /**
     * Remember the file to read and verify that it exists.
     *
     * The name is stored even when the check fails.
     *
     * @param string $fName Path of the file.
     * @throws NoFileFoundException When the path does not exist.
     */
    public function setFileName($fName) {
        $this->fName = $fName;
        if(!file_exists($this->fName)) {
            throw new NoFileFoundException($this->fName);
        }
    }

    /**
     * Open the file in binary read mode and move to the configured offset.
     *
     * @throws NoFileOpenException When the file cannot be opened.
     */
    public function open() {
        try {
            $handle = fopen($this->fName, 'rb');
        }
        catch (Exception $e) {
            // Only reached when an error handler converts the fopen()
            // warning into an exception (PHPUnit does this, for example).
            throw new NoFileOpenException($this->fName);
        }
        // fopen() itself signals failure by returning false rather than
        // throwing, so that case has to be checked explicitly.
        if($handle === false) {
            throw new NoFileOpenException($this->fName);
        }
        $this->fileHandler = $handle;
        fseek($this->fileHandler, $this->offset);
    }

    /**
     * Set the byte position open() will seek to.
     *
     * @param int $offset Start position in bytes.
     */
    public function setBufferOffset($offset) {
        $this->offset = $offset;
    }

    /**
     * Report the buffer size derived from the file size and the offset.
     *
     * With a zero offset this is the full file size. With a positive offset
     * it is the file size minus (offset + 1).
     * NOTE(review): the extra byte subtracted for positive offsets means the
     * result is one less than the bytes actually left to read; the unit test
     * pins this value (12-byte file, offset 2 gives 9), so it is kept as-is.
     * Confirm whether that is intended.
     *
     * @return int Buffer size in bytes.
     */
    public function bufferSize() {
        return filesize($this->fName) - (($this->offset > 0) ? ($this->offset  + 1) : 0);
    }

    /**
     * Tell whether a non-zero offset has been configured.
     *
     * @return bool False only when the offset is exactly the integer 0.
     */
    public function isOffset() {
        return $this->offset !== 0;
    }

    /**
     * Set the number of bytes each read() call asks for.
     *
     * @param int $size Packet size in bytes.
     */
    public function setPacketSize($size) {
        $this->packetSize = $size;
    }

    /**
     * Read the next packet from the current stream position.
     *
     * Requires open() and a prior setPacketSize() call; the default packet
     * size of 0 is passed to fread() unchanged.
     *
     * @return string|false Up to one packet of data, or false on failure.
     */
    public function read() {
        return fread($this->fileHandler, $this->packetSize);
    }

    /**
     * Tell whether the stream has reached end of file.
     *
     * @return bool
     */
    public function isEOF() {
        return feof($this->fileHandler);
    }

    /**
     * Close the stream if it is open.
     *
     * Safe to call repeatedly or before open(): the handle is only closed
     * while it is still a live resource, and is cleared afterwards.
     */
    public function close() {
        if(is_resource($this->fileHandler)) {
            fclose($this->fileHandler);
        }
        $this->fileHandler = null;
    }

    /**
     * Read everything from the current stream position to the end of file.
     *
     * Reading to end of stream avoids asking fread() for filesize() bytes,
     * which fails for an empty file (length 0) and depends on cached stat
     * data.
     *
     * @return string|false Remaining file content, or false on failure.
     */
    public function readAll() {
        return stream_get_contents($this->fileHandler);
    }
}

Here are the unit tests:

require_once 'PHPUnit/Framework.php';

require_once dirname(__FILE__).'/../ProgressiveReader.php';

/**
 * PHPUnit tests for ProgressiveReader.
 *
 * Each test runs against a freshly written 12-byte fixture file containing
 * "TEST1\nTEST2\n"; the fixture is removed again after every test.
 */
class ProgressiveReaderTest extends PHPUnit_Framework_TestCase {

    /** @var ProgressiveReader Reader under test, recreated for every test. */
    protected $reader;
    /** @var string Fixture file name, relative to the working directory. */
    private $fname = "Test.txt";

    /**
     * Write the fixture file and create a fresh reader.
     */
    protected function setUp() {
        $this->createTestFile();
        $this->reader = new ProgressiveReader();
    }

    /**
     * Close the reader, then remove the fixture so no file is left behind.
     */
    protected function tearDown() {
        $this->reader->close();
        $this->deleteTestFile();
    }

    /** Selecting an existing file must not throw. */
    public function test_isValidFile() {
        $this->reader->setFileName($this->fname);
    }

    /** Selecting a missing file must throw NoFileFoundException. */
    public function test_isNotValidFile() {
        try {
            $this->reader->setFileName("nothing.tada");
        }
        catch (NoFileFoundException $e) {
            return;
        }

        $this->fail();
    }

    /** Opening an existing file must not throw. */
    public function test_isFileOpen() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
    }

    /** Opening a file that vanished after selection must throw NoFileOpenException. */
    public function test_couldNotOpenFile() {
        $this->reader->setFileName($this->fname);
        try {
            $this->deleteTestFile();
            $this->reader->open();
        }
        catch (NoFileOpenException $e) {
            return;
        }

        $this->fail();
    }

    /** Without an offset the buffer size equals the file size. */
    public function test_bufferSizeZeroOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
        $this->assertEquals(12, $this->reader->bufferSize());
    }

    /** With offset 2 the reader reports 12 - (2 + 1) = 9. */
    public function test_bufferSizeTwoOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->open();
        $this->assertEquals(9, $this->reader->bufferSize());
    }

    /** A one-byte packet from the start yields the first character. */
    public function test_readBuffer() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertEquals("T", $this->reader->read());
    }

    /** A one-byte packet from offset 2 yields the third character. */
    public function test_readBufferWithOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertEquals("S", $this->reader->read());
    }

    /** Consecutive reads continue where the previous packet ended. */
    public function test_readSuccesive() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(6);
        $this->reader->open();
        $this->assertEquals("TEST1\n", $this->reader->read());
        $this->assertEquals("TEST2\n", $this->reader->read());
    }

    /** readAll() returns the complete fixture content. */
    public function test_readEntireBuffer() {
        $this->reader->setFileName($this->fname);
        $this->reader->open();
        $this->assertEquals("TEST1\nTEST2\n", $this->reader->readAll());
    }

    /** A freshly opened reader positioned inside the file is not at EOF. */
    public function test_isNotEOF() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->reader->setPacketSize(1);
        $this->reader->open();
        $this->assertFalse($this->reader->isEOF());
    }

    /** Requesting more bytes (15) than the file holds (12) reaches EOF. */
    public function test_isEOF() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(0);
        $this->reader->setPacketSize(15);
        $this->reader->open();
        $this->reader->read();
        $this->assertTrue($this->reader->isEOF());
    }

    /** A non-zero offset is reported as an offset. */
    public function test_isOffset() {
        $this->reader->setFileName($this->fname);
        $this->reader->setBufferOffset(2);
        $this->assertTrue($this->reader->isOffset());
    }

    /** The default offset of 0 is reported as no offset. */
    public function test_isNotOffset() {
        $this->reader->setFileName($this->fname);
        $this->assertFalse($this->reader->isOffset());
    }

    /**
     * Write the two-line fixture file, replacing any previous copy.
     */
    private function createTestFile() {
        $fp = fopen($this->fname, "wb");
        fwrite($fp, "TEST1\n");
        fwrite($fp, "TEST2\n");
        // fflush() targets the file stream; flush() would only flush PHP's
        // output buffer and has no effect on the file.
        fflush($fp);
        fclose($fp);
    }

    /**
     * Remove the fixture file if it is present.
     */
    private function deleteTestFile() {
        if(file_exists($this->fname)) {
            unlink($this->fname);
        }
    }
}