J. Doe J. Doe - 20 days ago 9
C Question

performance comparison - pcap file reading: C++'s ifstream VS C's fread

I was researching which is the faster binary file reader: C++'s ifstream::read or C's fread.

According to the internet, including similar questions, there is not much difference, so I decided to dig deeper.

I used a 1.22 GB pcap file, which contains about 1,377,000 packets.
Both programs were compiled using mingw32-g++, with no optimizations.

The header structs are defined according to Wireshark's wiki page on the libpcap file structure:
https://wiki.wireshark.org/Development/LibpcapFileFormat

This is the C code:

#include <stdio.h>
#include <stdlib.h>
#include <Winsock2.h>

/* definition of structs: pcap_global_header, pcap_packet_header, ether_header, ipv4_header, tcp_header */

/*
 * Counts the packet records stored in "test.pcap" using C stdio (fread).
 *
 * For every record the pcap packet header is read, then the Ethernet header,
 * then (when the type/protocol fields match) the IPv4 and TCP headers, and
 * finally whatever is left of the captured bytes is read into a scratch
 * buffer sized from the global header's snaplen.
 *
 * Returns 0 on success, 1 when the file cannot be opened, is not an Ethernet
 * capture, or contains a record that does not fit the scratch buffer.
 */
int main()
{
    int count = 0, bytes_read;
    int status = 0;

    /* open file */
    FILE * file = fopen("test.pcap", "rb");
    if(file == NULL)
    {
        /* without this check every fread below would receive a NULL stream */
        perror("test.pcap");
        return 1;
    }

    /* read file header */
    struct pcap_global_header gheader;

    if(fread(&gheader, sizeof(char), sizeof(struct pcap_global_header), file)
       != sizeof(struct pcap_global_header))
    {
        /* a short read would leave gheader partly uninitialised */
        fprintf(stderr, "test.pcap: truncated global header\n");
        fclose(file);
        return 1;
    }

    // if not ethernet type
    if(gheader.network != 1)
    {
        printf("not ethernet !\n");
        fclose(file);
        return 1;
    }

    /* scratch buffer for the part of each packet that is not parsed */
    char *buffer = (char*)malloc(gheader.snaplen);
    if(buffer == NULL && gheader.snaplen != 0)
    {
        fprintf(stderr, "out of memory\n");
        fclose(file);
        return 1;
    }

    struct pcap_packet_header pheader;
    struct ether_header eth;
    struct ipv4_header ip;
    struct tcp_header tcp;

    /*
     * Loop on the result of the record-header read itself: it stops at end of
     * file exactly like the feof() test did, and also stops on a read error
     * (where feof() would stay false forever).
     */
    while(fread(&pheader, sizeof(char), sizeof(struct pcap_packet_header), file)
          == sizeof(struct pcap_packet_header))
    {
        ++count;

        bytes_read = fread(&eth, sizeof(char), sizeof(struct ether_header), file);
        if(bytes_read != (int)sizeof(struct ether_header))
        {
            /* file ends inside this record; it has already been counted */
            break;
        }

        // ip
        /* NOTE(review): IPv4's EtherType is 0x0800 on the wire; comparing the
           raw field with 0x08 presumably relies on a 16-bit field read on a
           little-endian host - confirm against the struct definition. */
        if(eth.type == 0x08)
        {
            int n = fread(&ip, sizeof(char), sizeof(struct ipv4_header), file);
            bytes_read += n;
            if(n != (int)sizeof(struct ipv4_header))
            {
                break;
            }

            //tcp
            if( ip.protocol == 0x06 )
            {
                n = fread(&tcp, sizeof(char), sizeof(struct tcp_header), file);
                bytes_read += n;
                if(n != (int)sizeof(struct tcp_header))
                {
                    break;
                }
            }
        }

        const unsigned long captured = pheader.incl_len;
        const unsigned long consumed = (unsigned long)bytes_read;

        if(consumed > captured)
        {
            /*
             * The record is shorter than the headers just read, so part of
             * the next record has been consumed. Step back instead of letting
             * the subtraction wrap around to a huge read size.
             */
            if(fseek(file, -(long)(consumed - captured), SEEK_CUR) != 0)
            {
                perror("fseek");
                status = 1;
                break;
            }
        }
        else if(captured - consumed > gheader.snaplen)
        {
            /* would overflow the snaplen-sized scratch buffer */
            fprintf(stderr, "packet %d: captured length exceeds snaplen\n", count);
            status = 1;
            break;
        }
        else
        {
            //read rest of the packet
            /* result intentionally ignored: a short read here is detected by
               the record-header read in the loop condition */
            fread(buffer, sizeof(char), captured - consumed, file);
        }
    }

    printf("(C) total packets: %d\n", count);

    free(buffer);
    fclose(file);

    return status;
}


and this is the C++ code:

#include <iostream>
#include <fstream>
#include <memory>

#include <Winsock2.h>

/* definition of structs: pcap_global_header, pcap_packet_header, ether_header, ipv4_header, tcp_header */

/*
 * Counts the packet records stored in "test.pcap" using std::ifstream::read.
 *
 * For every record the pcap packet header is read, then the Ethernet header,
 * then (when the type/protocol fields match) the IPv4 and TCP headers, and
 * finally whatever is left of the captured bytes is read into a scratch
 * buffer sized from the global header's snaplen.
 *
 * Returns 0 on success, 1 when the file cannot be opened, is not an Ethernet
 * capture, or contains a record that does not fit the scratch buffer.
 */
int main()
{
    int count_packets = 0, bytes_read;
    int status = 0;

    /* open file */
    std::ifstream file("test.pcap", std::fstream::binary | std::fstream::in);
    if(!file)
    {
        std::cerr << "cannot open test.pcap" << std::endl;
        return 1;
    }

    /* read file header */
    struct pcap_global_header gheader;

    if(!file.read((char*)&gheader, sizeof(struct pcap_global_header)))
    {
        /* a failed read would leave gheader uninitialised */
        std::cerr << "test.pcap: truncated global header" << std::endl;
        return 1;
    }

    // if not ethernet type
    if(gheader.network != 1)
    {
        /* std::cout instead of printf: <cstdio> is not included by this program */
        std::cout << "not ethernet !\n";
        return 1;
    }

    /* scratch buffer for the part of each packet that is not parsed */
    std::allocator<char> alloc;
    char *buffer = alloc.allocate(gheader.snaplen);

    struct pcap_packet_header pheader;
    struct ether_header eth;
    struct ipv4_header ip;
    struct tcp_header tcp;

    /*
     * Loop on the result of the record-header read itself: it stops at end of
     * file exactly like the eof() test did, and also stops when the stream
     * fails without eofbit being set (where eof() would stay false forever).
     */
    while(file.read((char*)&pheader, sizeof(pcap_packet_header)))
    {
        ++count_packets;

        if(!file.read((char*)&eth, sizeof(struct ether_header)))
        {
            /* file ends inside this record; it has already been counted */
            break;
        }
        bytes_read = sizeof(struct ether_header);

        // ip
        /* NOTE(review): IPv4's EtherType is 0x0800 on the wire; comparing the
           raw field with 0x08 presumably relies on a 16-bit field read on a
           little-endian host - confirm against the struct definition. */
        if(eth.type == 0x08)
        {
            if(!file.read((char*)&ip, sizeof(struct ipv4_header)))
            {
                break;
            }
            bytes_read += sizeof(struct ipv4_header);

            //tcp
            if( ip.protocol == 0x06 )
            {
                if(!file.read((char*)&tcp, sizeof(struct tcp_header)))
                {
                    break;
                }
                bytes_read += sizeof(struct tcp_header);
            }
        }

        const unsigned long captured = pheader.incl_len;
        const unsigned long consumed = static_cast<unsigned long>(bytes_read);

        if(consumed > captured)
        {
            /*
             * The record is shorter than the headers just read, so part of
             * the next record has been consumed. Step back instead of letting
             * the subtraction wrap around to a huge read size.
             */
            file.seekg(-static_cast<std::streamoff>(consumed - captured), std::ios::cur);
        }
        else if(captured - consumed > gheader.snaplen)
        {
            /* would overflow the snaplen-sized scratch buffer */
            std::cerr << "packet " << count_packets
                      << ": captured length exceeds snaplen" << std::endl;
            status = 1;
            break;
        }
        else
        {
            // read rest of the packet
            /* a failure here is detected by the record-header read in the
               loop condition */
            file.read(buffer, static_cast<std::streamsize>(captured - consumed));
        }
    }

    std::cout << "(C++) total packets :" << count_packets << std::endl;

    alloc.deallocate(buffer, gheader.snaplen);

    return status;
}


The results are very disappointing:

C code result:

(C) total packets: 1377065

Process returned 0 (0x0) execution time : 1.031 s
Press any key to continue.


C++ code result:

(C++) total packets :1377065

Process returned 0 (0x0) execution time : 3.172 s
Press any key to continue.


Obviously, I ran each version a couple of times. So, I am looking for a faster way to read files using C++.

Answer

ifstream::read() copies data from the internal buffer to your buffer. This causes the main difference in performance. You could try to overcome it by replacing the internal buffer with your own via pubsetbuf:

// Default-constructed stream: no file has been opened on it yet.
std::ifstream file;
// Caller-owned 1024-byte array offered to the stream as its buffer.
char buf[1024];
// Hand the array to the stream's underlying buffer object; whether the
// stream actually uses it is implementation-defined.
file.rdbuf()->pubsetbuf(buf, sizeof buf);

The problem is that the behavior of this function is implementation-defined, and in most cases an extra data copy is still needed.

In your case you don't need all the power of ifstream, so for performance and simplicity I suggest using <cstdio>.