Diego Fernando Pava Diego Fernando Pava - 2 months ago 17
C++ Question

Fast algorithm to write data from a std::vector to a text file

I currently write a set of doubles from a vector to a text file like this:

// Open (create or truncate) the output text file.
std::ofstream fout;
fout.open("vector.txt");

// Write one value per line with 10 significant digits. The loop index `l`
// and the container `vector` are declared outside this snippet.
// Note: at() is bounds-checked on every access, and std::endl writes a
// newline AND flushes the stream on every iteration.
for (l = 0; l < vector.size(); l++)
fout << std::setprecision(10) << vector.at(l) << std::endl;

fout.close();


But this is taking a lot of time to finish. Is there a faster or more efficient way to do this? I would love to see and learn it.

Answer

Your algorithm has two parts: (1) serializing each double to a string, and (2) writing the result to a file. You can speed up the first part by using sprintf; however, I only see about a 15% improvement on my machine when compiling with the -O3 flag. The second part can be sped up by serializing the data to a string stream first and then writing the result to the output file in one go. My proposed solutions make the modified version of your code about 3 times faster. Note that you can also improve performance by writing the file in binary format. Below is my complete code sample:

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "fmt/format.h"

// Size, in bytes, of the scratch buffer that third_approach, fourth_approach
// and fifth_approach format a single output line into. fifth_approach also
// uses it as the per-element estimate when reserving its output buffer.
constexpr size_t LEN = 32;

// Build a vector holding the values 0, 1, ..., N - 1, each converted to T.
//
// N: number of elements to generate.
// Returns the generated vector (empty when N is 0).
template <typename T> std::vector<T> create_test_data(const size_t N) {
    std::vector<T> values;
    values.reserve(N);
    for (size_t i = 0; i != N; ++i) {
        values.push_back(static_cast<T>(i));
    }
    return values;
}

// Baseline, adapted from the original approach: stream every value straight
// to the file with 10 significant digits, ending each line with std::endl
// (newline plus flush). Kept deliberately unoptimized for comparison.
//
// data:     values to write, one per line.
// fileName: path of the output file; it is created or truncated.
void first_approach(const std::vector<double> &data, const std::string &fileName) {
    std::ofstream out;
    out.open(fileName);
    for (auto it = data.cbegin(); it != data.cend(); ++it) {
        out << std::setprecision(10) << *it << std::endl;
    }
    out.close();
}

// Serialize everything into an in-memory string stream first, then hand the
// whole text to the output file in a single insertion.
//
// data:     values to write, one per line, with 10 significant digits.
// fileName: path of the output file; it is created or truncated.
void second_approach(const std::vector<double> &data, const std::string &fileName) {
    // Phase 1: format all values in memory.
    std::stringstream serialized;
    std::for_each(data.cbegin(), data.cend(), [&serialized](const double v) {
        serialized << std::setprecision(10) << v << "\n";
    });

    // Phase 2: one bulk write to disk.
    std::ofstream out(fileName);
    out << serialized.str();
    out.close();
}

// Format each value with the printf family ("%.10g") into a fixed scratch
// buffer, collect the lines in a string stream, then write the result to the
// output file in one insertion.
//
// data:     values to write, one per line.
// fileName: path of the output file; it is created or truncated.
void third_approach(const std::vector<double> &data, const std::string &fileName) {
    // Serialize data to the string first.
    std::stringstream buffer;
    char aLine[LEN];
    for (const double value : data) {
        // snprintf is bounded by the buffer size, so a future change to LEN
        // or to the format string cannot overflow aLine.
        const int written = std::snprintf(aLine, sizeof(aLine), "%.10g\n", value);
        if (written <= 0) {
            continue; // formatting failed; nothing usable in aLine
        }
        // The return value is the untruncated length; clamp it to what was
        // actually stored (excluding the terminating NUL).
        const size_t len = std::min(static_cast<size_t>(written), sizeof(aLine) - 1);
        buffer.write(aLine, static_cast<std::streamsize>(len));
    }

    // Now write to the output file.
    std::ofstream fout(fileName);
    fout << buffer.str();
    fout.close();
}

// Use fmt::MemoryWriter (https://github.com/fmtlib/fmt) as the in-memory
// buffer: each value is formatted with "%.10g" into a fixed scratch buffer,
// appended to the writer, and the accumulated text is written to the output
// file in one insertion.
//
// data:     values to write, one per line.
// fileName: path of the output file; it is created or truncated.
void fourth_approach(const std::vector<double> &data, const std::string &fileName) {
    // Serialize data to the writer first.
    char aLine[LEN];
    fmt::MemoryWriter writer;
    for (const double value : data) {
        // snprintf is bounded by the buffer size and always NUL-terminates,
        // so aLine is a valid C string whenever formatting succeeded.
        if (std::snprintf(aLine, sizeof(aLine), "%.10g\n", value) > 0) {
            writer << aLine;
        }
    }

    // Now write to the output file.
    std::ofstream fout(fileName);
    fout << writer.str();
    fout.close();
}

// Use std::vector<char> as the in-memory buffer: each value is formatted with
// "%.10g" into a fixed scratch buffer, appended to the vector, and the
// accumulated bytes are written to the output file with a single write().
//
// data:     values to write, one per line.
// fileName: path of the output file; it is created or truncated.
void fifth_approach(const std::vector<double> &data, const std::string &fileName) {
    // Serialize data to the byte buffer first.
    char aLine[LEN];
    std::vector<char> buffer;
    buffer.reserve(data.size() * LEN); // upper bound: at most LEN bytes per value
    for (const double value : data) {
        const int written = std::snprintf(aLine, sizeof(aLine), "%.10g\n", value);
        if (written <= 0) {
            continue; // formatting failed; nothing usable in aLine
        }
        // The return value is the untruncated length; clamp it to what was
        // actually stored (excluding the terminating NUL).
        const size_t len = std::min(static_cast<size_t>(written), sizeof(aLine) - 1);
        buffer.insert(buffer.end(), aLine, aLine + len);
    }

    // Now write to the output file. The vector is not NUL-terminated, so it
    // must be written with an explicit length rather than streamed as a C
    // string (which would read past the end of the stored data).
    std::ofstream fout(fileName);
    if (!buffer.empty()) {
        fout.write(buffer.data(), static_cast<std::streamsize>(buffer.size()));
    }
    fout.close();
}

// Benchmark driver: generates N test values, runs each serialization approach
// once against its own output file, and prints the wall-clock time of each
// run in milliseconds.
int main() {
    constexpr size_t N = 3000000;
    const std::vector<double> data = create_test_data<double>(N);
    std::cout << "Number of elements: " << N << "\n";

    using Approach = void (*)(const std::vector<double> &, const std::string &);

    // Run one approach and report how long it took. steady_clock is used
    // because it is monotonic and therefore suitable for measuring intervals.
    const auto report = [&data](const char *label, Approach approach, const std::string &fileName) {
        const auto start = std::chrono::steady_clock::now();
        approach(data, fileName);
        const std::chrono::duration<double, std::milli> elapsed =
            std::chrono::steady_clock::now() - start;
        std::cout << label << ": " << elapsed.count() << "  milliseconds\n";
    };

    report("Original approach", first_approach, "first.txt");
    report("stringstream", second_approach, "second.txt");
    report("sprintf", third_approach, "third.txt");
    report("fmt", fourth_approach, "fourth.txt");
    report("std::vector<char>", fifth_approach, "fifth.txt");
}

Sample output collected on Linux using clang-3.9.1 with the -O3 flag:

Number of elements: 3000000
Original approach: 4003.4  milliseconds
stringstream: 1487.52  milliseconds
sprintf: 1293.75  milliseconds
fmt: 1256.78  milliseconds
std::vector<char>: 1253.89  milliseconds