 Martin Kristiansen - 2 years ago 198
C Question

Estimating memory scope of erlang datastructure

Being a former C programmer and current Erlang hacker one question has popped up.

How do I estimate the memory scope of my erlang datastructures?

Lets say I had an array of 1k integers in C, estimating the memory demand of this is easy, just the size of my array, times the size of an integer, 1k 32bit integers would take up 4kb or memory, and some constant amount of pointers and indexes.

In erlang however estimating the memory usage is somewhat more complicated, how much memory does an entry in erlangs array structure take up?, how do I estimate the size of a dynamically sized integer.

I have noticed that scanning over integers in array is fairly slow in erlang, scanning an array of about 1M integers takes almost a second in erlang, whereas a simple piece of c code will do it in arround 2 ms, this most likely is due to the amount of memory taken up by the datastructure.

I'm asking this, not because I'm a speed freak, but because estimating memory has, at least in my experience, been a good way of determining scalability of software.

My test code:

first the C code:

#include <cstdio>
#include <cstdlib>
#include <time.h>

#include <queue>
#include <iostream>

class DynamicArray{
protected:
int* array;
unsigned int size;
unsigned int max_size;

public:
DynamicArray() {
array = new int;
size = 0;
max_size = 1;
}

~DynamicArray() {
delete[] array;
}

void insert(int value) {
if (size == max_size) {
int* old_array = array;
array = new int[size * 2];
memcpy ( array, old_array, sizeof(int)*size );

for(int i = 0; i != size; i++)
array[i] = old_array[i];
max_size *= 2;
delete[] old_array;
}
array[size] = value;
size ++;
}

inline int read(unsigned idx) const {
return array[idx];
}

void print_array() {
for(int i = 0; i != size; i++)
printf("%d ", array[i]);
printf("\n ");

}

int size_of() const {
return max_size * sizeof(int);
}

};

void test_array(int test) {
printf(" %d ", test);
clock_t t1,t2;
t1=clock();
DynamicArray arr;
for(int i = 0; i != test; i++) {
//arr.print_array();
arr.insert(i);
}
int val = 0;
for(int i = 0; i != test; i++)
printf(" size %g MB ", (arr.size_of()/(1024*1024.0)));
t2=clock();
float diff ((float)t2-(float)t1);
std::cout<<diff/1000<< " ms" ;
printf(" %d \n", val == ((1 + test)*test)/2);
}

int main(int argc, char** argv) {
int size = atoi(argv);
printf(" -- STARTING --\n");
test_array(size);
return 0;
}

and the erlang code:

-module(test).

-export([go/1]).

construct_list(Arr, Idx, Idx) ->
Arr;
construct_list(Arr, Idx, Max) ->
construct_list(array:set(Idx, Idx, Arr), Idx + 1, Max).

sum_list(_Arr, Idx, Idx, Sum) ->
Sum;
sum_list(Arr, Idx, Max, Sum) ->
sum_list(Arr, Idx + 1, Max, array:get(Idx, Arr) + Sum ).

go(Size) ->
A0 = array:new(Size),
A1 = construct_list(A0, 0, Size),
sum_list(A1, 0, Size, 0).

Timing the c code:

bash-3.2\$ g++ -O3 test.cc -o test
bash-3.2\$ ./test 1000000
-- STARTING --
1000000 size 4 MB 5.511 ms 0

and the erlang code:

1> f(Time), {Time, _} =timer:tc(test, go, ), Time/1000.0.
2189.418 RichardC