Francesco Di Lauro Francesco Di Lauro -3 years ago 120
C Question

Generating a power-law distribution in C and testing it with python

I know that, given a rng which generates random numbers uniformly distributed, a way to obtain power-like data is, following Wolfram Mathworld the following: let y be a random variable uniformly distributed in (0,1) and x another random variable distributed as P(x) = C*x**n (for x in (xmin,xmax)). We have that

x=[ (xmax**(n+1) - xmin**(n-1))y+xmin**(n+1) ]**(1/(n+1))


So i made this program in C that generates 50k numbers from 1 to 100 that should be distributed as x^(-2) and prints the frequency of the outcomes on a file DATA.txt:

void random_powerlike(int *k, int dim, double degree, int xmin, int xmax, unsigned int *seed)
{
int i;
double aux;
for(i=0; i<dim; i++)
{
aux=(powq(xmax, degree +1 ) - powq(xmin, degree +1 ))*((double)rand_r(seed)/RAND_MAX)+ powq(xmin, degree +1);

k[i]=(int) powq(aux, 1/(degree+1));

}
}

int main()
{
unsigned int seed = 1934123471792583;

FILE *tmp;
char stringa[50];
sprintf(stringa, "Data.txt");
tmp=fopen(stringa, "w");

int dim=50000;
int *k;
k= (int *) malloc(dim*sizeof(int));
int degree=-2;
int freq[100];

random_powerlike(k,dim, degree, 1,100,&seed);
fprintf(tmp, "#degree = %d x=[%d,%d]\n",degree,1,100);
for(int j=0; j< 100;j++)
{
freq[j]=0;
for(int i = 0; i< dim; ++i)
{
if(k[i]==j+1)
freq[j]++;
}
fprintf(tmp, "%d %d\n", j+1, freq[j]);
}
fflush(tmp);
fclose(tmp);

return 0;
}


I decided to fit these numbers with pylab, to see if the best power-law to fit them is something as a*x**b, with b = -2. I wrote this program in python:

import numpy
from scipy.optimize import curve_fit
import pylab

num, freq = pylab.loadtxt("Data.txt", unpack=True)
freq=freq/freq[0]

def funzione(num, a,b):
return a*num**(b)

pars, covm = curve_fit(funzione, num, freq, absolute_sigma=True)
xx=numpy.linspace(1, 99)
pylab.plot(xx, funzione(xx, pars[0],pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print pars


The problem is that when i fit the data, I obtain an exponent value of ~-1.65.

I think that I made a mistake somewhere, but I can't figure it out where.

Answer Source

I think you have to make a histogram. I just rewrote your code a bit and it fits very well now

#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

double rndm() {
    return (double)rand()/(double)RAND_MAX;
}

double power_sample(double xmin, double xmax, int degree) {
    double pmin = pow(xmin, degree + 1);
    double pmax = pow(xmax, degree + 1);
    double v = pmin + (pmax - pmin)*rndm();
    return pow(v, 1.0/(degree + 1));
}

int main() {
    unsigned int seed = 32345U;
    srand(seed);

    int xmin = 1;
    int xmax = 100;

    double* hist = malloc((xmax-xmin + 1)*sizeof(double));
    memset(hist, 0, (xmax-xmin + 1)*sizeof(double));

    // sampling
    int nsamples = 100000000;
    for(int k = 0; k != nsamples; ++k) {
        double v = power_sample(xmin, xmax, 2);
        int idx = (int)v;
        hist[idx] += 1.0;
    }

    // normalization
    for(int k = xmin; k != xmax; ++k) {
        hist[k] /= (double)nsamples;
    }

    // output
    for(int k = xmin; k != xmax; ++k) {
        double x = k + 0.5;
        printf(" %e     %e\n", x, hist[k]);
    }

    return 0;
}

and fitting code

import numpy
from scipy.optimize import curve_fit
import pylab

def funzione(x, a,b):
    return a * numpy.power(x, b)

num, freq = pylab.loadtxt("q.dat", unpack=True)

pars, covm =  curve_fit(funzione, num, freq, absolute_sigma=True)
pylab.plot(num, funzione(num, pars[0], pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print(pars)

and it produced

[  3.00503372e-06   1.99961571e+00]

which is pretty close

Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download