Generating a power-law distribution in C and testing it with python -
i know that, given rng generates random numbers uniformly distributed, way obtain power-like data is, following wolfram mathworld following: let y random variable uniformly distributed in (0,1) , x random variable distributed p(x) = c*x**n (for x in (xmin,xmax)). have that
x=[ (xmax**(n+1) - xmin**(n-1))y+xmin**(n+1) ]**(1/(n+1))
so made program in c generates 50k numbers 1 100 should distributed x^(-2) , prints frequency of outcomes on file data.txt:
void random_powerlike(int *k, int dim, double degree, int xmin, int xmax, unsigned int *seed) { int i; double aux; for(i=0; i<dim; i++) { aux=(powq(xmax, degree +1 ) - powq(xmin, degree +1 ))*((double)rand_r(seed)/rand_max)+ powq(xmin, degree +1); k[i]=(int) powq(aux, 1/(degree+1)); } } int main() { unsigned int seed = 1934123471792583; file *tmp; char stringa[50]; sprintf(stringa, "data.txt"); tmp=fopen(stringa, "w"); int dim=50000; int *k; k= (int *) malloc(dim*sizeof(int)); int degree=-2; int freq[100]; random_powerlike(k,dim, degree, 1,100,&seed); fprintf(tmp, "#degree = %d x=[%d,%d]\n",degree,1,100); for(int j=0; j< 100;j++) { freq[j]=0; for(int = 0; i< dim; ++i) { if(k[i]==j+1) freq[j]++; } fprintf(tmp, "%d %d\n", j+1, freq[j]); } fflush(tmp); fclose(tmp); return 0; }
i decided fit these numbers pylab, see if best power-law fit them a*x**b, b = -2. wrote program in python:
import numpy scipy.optimize import curve_fit import pylab num, freq = pylab.loadtxt("data.txt", unpack=true) freq=freq/freq[0] def funzione(num, a,b): return a*num**(b) pars, covm = curve_fit(funzione, num, freq, absolute_sigma=true) xx=numpy.linspace(1, 99) pylab.plot(xx, funzione(xx, pars[0],pars[1]), color='red') pylab.errorbar(num, freq, linestyle='', marker='.',color='black') pylab.show() print pars
the problem when fit data, obtain exponent value of ~-1.65.
i think made mistake somewhere, can't figure out where.
i think have make histogram. rewrote code bit , fits now
#include <math.h> #include <stdlib.h> #include <string.h> #include <stdio.h> double rndm() { return (double)rand()/(double)rand_max; } double power_sample(double xmin, double xmax, int degree) { double pmin = pow(xmin, degree + 1); double pmax = pow(xmax, degree + 1); double v = pmin + (pmax - pmin)*rndm(); return pow(v, 1.0/(degree + 1)); } int main() { unsigned int seed = 32345u; srand(seed); int xmin = 1; int xmax = 100; double* hist = malloc((xmax-xmin + 1)*sizeof(double)); memset(hist, 0, (xmax-xmin + 1)*sizeof(double)); // sampling int nsamples = 100000000; for(int k = 0; k != nsamples; ++k) { double v = power_sample(xmin, xmax, 2); int idx = (int)v; hist[idx] += 1.0; } // normalization for(int k = xmin; k != xmax; ++k) { hist[k] /= (double)nsamples; } // output for(int k = xmin; k != xmax; ++k) { double x = k + 0.5; printf(" %e %e\n", x, hist[k]); } free(hist); // cleanup return 0; }
and fitting code
import numpy scipy.optimize import curve_fit import pylab def funzione(x, a,b): return * numpy.power(x, b) num, freq = pylab.loadtxt("q.dat", unpack=true) pars, covm = curve_fit(funzione, num, freq, absolute_sigma=true) pylab.plot(num, funzione(num, pars[0], pars[1]), color='red') pylab.errorbar(num, freq, linestyle='', marker='.',color='black') pylab.show() print(pars)
and produced
[ 3.00503372e-06 1.99961571e+00]
which pretty close
Comments
Post a Comment