Chad Johnson Chad Johnson - 2 months ago 7
C Question

How can I use gsl to calculate polynomial regression data points?

(Disclaimer: I am terrible with math and am coming from JavaScript, so I apologize for any inaccuracies and will do my best to correct them.)

The example on Rosetta Code shows how to calculate coefficients using gsl. Here is the code:

polifitgsl.h:

#ifndef _POLIFITGSL_H
#define _POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store); /* n, p */
#endif


polifitgsl.cpp:

#include "polifitgsl.h"

bool polynomialfit(int obs, int degree,
double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;

int i, j;

X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);

for(i=0; i < obs; i++) {
for(j=0; j < degree; j++) {
gsl_matrix_set(X, i, j, pow(dx[i], j));
}
gsl_vector_set(y, i, dy[i]);
}

ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);

/* store result ... */
for(i=0; i < degree; i++)
{
store[i] = gsl_vector_get(c, i);
}

gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
to know if the fit is "good" */
}


main.cpp (note I've replaced the sample numbers for x any y with my own):

#include <stdio.h>

#include "polifitgsl.h"

#define NP 11
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};

#define DEGREE 3
double coeff[DEGREE];

int main()
{
int i;

polynomialfit(NP, DEGREE, x, y, coeff);
for(i=0; i < DEGREE; i++) {
printf("%lf\n", coeff[i]);
}
return 0;
}


And here is the output:

98.030909
-0.016182
0.000909


So that gives me the coefficients. But what I really want is the actual fitted points. In JavaScript, I've used the regression package to calculate the points:

var regression = require('regression');

var calculateRegression = function(values, degree) {
var data = [];
var regressionOutput;
var valuesCount = values.length;
var i = 0;

// Format the data in a way the regression library expects.
for (i = 0; i < valuesCount; i++) {
data[i] = [i, values[i]];
}

// Use the library to calculate the regression.
regressionOutput = regression('polynomial', data, degree);

return regressionOutput;
};

var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];

console.log(calculateRegression(y, 3));


Which produces:

{ equation:
[ 98.02987916431594,
-0.017378390369880512,
0.0015748071645344357,
-0.00005721503635571101 ],
points:
[ [ 0, 98.02987916431594 ],
[ 1, 98.01401836607424 ],
[ 2, 98.00096389194348 ],
[ 3, 97.9903724517055 ],
[ 4, 97.98190075514219 ],
[ 5, 97.97520551203543 ],
[ 6, 97.96994343216707 ],
[ 7, 97.96577122531896 ],
[ 8, 97.96234560127297 ],
[ 9, 97.959323269811 ],
[ 10, 97.95636094071487 ],
[ 11, 97.95311532376647 ],
[ 12, 97.94924312874768 ],
[ 13, 97.94440106544033 ],
[ 14, 97.93824584362629 ],
[ 15, 97.93043417308745 ],
[ 16, 97.92062276360569 ],
[ 17, 97.90846832496283 ],
[ 18, 97.89362756694074 ],
[ 19, 97.87575719932133 ] ],
string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' }


(Note there are floating point issues in JavaScript, so the numbers aren't perfectly exact.)

points
here is what I'm wanting to generate using gsl. Is there a way I can do this?

Answer Source

Chad, if I understand what you are needing, you simply need to compute the values based on the polynomial equation using the coefficients you found with your polynomialfit function. After you compute the coefficients, you can find the value y for any x (for DEGREE = 3) with the equation:

y = x^2*(coeff2) + x*(coeff1) + coeff0

or in C-syntax

y = x*x*coeff[2] + x*coeff[1] + coeff[0];

You can modify your main.cpp as follows (you should rename both main.cpp to main.c and polyfitgsl.cpp to polyfitgsl.c -- as they are both C-source files, not C++)

#include <stdio.h>

#include "polifitgsl.h"

#define NP 20
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };

#define DEGREE 3
double coeff[DEGREE];

int main (void)
{
    int i;

    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}

Which provides the following output:

Example Use/Output

$ ./bin/main

 polynomial coefficients:

  coeff[0] :  98.0132468
  coeff[1] :  -0.0053003
  coeff[2] :  -0.0000558

 computed values:

   x    y
   0,  98.0132468
   1,  98.0078906
   2,  98.0024229
   3,  97.9968435
   4,  97.9911524
   5,  97.9853497
   6,  97.9794354
   7,  97.9734094
   8,  97.9672718
   9,  97.9610226
  10,  97.9546617
  11,  97.9481891
  12,  97.9416049
  13,  97.9349091
  14,  97.9281016
  15,  97.9211825
  16,  97.9141517
  17,  97.9070093
  18,  97.8997553
  19,  97.8923896

That seems like what you are asking for. Look it over, compare your values and let me know if you need additional help.


Cleaning Up A Bit Further

Just to clean the code up a bit further, since there is no need for global declaration of x, y, or coeff, and using an enum to define the constants DEGREE and NP, a tidier version would be:

#include <stdio.h>

#include "polifitgsl.h"

enum { DEGREE = 3, NP = 20 };

int main (void)
{
    int i;
    double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
                   10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
    double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
                   97.97, 97.96, 97.94, 97.96, 97.96,
                   97.97, 97.97, 97.94, 97.94, 97.94,
                   97.92, 97.96, 97.9,  97.85, 97.9 };
    double coeff[DEGREE] = {0};


    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++)
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}