Least squares polynomial fitting works only with even number of coordinates

I've written a programme which calculates the equation of a parabola by giving a number of coordinates but it only works with an even number of coordinates. If I enter an uneven number it will display some nonsense bs.

Here is the code (I couldn't copy the code here because of some formatting problems):

#include<iostream>
#include<iomanip>
#include<cmath>
using namespace std;
int main()
{
    int i, j, k, n, N;
    n = 2;
    cout << "Number of data pairs:" << endl;
    cin >> N;
    double * x = new double[N]; 
    double * y = new double[N];
    cout << endl << "Enter the x-axis values:" << endl;
    for (i = 0; i<N; i++)
        cin >> x[i];
    cout << endl << "Enter the y-axis values:" << endl;
    for (i = 0; i<N; i++)
        cin >> y[i];
    double X[5];
    for (i = 0; i<2 * n + 1; i++)
    {
        X[i] = 0;
        for (j = 0; j<N; j++)
            X[i] = X[i] + pow(x[j], i);
    }
    double B[3][4], a[3]; //B is the Normal matrix(augmented) that will store the equations, 'a' is for value of the final coefficients
    for (i = 0; i <= n; i++)
        for (j = 0; j <= n; j++)
            B[i][j] = X[i + j];
    double Y[3];
    for (i = 0; i<n + 1; i++)
    {
        Y[i] = 0;
        for (j = 0; j<N; j++)
            Y[i] = Y[i] + pow(x[j], i)*y[j];
    }
    for (i = 0; i <= n; i++)
        B[i][n + 1] = Y[i];
    n = n + 1;
    for (i = 0; i < n; i++)
    {
        for (k = i + 1; k < n; k++)
            if (B[i][i] < B[k][i])
                for (j = 0; j <= n; j++)
                {
                    double temp = B[i][j];
                    B[i][j] = B[k][j];
                    B[k][j] = temp;
                }
    }
        for (i = 0; i<n - 1; i++) //loop to perform the gauss elimination
            for (k = i + 1; k<n; k++)
            {
                double t = B[k][i] / B[i][i];
                for (j = 0; j <= n; j++)
                    B[k][j] = B[k][j] - t*B[i][j]; //make the elements below the pivot elements equal to zero or elimnate the variables
            }
        for (i = n - 1; i >= 0; i--) //back-substitution
        {
            a[i] = B[i][n];
            for (j = 0; j < n; j++)
                if (j != i)
                    a[i] = a[i] - B[i][j] * a[j];
            a[i] = a[i] / B[i][i];
        }
        cout << endl << "The equation is the following:" << endl;;
        cout << a[2] << "x^2 + " << a[1] << "x + " << a[0];
        cout << endl;
        delete[]x;
        delete[]y;
        system("pause");
        return 0;
}

The outputs I got from 3 and 4 coordinates:

3 coordinates:

Number of data points:

3

Enter the x-axis values:

1

2

3

Enter the y-axis values

1

4

9

The equation is the following:

1.02762e+47x^2 + -4.316e+47x + 3.90495e+47

4 coordinates:

Number of data points:

4

Enter the x-axis values:

1

2

3

4

Enter the y-axis values

1

4

9

16

The equation is the following:

1x^2 + -0x + 0

Any ideas or tips?

Thanks in advance

Solution

The comments already posted note that some of the indexing is out of bounds (exceeds the size of the matrix). I would prefer to input the x,y data as one pair of values per line, an x value and a y value, which is an easy change to make (cin >> x[i] >> y[i]), but the examples below use the sequence from the original question.

Example code for conventional method for quadratic equation fit:

#include<iostream>
#include<iomanip>
#include<cmath>
using namespace std;
int main()
{
    int i, j, k, N;
    cout << "Number of data pairs:" << endl;
    cin >> N;
    double * x = new double[N]; 
    double * y = new double[N];
    cout << endl << "Enter the x-axis values:" << endl;
    for (i = 0; i<N; i++)
        cin >> x[i];
    cout << endl << "Enter the y-axis values:" << endl;
    for (i = 0; i<N; i++)
        cin >> y[i];
    double B[3][4] = {0.0};         // generate augmented matrix
    for(k = 0; k < 3; k++){
        for (i = 0; i < N; i++) {
            for (j = 0; j < 3; j++) {
                B[k][j] += pow(x[i], j + k);}
            B[k][3] += y[i]*pow(x[i], k);}}
    for(k = 0; k < 3; k++){         // invert matrix
        double q = B[k][k];         //   divide row by B[k][k]
        for(i = 0; i < 4; i++){
            B[k][i] /= q;}
        for(j = 0; j < 3; j++){     //   zero out column B[][k]
            if(j == k)
                continue;
            double m = B[j][k];         
            for(i = 0; i < 4; i++){
                B[j][i] -= m*B[k][i];}}}
    cout << endl << "The equation is the following:" << endl;;
    cout << B[2][3] << " x^2 + " << B[1][3] << " x + " << B[0][3];
    cout << endl;
    delete[]x;
    delete[]y;
    system("pause");
    return 0;
}

Example conventional code for generic degree equation:

#include<iostream>
#include<iomanip>
#include<cmath>
using namespace std;
int main()
{
    int d, i, j, k, n;
    cout << "Degree of equation:" << endl;
    cin >> d;
    double **A = new double *[d+1];
    for (k = 0; k < d+1; k++) {
        A[k] = new double[d+2];
        for (i = 0; i < d+2; i++) {
            A[k][i] = 0.0;}}
    cout << "Number of data pairs:" << endl;
    cin >> n;
    double * x = new double[n]; 
    double * y = new double[n];
    cout << endl << "Enter the x-axis values:" << endl;
    for (i = 0; i < n; i++)
        cin >> x[i];
    cout << endl << "Enter the y-axis values:" << endl;
    for (i = 0; i < n; i++)
        cin >> y[i];
    for(k = 0; k < d+1; k++){
        for (i = 0; i < n; i++) {
            for (j = 0; j < d+1; j++) {
                A[k][j] += pow(x[i], j + k);}
            A[k][d+1] += y[i]*pow(x[i], k);}}
    for(k = 0; k < d+1; k++){       // invert matrix
        double q = A[k][k];         //   divide A[k][] by A[k][k]
                                    //   if q == 0, would need to swap rows                                 
        for(i = 0; i < d+2; i++){
            A[k][i] /= q;}
        for(j = 0; j < d+1; j++){   //   zero out column A[][k]
            if(j == k)
                continue;
            double m = A[j][k];         
            for(i = 0; i < d+2; i++){
                A[j][i] -= m*A[k][i];}}}
    cout << endl << "The equation is the following:" << endl;
    for(k = d; k >= 2; k--)
        cout << A[k][d+1] << " x^" << k << " + ";
    cout << A[1][d+1] << " x" << " + " << A[0][d+1] << endl;
    for (k = 0; k < d+1; k++)
        delete[] A[k];
    delete[]A;
    delete[]y;
    delete[]x;
    system("pause");
    return 0;
}

Sample test input:

I normally use an alternative algorithm that avoids having to invert a matrix, which is better for higher order polynomials, but unneeded for a quadratic equation.

If interested in the alternative algorithm, here is a link to a pdf file that describes the algorithm and includes pseudo code. I have old working code, but it needs to be converted (it uses cgets() which is rarely supported anymore).

http://rcgldr.net/misc/opls.pdf

Example code. It's really old and originally ran on an Atari ST. I converted it to work with Visual Studio. The reason for all of the statics was to reduce the number of symbols that the linker would have to deal with.

/*------------------------------------------------------*/
/*      fit4.c          polynomial fit program          */
/*      originally written in 1990, minimal updates     */
/*------------------------------------------------------*/
/* disable Visual Studio warnings for old functions */
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>

/*                              mmax = max # coefficients */
/*                              nmax = max # points */
/*                              bfrsz = bfr size */
/*                              linsz = size of line bfr */
#define mmax 11
#define nmax 300
#define bfrsz 0x2000
#define linsz 64

static void polyf();
static void gcoef();
static void gvar();
static void calc();
static void calcx();
static void rdata();
static void gdata();
static int  gtlin();
static char gtchr();
static int  conrs();

static char cbfr[64];           /* console response bfr */
static char line[linsz];
static int  lineno;

static char sbfr[bfrsz];        /* file params */
static FILE *sfp;
static char *sptr, *send;
static int gteof;

static int wf;

static int m, n;                /* input values */
static double x[nmax];
static double y[nmax];
static double w[nmax];
static double b[mmax];          /* generated values */
static double A[mmax];
static double B[mmax];
static double L[mmax];
static double W[mmax];
static double p2[nmax];
static double p1[nmax];
static double p0[nmax];
static double c[mmax];          /* coefficients for y(x) */
static double z[nmax];          /* calculated y[] */
static double xi, zi, vr;
static double D0, D1;           /* constants */

static double *pp2;             /* pointers to logical p2, p1, p0 */
static double *pp1;
static double *pp0;
static double *ppx;

static double *px, *pf, *pw;    /* for gdata */

main()
{
int i;

name0:
    printf("\nEnter name of data file: "); /* get file name */
    if(!conrs())
        return(0);
    sfp = fopen(&cbfr[2], "rb"); /* open file */
    if(sfp == (FILE *)0){
        printf("\nfile not found");
        goto name0;}

    wf = 0;                     /* ask for weighting */
    printf("\nUsing weights (Y/N)? ");
    if(!conrs())
        return(0);
    if('Y' == (cbfr[2]&0x5f))
        wf = 1;

deg0:
    printf("\nEnter degree of equation (1-n): ");  /* get # terms */
    if(!conrs())
        return(0);
    sscanf(&cbfr[2], "%d", &m);
    if(m >= mmax)
        goto deg0;

    sptr = send = (char *)0;
    gteof = 0;
    lineno = 0;

    gdata();                    /* get data */

    printf("\n%5d points found ", n);

    polyf();                    /* generate b[], A[], B[] */
    gcoef();                    /* generate coefficients */
    gvar();                     /* generate variance */

    for(i = 0; i <= m; i++){
        printf("\n%3d  b%12.4le  A%12.4le  B%12.4le",
                i, b[i], A[i], B[i]);
        printf("  L%12.4le  W%12.4le", L[i], W[i]);}

    printf("\nvariance = %12.4le\n", vr);

    for(i = m; i; i--)
        printf("%12.4le X**%1d + ", c[i], i);
    printf("%12.4le\n", c[0]);

    for(i = 0; i < n; i++){     /* calculate results */
        xi = x[i];
        calc();
        z[i] = zi;}

    for(i = 0; i < n; i += 1)   /* display results */
        printf("\n%14.6le  %14.6le  %14.6le  %14.6le",
                x[i], y[i], z[i], y[i]-z[i]);
    printf("\n");
    return(0);
}

/*------------------------------------------------------*/
/*      polyf           poly fit                        */
/*      in:             x[], y[], w[], n, m             */
/*      out:            b[], A[], B[], L[], W[]         */
/*------------------------------------------------------*/
static void polyf()
{
int i, j;

    D0 = (double)0.;            /* init */
    D1 = (double)1.;
    pp2 = p2;
    pp1 = p1;
    pp0 = p0;

    j = 0;
    A[j] = D0;                  /* calc A, p[j], p[j-1], L, W */
    L[j] = D0;                  /* note A[0] not used */
    W[j] = D0;
    for(i = 0; i < n; i++){
        pp0[i] = D1;
        pp1[i] = D0;
        L[j] += w[i];
        W[j] += w[i]*y[i];}
    B[0] = D0;
    b[j] = W[j]/L[j];

    for(j = 1; j <= m; j++){
        ppx = pp2;              /* save old p[j], p[j-1] */
        pp2 = pp1;
        pp1 = pp0;
        pp0 = ppx;
        A[j] = D0;              /* calc A */
        for(i = 0; i < n; i++){
            A[j] += w[i]*x[i]*pp1[i]*pp1[i]/L[j-1];}
        L[j] = D0;              /* calc p[j], L, W */
        W[j] = D0;
        for(i = 0; i < n; i++){
            pp0[i] = (x[i]-A[j])*pp1[i]-B[j-1]*pp2[i];
            L[j] += w[i]*pp0[i]*pp0[i];
            W[j] += w[i]*y[i]*pp0[i];}
        B[j] = L[j]/L[j-1];     /* calc B[], b[] */
        b[j] = W[j]/L[j];}
}

/*------------------------------------------------------*/
/*      gcoef   generate coefficients                   */
/*      in:     b[], A[], B[]                           */
/*      out:    c[]                                     */
/*      uses:   p0[], p1[], p2[]                        */
/*------------------------------------------------------*/
static void gcoef()
{
int i, j;
    for(i = 0; i <= m; i++){            /* init */
        c[i] = p2[i] = p1[i] = p0[i] = 0.;}
    p0[0] = D1;
    c[0] += b[0]*p0[0];
    for(j = 1; j <= m; j++){            /* generate coefs */
        p2[0] = p1[0];
        p1[0] = p0[0];
        p0[0] = -A[j]*p1[0]-B[j-1]*p2[0];
        c[0] += b[j]*p0[0];
        for(i = 1; i <= j; i++){
            p2[i] = p1[i];
            p1[i] = p0[i];
            p0[i] = p1[i-1]-A[j]*p1[i]-B[j-1]*p2[i];
            c[i] += b[j]*p0[i];}}
}

/*------------------------------------------------------*/
/*      gvar    generate variance                       */
/*------------------------------------------------------*/
static void gvar()
{
int i;
double tt;
    vr = 0.;
    for(i = 0; i < n; i++){
        xi = x[i];
        calc();
        tt = y[i]-zi;
        vr += tt*tt;}
    vr /= n-m-1;
}

/*------------------------------------------------------*/
/*      calc            calc zi, given xi               */
/*      in:             c[]                             */
/*------------------------------------------------------*/
static void calc ()
{
int i;
    zi = c[m];
    for(i = m-1; i >= 0; i--)
        zi = zi*xi + c[i];
}

/*------------------------------------------------------*/
/*      calcx           calc zi, given xi               */
/*      in:             b[], A[], B[]                   */
/*------------------------------------------------------*/
static void calcx()
{
int i;
double q2, q1, q0;
    if(m == 0){
        zi = b[0];
        return;}
    if(m == 1){
        zi = b[0]+(xi-A[1])*b[1];
        return;}
    q1 = b[m];
    q0 = b[m-1]+(xi-A[m])*q1;
    for(i = m-2; i >= 0; i--){
        q2 = q1;
        q1 = q0;
        q0 = b[i]+(xi-A[i+1])*q1-B[i+1]*q2;}
    zi = q0;
}

/*------------------------------------------------------*/
/*      gdata   get data                                */
/*------------------------------------------------------*/
static void gdata()
{
    px = &x[0];
    pf = &y[0];
    pw = &w[0];
    while(1){
        gtlin();                /* get a line */
        if(gteof)
            break;
        if(lineno == nmax){
            printf("\ntoo many points\n");
            break;}
        if(wf){                 /* stuff values */
            sscanf(line, "%le%le%le", px, pf, pw);}
        else{
            sscanf(line, "%le%le", px, pf);
            *pw = 1.0;}
        px++;                   /* bump ptrs */
        pf++;
        pw++;}
    fclose(sfp);                /* close file */
    n = lineno;                 /* set # points */
}

/*------------------------------------------------------*/
/*      gtlin   get a line of data                      */
/*------------------------------------------------------*/
static int gtlin()
{
char chr;
int col;
    col = 0;
    while(1){
        chr = gtchr();
        switch(chr){
          case 0x0a:            /* line feed */
            lineno++;
            return(col);
          case 0x1a:
            return(col);
          default:
            line[col] = chr;
            col++;
            if(col >= linsz){
                printf("line # %d too long\n%s",lineno, line);
                return(col);}}}
}

/*------------------------------------------------------*/
/*      gtchr   get a char                              */
/*------------------------------------------------------*/
static char gtchr()
{
int cnt;
    if(gteof)                   /* check for eof */
        return(0x1a);
    if(sptr == send){
        if(!(cnt = (int) fread(sbfr, 1, bfrsz, sfp))){
            fclose(sfp);
            gteof = 1;
            return(0x1a);}
        sptr = sbfr;
        send = sbfr+cnt;}
    return(*sptr++);
}

/*------------------------------------------------------*/
/*      conrs           get string from console         */
/*------------------------------------------------------*/
static int conrs()
{
int i;
    memset(cbfr, 0, sizeof(cbfr));  /* get a line */
    cbfr[0] = sizeof(cbfr)-2;
    fgets(cbfr+2, sizeof(cbfr)-2, stdin);
    cbfr[1] = (char)(strlen(&cbfr[2])-1);
    i = cbfr[1];
    cbfr[2+i] = 0;
    return(i);
}

Example data file. I named it fitdat.txt:

1  1
2  4
3  9

Compile and run the program. Enter the name of the data file, then N when prompted for using weights, and then 2 for the degree of the equation to be generated. (If using weights, the first column would be the weighting factor, a weight of 2 would be the same has two instances of the same data point, but weights can be values like 1.5).