Search code examples
c#data-fittingnon-linear-regressionalglib

Alglib Data fitting with minlmoptimize does not minimize the results. Full c# included


I'm having trouble implementing the lm optimizer in the alglib library. I'm not sure why the parameters are hardly changing at all while still receiving an exit code of 4. I have been unable to determine what i am doing wrong with the documentation for alglib. Below is the full source I am running:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Threading.Tasks;

namespace FBkineticsFitter
{
    class Program
    {

        public static int Main(string[] args)
        {
            /*
             * This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
             * This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
             * derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
             * 
             * The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
             * in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
             *      R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
             *
             *          Y=((ka*Cpro*Bmax)/(ka*Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd)))  ; this equation describes the association phase
             *          
             *          Y=Req*exp(-1*X*kd) ; this equation describes the dissociation phase
             * 
             * The data are fit globally such that Bmax and Req parameters are linked and kd parameters are linked during simultaneous optimization for the most robust fit
             *          
             *  Y= signal
             *  X= time
             *  ka= association constant
             *  kd= dissociation constant
             *  Bmax= maximum binding capacity at equilibrium
             *  Req=(Cpro/(Cpro+kobs))*Bmax :. in this case Req=Bmax because Cpro=0 during the dissociation step
             *  Cpro= concentration of protein in solution
             *  
             *      additional calculations:
             *          kobs=ka*Cpro
             *          kD=kd/ka
            */
            GetRawDataXY(@"C:\Results.txt");
            double epsg = .0000001;
            double epsf = 0;
            double epsx = 0;
            int maxits = 0;
            alglib.minlmstate state;
            alglib.minlmreport rep;

            alglib.minlmcreatev(2, GlobalVariables.param, 0.0001, out state);
            alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
            alglib.minlmoptimize(state, Calc_residuals, null, null);
            alglib.minlmresults(state, out GlobalVariables.param, out rep);

            System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
            System.Console.WriteLine("{0}", alglib.ap.format(GlobalVariables.param, 20));
            System.Console.ReadLine();
            return 0;
        }

        public static void Calc_residuals(double[] param, double[] fi, object obj)
        {
            /*calculate the difference of the model and the raw data at each X (I.E. residuals)
             * the sum of the square of the residuals is returned to the optimized function to be minimized*/
            fi[0] = 0;
            fi[1] = 0;

            for (int i = 0; i < GlobalVariables.rawXYdata[0].Count();i++ )
            {
                if (GlobalVariables.rawXYdata[1][i] <= GlobalVariables.breakpoint)
                {
                    fi[0] += System.Math.Pow((kaEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
                }
                else
                {
                    fi[1] += System.Math.Pow((kdEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
                }
            }

        }

        public static double kdEQN(double x)
        {
            /*Calculate kd Y value based on the incremented parameters*/
            return GlobalVariables.param[2] * Math.Exp(-1 * x * GlobalVariables.param[1]);
        }

        public static double kaEQN(double x)
        {
            /*Calculate ka Y value based on the incremented parameters*/
            return ((GlobalVariables.param[0] * GlobalVariables.Cpro * GlobalVariables.param[2]) / (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])) * (1 - Math.Exp(-1 * x * (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])));
        }

        public static void GetRawDataXY(string filename)
        {
            /*Read in Raw data From tab delim txt*/
            string[] elements = { "x", "y" };
            int count = 0;
            GlobalVariables.rawXYdata[0] = new double[1798];
            GlobalVariables.rawXYdata[1] = new double[1798];

            using (StreamReader sr = new StreamReader(filename))
            {
                while (sr.Peek() >= 0)
                {
                    elements = sr.ReadLine().Split('\t');
                    GlobalVariables.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
                    GlobalVariables.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
                    count++;
                }
            }
        }

        public class GlobalVariables
        {
            public static double[] param = new double[] { 1, .02, 0.13 }; ////ka,kd,Bmax  these are initial guesses for the algorithm
            public static double[][] rawXYdata = new double[2][];
            public static double Cpro = 100E-9;
            public static double kD = 0;
            public static double breakpoint = 180;
        }
    }
}

Solution

  • According to Sergey Bochkanova The issue is the following:

    "You should use param[] array which is provided to you by optimizer. It creates its internal copy of your param, and updates this copy - not your param array.

    From the optimizer point of view, it has function which never changes when it changes its internal copy of param. So, it terminates right after first iteration."

    Here is the updated and working example code:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Threading.Tasks;
    
    namespace FBkineticsFitter
    {
        class Program
        {
    
            public static int Main(string[] args)
            {
                /*
                 * This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
                 * This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
                 * derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
                 * 
                 * The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
                 * in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
                 *      R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
                 *
                 *          Y=((Cpro*Rmax)/(Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd)))  ; this equation describes the association phase
                 *          
                 *          Y=Req*exp(-1*X*kd)+NS ; this equation describes the dissociation phase
                 *          
                 * According to ForteBio's Application Notes #14 the amplitudes of the data can be correctly accounted for by modifying the above equations as follows:
                 * 
                 *          Y=(Rmax*(1/(1+(kd/(ka*Cpro))))*(1-exp(((-1*Cpro)+kd)*X))    ; this equation describes the association phase
                 *          
                 *          Y=Y0*(exp(-1*kd*(X-X0)))    ; this equation describes the dissociation phase
                 * 
                 * 
                 * 
                 * The data are fit simultaneously such that all fitting parameters are linked during optimization for the most robust fit
                 *          
                 *  Y= signal
                 *  X= time
                 *  ka= association constant  [fitting parameter 0]
                 *  kd= dissociation constant [fitting parameter 1]
                 *  Rmax= maximum binding capacity at equilibrium  [fitting parameter 2]
                 *  KD=kd/ka 
                 *  kobs=ka*Cpro+kd
                 *  Req=(Cpro/(Cpro+KD))*Rmax
                 *  Cpro= concentration of protein in solution
                 *  NS= non-specific binding at time=infinity (constant correction for end point of fit) [this is taken into account in the amplitude corrected formula: Y0=Ylast]
                 *  Y0= the initial value of Y for the first point of the dissociation curve (I.E. the last point of the association phase)
                 *  X0= the initial value of X for the first point of the dissociation phase
                 *          
                */
    
                GetRawDataXY(@"C:\Results.txt");
                double epsg = .00001;
                double epsf = 0;
                double epsx = 0;
                int maxits = 10000;
                alglib.minlmstate state;
                alglib.minlmreport rep;
                double[] param = new double[] { 1000000, .0100, 0.20};////ka,kd,Rmax  these are initial guesses for the algorithm and should be mid range for the expected data.,  The last parameter Rmax should be guessed as the maximum Y-value of Ka
                double[] scaling= new double[] { 1E6,1,1};
    
                alglib.minlmcreatev(2, param, 0.001, out state);
                alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
                alglib.minlmsetgradientcheck(state, 1);
                alglib.minlmsetscale(state, scaling);
                alglib.minlmoptimize(state, Calc_residuals, null, V.rawXYdata);
                alglib.minlmresults(state, out param, out rep);
    
                System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
                System.Console.WriteLine("{0}", alglib.ap.format(param, 25));
                System.Console.ReadLine();
                return 0;
            }
    
            public static void Calc_residuals(double[] param, double[] fi, object obj)
            {
                /*calculate the difference of the model and the raw data at each X (I.E. residuals)
                 * the sum of the square of the residuals is returned to the optimized function to be minimized*/
                CalcVariables(param);
    
                fi[0] = 0;
                fi[1] = 0;
    
                for (int i = 0; i < V.rawXYdata[0].Count(); i++)
                {
                    if (V.rawXYdata[0][i] <= V.breakpoint)
                    {
                        fi[0] += System.Math.Pow((kaEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
                    }
                    else
                    {
                        if (!V.breakpointreached) 
                        { 
                            V.breakpointreached = true;
                            V.X_0 = V.rawXYdata[0][i];
                            V.Y_0 = V.rawXYdata[1][i];
                        }
    
                        fi[1] += System.Math.Pow((kdEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
                    }
                }
    
                if (param[0] <= 0 || param[1] <=0 || param[2] <= 0)////Exponentiates the error if the parameters go negative to favor positive non-zero values
                {
                    fi[0] = Math.Pow(fi[0], 2);
                    fi[1] = Math.Pow(fi[1], 2);
                }
    
                System.Console.WriteLine("{0}"+"  "+V.Cpro+"  -->"+fi[0], alglib.ap.format(param, 5));
                Console.WriteLine((kdEQN(V.rawXYdata[0][114], param)));
            }
    
            public static double kdEQN(double X, double[] param)
            {
                /*Calculate kd Y value based on the incremented parameters*/
                return (V.Rmax * (1 / (1 + (V.kd / (V.ka * V.Cpro)))) * (1 - Math.Exp((-1 * V.ka * V.Cpro) * V.X_0))) * Math.Exp((-1 * V.kd) * (X - V.X_0));
            }
    
            public static double kaEQN(double X, double[] param)
            {
                /*Calculate ka Y value based on the incremented parameters*/
                return ((V.Cpro * V.Rmax) / (V.Cpro + V.kd)) * (1 - Math.Exp(-1 * X * ((V.ka * V.Cpro) + V.kd)));
            }
    
            public static void GetRawDataXY(string filename)
            {
                /*Read in Raw data From tab delim txt*/
                string[] elements = { "x", "y" };
                int count = 0;
                V.rawXYdata[0] = new double[226];
                V.rawXYdata[1] = new double[226];
    
                using (StreamReader sr = new StreamReader(filename))
                {
                    while (sr.Peek() >= 0)
                    {
                        elements = sr.ReadLine().Split('\t');
                        V.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
                        V.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
                        count++;
                    }
                }
            }
    
            public class V
            {
                /*Global Variables*/
                public static double[][] rawXYdata = new double[2][];
                public static double Cpro = 100E-9;
                public static bool breakpointreached = false;
                public static double X_0 = 0;
                public static double Y_0 = 0;
                public static double ka = 0;
                public static double kd = 0;
                public static double Rmax = 0;
                public static double KD = 0;
                public static double Kobs = 0;
                public static double Req = 0;
                public static double breakpoint = 180;
            }
    
            public static void CalcVariables(double[] param)
            {
                V.ka = param[0];
                V.kd = param[1];
                V.Rmax = param[2];
                V.KD = param[1] / param[0];
                V.Kobs = param[0] * V.Cpro + param[1];
                V.Req = (V.Cpro / (V.Cpro + param[0] * V.Cpro + param[1])) * param[2];
            }
        }
    }