Search code examples
c++linker-errorsrcppdynamic-linkinggsl

Implementing GSL algorithm for non-linear data fitting in R: Error in dyn.load


I am trying to implement a GSL Nonlinear least-squares algorithm for curve fitting in R using Rcpp. This question is close to a previous question I asked here: Fixing parameters of a fitting function in Nonlinear Least-Square GSL

My attempt to implement a GSL-based Nonlinear least-squares algorithm has been successful if my objective is to estimate all the parameter of a given function, that is used to fit the data. The problem comes when I try to follow @zkoza suggestion in Fixing parameters of a fitting function in Nonlinear Least-Square GSL for fixing some of the parameters of the function.

When I sourceCpp my code, adapted following my previous question I do get the following error:

Error in dyn.load("/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so") : 
  unable to load shared object '/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so':
  dlopen(/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so, 0x0006): symbol not found in flat namespace '__Z28internal_make_gsl_vector_ptrILm3EEP10gsl_vectorRKNSt3__15arrayIdXT_EEE' 

This is the C++ wrapper code for performing the nonlinear least-squares data fitting:

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
// [[Rcpp::depends(RcppGSL)]]
// [[Rcpp::depends(BH)]]
#define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#include <RcppNumerical.h>
#include <RcppGSL.h>
#include <array>
#include <Rcpp.h>
#include <iostream>
#include <vector>
#include <cassert>
#include <functional>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_multifit_nlinear.h>

using namespace std;
using namespace Rcpp;
using namespace Numer;

template <class R, class... ARGS>
struct function_ripper {
    static constexpr size_t n_args = sizeof...(ARGS);
};

template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
    return function_ripper<R, ARGS...>();
}

template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
    return std::make_tuple(func(Is)...);
}

template <size_t N, typename F>
auto gen_tuple(F func)
{
    return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}

auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
                           gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
    // This specifies a trust region method
    const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
    const size_t max_iter = 200;
    const double xtol = 1.0e-8;
    const double gtol = 1.0e-8;
    const double ftol = 1.0e-8;

    auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
    int info;

    // initialize solver
    gsl_multifit_nlinear_init(initial_params, fdf, work);
    //iterate until convergence
    gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);

    // result will be stored here
    gsl_vector * y    = gsl_multifit_nlinear_position(work);
    auto result = std::vector<double>(initial_params->size);

    for(int i = 0; i < result.size(); i++)
    {
        result[i] = gsl_vector_get(y, i);
    }

    auto niter = gsl_multifit_nlinear_niter(work);
    auto nfev  = fdf->nevalf;
    auto njev  = fdf->nevaldf;
    auto naev  = fdf->nevalfvv;
    gsl_multifit_nlinear_free(work);
    gsl_vector_free(initial_params);
    return result;
}

auto internal_make_gsl_vector_ptr(const std::vector<double>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}

template<typename C1>
struct fit_data
{
    const std::vector<double>& t;
    const std::vector<double>& y;
    // the actual function to be fitted
    C1 f;
};

template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
    auto* d  = static_cast<FitData*>(params);
    // Convert the parameter values from gsl_vector (in x) into std::tuple
    auto init_args = [x](int index)
    {
        return gsl_vector_get(x, index);
    };
    auto parameters = gen_tuple<n_params>(init_args);

    // Calculate the error for each...
    for (size_t i = 0; i < d->t.size(); ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        auto func = [ti, &d](auto ...xs)
        {
            // call the actual function to be fitted
            return d->f(ti, xs...);
        };
        auto y = std::apply(func, parameters);
        gsl_vector_set(f, i, yi - y);
    }
    return GSL_SUCCESS;
}

using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>;

template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
    assert(fd.t.size() == fd.y.size());

    auto fdf = gsl_multifit_nlinear_fdf();
    auto fdf_params = gsl_multifit_nlinear_default_parameters();

    fdf.f   = f;
    fdf.df  = df;
    fdf.fvv = fvv;
    fdf.n   = fd.t.size();
    fdf.p   = initial_params->size;
    fdf.params = &fd;

    // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    return internal_solve_system(initial_params, &fdf, &fdf_params);
}

template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
    auto params = internal_make_gsl_vector_ptr(initial_params);
    auto fd = fit_data<Callable>{x, y, f};
    return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
}

And these are the functions I used to fit the data:

// [[Rcpp::export]]
double gaussian(double x, double a, double b, double c){
    const double z = (x - b) / c;
    return a * std::exp(-0.5 * z * z);
}

struct gaussian_fixed_a{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(double x, double b, double c) const { return gaussian(x, a, b, c); }
};

// [[Rcpp::export]]
Rcpp::List fittingTest(const std::vector<double> xs,const std::vector<double> ys, const double a){

      gaussian_fixed_a g(a);
      auto r = curve_fit(g, std::array{0.444, 0.11}, xs, ys);
      return Rcpp::List::create(Rcpp::Named("b") = r[0],
                                  Rcpp::Named("c") = r[1]);
        }

Any idea where my code is creating the linking problem?


Solution

  • The error was solved by following @zkoza answer in , that is specifying a compile-time array where the number of parameters is automatically deduced from the length of the array. In Line 81 - 92:

    template<auto n>
    auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
    {
        auto* result = gsl_vector_alloc(vec.size());
        int i = 0;
        for(const auto e: vec)
        {
            gsl_vector_set(result, i, e);
            i++;
        }
        return result;
    }
    

    The full code now looks like:

    #include <RcppGSL.h>
    #include <array>
    #include <Rcpp.h>
    #include <cmath>
    #include <iostream>
    #include <cstdlib>
    #include <stdio.h>
    #include <math.h>
    #include <stdlib.h>
    #include <vector>
    #include <cassert>
    #include <functional>
    #include <gsl/gsl_vector.h>
    #include <gsl/gsl_multifit_nlinear.h>
    
    using namespace std;
    using namespace Rcpp;
    
    // [[Rcpp::depends(RcppGSL)]]
    
    template <typename F, size_t... Is>
    auto gen_tuple_impl(F func, std::index_sequence<Is...> )
    {
        return std::make_tuple(func(Is)...);
    }
    
    template <size_t N, typename F>
    auto gen_tuple(F func)
    {
        return gen_tuple_impl(func, std::make_index_sequence<N>{} );
    }
    
    template <class R, class... ARGS>
    struct function_ripper {
        static constexpr size_t n_args = sizeof...(ARGS);
    };
    
    template <class R, class... ARGS>
    auto constexpr n_params(R (ARGS...) )
    {
        return function_ripper<R, ARGS...>();
    }
    
    auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
                 gsl_multifit_nlinear_parameters *params) -> std::vector<double>
    {
      // This specifies a trust region method
      const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
      const size_t max_iter = 200;
      const double xtol = 1.0e-8;
      const double gtol = 1.0e-8;
      const double ftol = 1.0e-8;
    
      auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
      int info;
    
      // initialize solver
      gsl_multifit_nlinear_init(initial_params, fdf, work);
      //iterate until convergence
      gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);
    
      // result will be stored here
      gsl_vector * y    = gsl_multifit_nlinear_position(work);
      auto result = std::vector<double>(initial_params->size);
    
      for(int i = 0; i < result.size(); i++)
      {
        result[i] = gsl_vector_get(y, i);
      }
    
      auto niter = gsl_multifit_nlinear_niter(work);
      auto nfev  = fdf->nevalf;
      auto njev  = fdf->nevaldf;
      auto naev  = fdf->nevalfvv;
    
      gsl_multifit_nlinear_free(work);
      gsl_vector_free(initial_params);
      return result;
    }
    
    template<auto n>
    auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
    {
        auto* result = gsl_vector_alloc(vec.size());
        int i = 0;
        for(const auto e: vec)
        {
            gsl_vector_set(result, i, e);
            i++;
        }
        return result;
    }
    
    template<typename C1>
    struct fit_data
    {
        const std::vector<double>& t;
        const std::vector<double>& y;
        // the actual function to be fitted
        C1 f;
    };
    
    
    template<typename FitData, int n_params>
    int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
    {
        auto* d  = static_cast<FitData*>(params);
        // Convert the parameter values from gsl_vector (in x) into std::tuple
        auto init_args = [x](int index)
        {
            return gsl_vector_get(x, index);
        };
        auto parameters = gen_tuple<n_params>(init_args);
    
        // Calculate the error for each...
        for (size_t i = 0; i < d->t.size(); ++i)
        {
            double ti = d->t[i];
            double yi = d->y[i];
            auto func = [ti, &d](auto ...xs)
            {
                // call the actual function to be fitted
                return d->f(ti, xs...);
            };
            auto y = std::apply(func, parameters);
            gsl_vector_set(f, i, yi - y);
        }
        return GSL_SUCCESS;
    }
    
    using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
    using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
    using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);
    
    template<auto n>
    auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;
    
    auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
                 gsl_multifit_nlinear_parameters *params) -> std::vector<double>;
    
    template<typename C1>
    auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
    {
        assert(fd.t.size() == fd.y.size());
    
        auto fdf = gsl_multifit_nlinear_fdf();
        auto fdf_params = gsl_multifit_nlinear_default_parameters();
    
        fdf.f   = f;
        fdf.df  = df;
        fdf.fvv = fvv;
        fdf.n   = fd.t.size();
        fdf.p   = initial_params->size;
        fdf.params = &fd;
    
        // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
        fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
        return internal_solve_system(initial_params, &fdf, &fdf_params);
    }
    
    template <typename Callable, auto n>
    auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
    {
        // We can't pass lambdas without convert to std::function.
        //constexpr auto n = 3;//decltype(n_params(f))::n_args - 5;
        //constexpr auto n = 2;
        assert(initial_params.size() == n);
    
        auto params = internal_make_gsl_vector_ptr(initial_params);
        auto fd = fit_data<Callable>{x, y, f};
        return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
    }
    

    In the same *.cpp file is also included the gaussian function, the gaussian functor gaussian_fixed_a and the fittingTest function, as detailed in the question.

    In R, I would test the fittingTest function in the following way:

    Sys.setenv("PKG_CXXFLAGS"="-std=c++17")
    
    require(Rcpp)
    require(RcppGSL)
    
    sourceCpp('./testingCode.cpp')
    
    x_list = seq(2,14,1)
    
    ###START: simulate Data under gaussian function###
    res = data.frame(geo=x_list,val=NA)
    
    for(i in 1:length(x_list)){
      res$val[i]= gaussian(x_list[i], a=8, b=0.4, c=5)
    }
    ###END: simulate Data under gaussian function###
    
    fittingTest(xs=res$geo,ys=res$val,a=8)
    

    And the result I get for this example, while fixing a=8, are:

    $b
    [1] 0.4
    
    $c
    [1] 5
    

    For a visual inspection of the result, you can see that the simulated and fitted data overlap perfectly (note that in this R example I did not add any 'noise' to the simulated data.

    enter image description here