Search code examples
c++neural-networkmlp

I implemented a MLP Neural Network in C++, but if i normally compile it doesn't work, if i use the debugger with VS Code it works


I tried to implemt a MLP Neural Network for digit recognition using Eigen3, but if i normally compile and execute it at some point all the parameters (weights, biases, activations) become NaN. Instead if i debug it with VS Code it works.

This is the network

#include "..\Headers\Network.h"
#include <cmath>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>

using std::vector;
using std::string;
using Eigen::VectorXd;
using Eigen::MatrixXd;

double sigmoide (double x);
double sigmoide_derivative(double x);
VectorXd sigmoide_derivative(VectorXd vec);
void load_val (string pi, VectorXd& pixels);
std::ofstream _log ("log.txt");

template <typename T>
void print(T& mat)
{
    for (int r = 0; r < mat.rows(); r++)
    {
        for (int c = 0; c < mat.cols(); c++)
        {
        }
    }
}

Network::Network(vector<string> _data, int _l_rate, vector<int> dim) : data{_data}, l_rate{_l_rate}
{
    layers = dim.size();
    for (int i = 0; i < layers - 1; i++)
    {
        MatrixXd m = MatrixXd::Random(dim[i + 1], dim[i]);
        weights.push_back(m);
        VectorXd b = VectorXd::Random(dim[i + 1]);
        biases.push_back(b);
        VectorXd _z (dim[i + 1]);
        z.push_back(_z);
        VectorXd n (dim[i]);
        neurons.push_back(n);
    }
    VectorXd n_f (dim[layers - 1]);
    neurons.push_back(n_f);
}

void Network::learn(int epoch, int mini_batch)
{

    for(int e = 0; e < epoch; e++)
    {
        std::cout << "Epoch: " << e + 1 << "\n\n";
        double e_cost = 0;
        shuffle(begin(data), end(data), rng);
        for(unsigned long long int n = 0; n < data.size();)
        {
            e_cost += SGD(mini_batch, n);
        }
    }
}


double Network::SGD (int mini_batch, unsigned long long int& n_data)
{
    vector<VectorXd> p_d_biases = vector<VectorXd>();
    vector<MatrixXd> p_d_weights = vector<MatrixXd>();
    double b_cost = 0;
    for(int m = 0; m < mini_batch && n_data != data.size(); m++, n_data++)
    {
        feed_forward(data.at(n_data));
        SGD_b(p_d_biases, data.at(n_data));
        SGD_w(p_d_weights, p_d_biases);
        b_cost += cost(data.at(n_data));
    }
    step(p_d_weights, p_d_biases, mini_batch);
    return b_cost / mini_batch;
}

double Network::cost(string sample)
{
    VectorXd e_values = VectorXd();
    exp_values(sample, e_values);
    double s_cost = 0;
    for(int i = 0; i < neurons[layers - 1].size(); i++)
    {
        if(e_values[i] == 1)
        {
            s_cost += std::pow(neurons.at(layers - 1)(i) - 1, 2);
        }
        else
        {
            s_cost += std::pow(neurons.at(layers - 1)(i), 2);
        }
    }
    return s_cost;
}

void Network::step(const vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases, int mini_batch)
{
    for(int l = layers - 2, n = 0; l >= 0; l--, n++)
    {
        VectorXd b_tmp (biases.at(l).rows());
        MatrixXd w_tmp (weights.at(l).rows(), weights.at(l).cols());
        for(int i = 0; i < layers - 1; i++)
        {
            b_tmp += p_d_biases.at((i * (layers - 1)) + n);
            w_tmp += p_d_weights.at((i * (layers - 1)) + n);
        }
        biases.at(l) -= l_rate * (b_tmp / mini_batch);
        weights.at(l) -= l_rate * (w_tmp / mini_batch);
    }
}

void Network::SGD_w (vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases)
{
    for(int l = layers - 2; l >= 0; l--)
    {
        p_d_weights.push_back(p_d_biases.at(p_d_biases.size() - (l + 1)) * neurons.at(l).transpose());
    }
}

void Network::SGD_b (vector<VectorXd>& p_d_biases, string sample)
{
    VectorXd e_values = VectorXd();
    exp_values(sample, e_values);
    for(int l = layers - 1; l > 0; l--)
    {
        if(l == (layers - 1))
        {
            p_d_biases.push_back((2*(neurons.at(l) - e_values)).cwiseProduct(sigmoide_derivative(z.at(l-1))));
        }
        else
        {
            VectorXd b =(weights.at(l).transpose() * p_d_biases.at(p_d_biases.size() - 1)).cwiseProduct(sigmoide_derivative(z.at(l - 1)));
            p_d_biases.push_back(b);
        }
    }
}

void Network::feed_forward(string sample)
{
    load_val(sample, neurons[0]);
    for(int l = 0; l < layers - 1; l++)
    {
        z.at(l) = weights.at(l) * neurons.at(l) + biases.at(l);
        for (int i = 0; i < biases.at(l).size(); i++)
        {
            neurons.at(l + 1)(i) = sigmoide(z.at(l)(i));
        }
    }
}

void Network::exp_values (string sample, VectorXd& e_values)
{
    e_values.resize(neurons[layers - 1].size());
    short digit = std::stoi(string(sample.begin(), sample.begin() + 1));
    for(int i = 0;  i < e_values.size(); i++)
    {
        if(i - digit == 0)
        {
            e_values(i) = 1;
        }
        else
        {
            e_values(i) = 0;
        }
    }
}

void load_val (string pi, VectorXd& pixels)
{
    int i = 0;
    for(auto s = pi.begin() + 3, p = pi.begin() + 1; s != pi.end(); s++)
    {
        if(*s == ',')
        {
            double t = std::stoi(std::string(p + 1, s)) / 255.;
            pixels(i) = t;
            p = s;
            i++;
        }
        if(s == (pi.end() - 1))
        {
            double t = std::stoi(std::string(s, pi.end())) / 255.;
            pixels(i) = t;
        }
    }
}

VectorXd sigmoide_derivative(VectorXd vec)
{
    VectorXd result = VectorXd();
    result.resize(vec.size());
    for(int r = 0; r < vec.rows(); r++)
    {
        result(r) = sigmoide_derivative(vec(r));
    }
    return result;
}

double sigmoide_derivative(double x)
{
    return std::exp(x) / std::pow(1 + std::exp(x), 2);
}

double sigmoide (double x)
{
    return 1. / (1 + (1. / std::exp(x)));
}


This is the main

int main()
{
    ifstream f_data ("..\\csv_files\\mnist_train.csv");
    vector<string> data;
    if(f_data.good())
    {
        while(!(f_data.eof())) 
        {
            string tmp;
            f_data >> tmp;
            data.push_back(tmp);
        }
        vector<int> dim {784, 16, 10};
        Network n (data, 3, dim);
        n.learn(20, 10);
    
        ifstream t_data ("..\\csv_files\\mnist_test.csv");
        string s;
        t_data >> s;
        cout << string(s.begin(), s.begin() + 1) << endl;
        n.feed_forward(s);
        print(n.getNeurons(2));
    }
    return 0;
}

I tried to disable the compiler optimizations, i controlled that everything was initiliazed, but other than this i don't know where to put my hands. I use as a compiler GCC from MinGW.


Solution

  • I found the solution, in the sigmoide and sigmoide_derivative function there are exp() functions that for values to high of x returns a value too large, so instead they give NaN. I added a control that in case of NaN value returns 0. Probably in the debug mode this cases were automatically managed.