I'm implementing a neural network for learning purposes. I have the following templated classes.

The problem is that the Parameters class is templated, and when passing it as a reference to the update method of the optimizer, the compiler complains that a virtual method cannot be templated; if I pass it as a non-templated type instead, it complains that the type is missing its template arguments.

I also tried the CRTP idiom to make the Strategy pattern static, so that I could pass templated arguments to the update method, but then the compiler complains that the base class I use for the pointer where I store the optimizer inside Parameters must itself be templated.
#include <iostream>
#include <vector>
#include <random>
#include <memory>
#include <string>
#include <cmath>
#include <cstddef>
using size_type = std::size_t;
// #include "tensor_algebra.h" -- here I define aliases for Tensor operations
// to avoid polluting the global namespace,
// but for testing you can use the following include:
#include <Fastor/Fastor.h>
using namespace Fastor;
template<size_type input_features, size_type output_features>
struct Parameters;

class Optimizer{
public:
    // This is where the compiler complains: a virtual method cannot be a
    // template, and Parameters cannot be named without template arguments.
    virtual void update(Parameters& parameters) = 0;
    virtual ~Optimizer() = default;
};
template<size_type input_features, size_type output_features>
struct Parameters{
    Tensor<float, input_features, output_features> weight;
    Tensor<float, input_features, output_features> weight_gradient;
    Tensor<float, output_features> bias;
    Tensor<float, output_features> bias_gradient;
    std::shared_ptr<Optimizer> optimizer; // set via Linear::set_optimizer

    Parameters(const std::string& initializer = "he")
    : bias(0) {
        std::random_device rd;
        std::mt19937 generator(rd());
        std::normal_distribution<float> distribution;
        // switch does not work on std::string, so use if/else instead
        if(initializer == "he"){
            distribution = std::normal_distribution<float>(0, std::sqrt(2.0 / input_features));
        } else if(initializer == "xavier"){
            distribution = std::normal_distribution<float>(0, std::sqrt(2.0 / (input_features + output_features)));
        } else {
            std::cout << "Invalid initializer" << std::endl;
        }
        for(size_type i = 0; i < input_features; ++i){
            for(size_type j = 0; j < output_features; ++j){
                weight(i, j) = distribution(generator);
            }
        }
    }

    void update(){
        optimizer->update(*this);
    }
};
class SGD : public Optimizer{
public:
    SGD(float learning_rate) : learning_rate_(learning_rate) {}
    void update(Parameters& parameters){
        parameters.weight -= learning_rate_ * parameters.weight_gradient;
        parameters.bias -= learning_rate_ * parameters.bias_gradient;
    }
private:
    float learning_rate_;
};
//Linear layer class
template<size_type input_features, size_type output_features>
class Linear{
public:
    Linear(float learning_rate, const std::string& initializer = "he")
    : parameters(initializer){} // Parameters only takes the initializer
    //Setting the optimizer on the linear layer.
    void set_optimizer(std::shared_ptr<Optimizer> optimizer){
        parameters.optimizer = optimizer;
    }
    //forward method
    template<size_type batch_size>
    Tensor<float, batch_size, output_features> forward(const Tensor<float, batch_size, input_features>& input){
        Tensor<float, batch_size> ones(1);
        return matmul(input, parameters.weight) + outer(ones, parameters.bias);
    }
    //backward method
    template<size_type batch_size>
    Tensor<float, batch_size, input_features> backward(
        const Tensor<float, batch_size, output_features>& gradient,
        const Tensor<float, batch_size, input_features>& input
    ){
        parameters.weight_gradient = matmul(transpose(input), gradient);
        parameters.bias_gradient = 0.0; // reset bias_gradient to zeros
        for(size_type i = 0; i < batch_size; ++i){
            for(size_type j = 0; j < output_features; ++j){
                parameters.bias_gradient(j) += gradient(i, j);
            }
        }
        Tensor<float, batch_size, input_features> input_gradient = matmul(gradient, transpose(parameters.weight));
        return input_gradient;
    }
private:
    Parameters<input_features, output_features> parameters;
};
int main(){
    Linear<2,3> linear(0.01);
    linear.set_optimizer(std::make_shared<SGD>(0.01));
    Tensor<float, 2, 2> input = {{1, 2}, {3, 4}};
    Tensor<float, 2, 3> output = linear.forward(input);
    std::cout << output << std::endl;
    return 0;
}
I tried this:
template<class Derived>
class Optimizer{
public:
    Derived& self(){ return static_cast<Derived&>(*this); }
    const Derived& self() const { return static_cast<const Derived&>(*this); }
    template<size_type input_size, size_type output_size>
    void update(Parameters<input_size, output_size>& parameters){
        self().update(parameters);
    }
};

class SGD : public Optimizer<SGD>{
public:
    SGD(float learning_rate) : learning_rate_(learning_rate) {}
    template<size_type input_size, size_type output_size>
    void update(Parameters<input_size, output_size>& parameters){
        parameters.weight -= learning_rate_ * parameters.weight_gradient;
        parameters.bias -= learning_rate_ * parameters.bias_gradient;
    }
private:
    float learning_rate_;
};
but it doesn't seem to work. Any advice?
I cannot understand exactly what you want to do, because your code has many syntax errors and there is no call to Parameters::update(). However, I tried to remove the syntax errors and to call Parameters::update() from Linear.

What I did is simply make all of the classes templates in the same way. I hope it will be of some help.

Note: I don't have the unknown "Fastor.h", so the following code became skeleton-like (all unknown types, and the code that processes them, were simply removed).
#include <iostream>
#include <memory>
#include <string>
#include <cstddef>
using size_type = std::size_t;

template<size_type input_features, size_type output_features>
struct Parameters;

template<size_type input_features, size_type output_features>
class Optimizer{
public:
    virtual void update(Parameters<input_features, output_features>& parameters) = 0;
    virtual ~Optimizer() = default;
};

template<size_type input_features, size_type output_features>
struct Parameters
{
    std::shared_ptr< Optimizer<input_features, output_features> > optimizer;
    Parameters(const std::string& initializer = "he"){ /*NOP*/ }
    void update(){ optimizer->update(*this); }
};

template<size_type input_features, size_type output_features>
class SGD : public Optimizer<input_features, output_features>
{
public:
    SGD(float learning_rate) : learning_rate_(learning_rate) {}
    void update(Parameters<input_features, output_features>& parameters) override
    { std::cout << "SGD::update() called" << std::endl; }
private:
    float learning_rate_;
};
//Linear layer class
template<size_type input_features, size_type output_features>
class Linear{
public:
    Linear(float learning_rate, const std::string& initializer = "he")
    : parameters(/*learning_rate,*/ initializer){}
    void set_optimizer(std::shared_ptr< Optimizer<input_features, output_features> > optimizer)
    { parameters.optimizer = optimizer; }
    //Test method I added
    void DoSomething(){ parameters.update(); }
private:
    Parameters<input_features, output_features> parameters;
};

int main(){
    Linear<2,3> linear(0.01f);
    linear.set_optimizer(std::make_shared< SGD<2,3> >(0.01f));
    linear.DoSomething();
    return 0;
}