I have a very large array which has around 10000000 double entries (in the code below this number was set to NX=1000 but actually it will be 10000000 in realistic situations). I want to do some calculations with this array from which I get a new array. Then I do again some calculations with this new array and so on. I am looking for the most efficient way to do these manipulations with respect to RAM (I believe that this is the limiting factor here, since my arras are so large). I know that because of the large arrays I should try to pass the array as much as possible per reference. However I have problems with passing the arrays to other functions. Here is the code:
#include <iostream>
#include <array>
#define NX 1000
////////////////////////
//Function declaration//
////////////////////////
void time_propagation(std::array<double,NX>& X1,std::array<double,NX>& X2, double dt);
std::array<double,NX> F(std::array<double,NX>& X);
///////////////////////
//operator overloading/
///////////////////////
// double* Array
std::array<double,NX> operator*(double d,std::array<double,NX> X){
std::array<double,NX> res={};
for(int i=0;i<NX;i++){
res[i]=X[i]*d;
}
return res;
}
//Array + Array
std::array<double,NX> operator+(std::array<double,NX> X1,std::array<double,NX> X2){
std::array<double,NX> res={};
for(int i=0;i<NX;i++){
res[i]=X1[i]+X2[i];
}
return res;
}
/////////////
//main()/////
/////////////
int main () {
std::array<double,NX> X0={};
std::array<double,NX> X1 = {};
time_propagation(X0,X1,0.1);
//Now I will write X0 to a file. Afterwards X0 is not needed anymore. Only X1. Do I have to manually delete X0?
std::array<double,NX> X2 = {};
time_propagation(X1,X2,0.1);
//Now I will write X1 to a file. Afterwards X1 is not needed anymore. Only X2. Do I have to manually delete X1?
//... I have to do this step very often
return 0;
}
///////////////////////
//Function definitons//
///////////////////////
std::array<double,NX> F(std::array<double,NX>& X){
std::array<double,NX> R={};
double a=X[NX-2];
double b=X[NX-1];
R[NX-1]=-a;
R[NX-1]=3.0;
return R;
}
void time_propagation(std::array<double,NX>& X1,std::array<double,NX>& X2,double dt){
std::array<double,NX> K1=F(X1);
std::array<double,NX> K2=F(X1+dt/2.0*K1);
std::array<double,NX> K3=F(X1+dt/2.0*K2);
std::array<double,NX> K4=F(X1+dt*K3);
X2=X1+1.0/6.0*dt*(K1+2.0*K2+2.0*K3+K4);
}
I know that my error is in the function "time_propagation". In the fifth last line I call the function "F" with the argument "X1+dt/2.0*K1". "X1" was passed by reference and "K1" was defined before. Do I first have to create a copy and the call the function on that copy? Something like that:
void time_propagation(std::array<double,NX>& X1,std::array<double,NX>& X2,double dt){
std::array<double,NX> argument={};
for(int i=0;i<NX;i++){argument[i]=X1[i];}
std::array<double,NX> K1=F(argument);
for(int i=0;i<NX;i++){argument[i]=X1[i]+dt/2.0*K1[i];}
std::array<double,NX> K2=F(argument);
for(int i=0;i<NX;i++){argument[i]=X1[i]+dt/2.0*K2[i];}
std::array<double,NX> K3=F(argument);
for(int i=0;i<NX;i++){argument[i]=X1[i]+dt/2.0*K3[i];}
std::array<double,NX> K4=F(argument);
for(int i=0;i<NX;i++){X2[i]=X1[i]+1.0/6.0*dt*(K1[i]+2.0*K2[i]+2.0*K3[i]+K4[i]); }
}
If I use this new "time_propagation" function then I don't get any errors anymore, but my feeling is that this workaround is very bad and there is a nicer way of doing this? Second and most importantly even with this new version of "time_propagation()", I get a "Segmentation fault (core dumped)" if I increase NX to 10000000. I guess this happens because the code is very badly written? In general my laptop should be able to store 10000000 doubles in the RAM? I have 16 GB of RAM... and 10000000 doubles require only 0.2 GB of RAM!?
According to your comment, you are very close to the answer. One thing you missed is that besides array is stack-based and vector is heap-based there is one more difference between array
and vector
: the vector needs to call its constructor(or resize) to enlarge size. So after switching to vector
, we need a slight modify.
Change the unnecessary copy arguments to reference.
Use a lambda to call swap
or clear
and shrink_to_fit
Bonus: To remove the unnecessary temporary object in operator *
and operator +
, see Expression_templates
A fixed code might like this:
#include <array>
#include <iostream>
#include <vector>
#define NX 1000
////////////////////////
// Function declaration//
////////////////////////
void time_propagation(std::vector<double>& X1, std::vector<double>& X2,
double dt);
std::vector<double> F(const std::vector<double>& X);
///////////////////////
// operator overloading/
///////////////////////
// double* Array
std::vector<double> operator*(double d, const std::vector<double>& X) {
std::vector<double> res(NX);
for (int i = 0; i < NX; i++) {
res[i] = X[i] * d;
}
return res;
}
// Array + Array
std::vector<double> operator+(const std::vector<double>& X1,
const std::vector<double>& X2) {
std::vector<double> res(NX);
for (int i = 0; i < NX; i++) {
res[i] = X1[i] + X2[i];
}
return res;
}
/////////////
// main()/////
/////////////
int main() {
auto release_vec = [](auto& vec) {
vec.clear();
vec.shrink_to_fit();
};
std::vector<double> X0(NX);
std::vector<double> X1(NX);
time_propagation(X0, X1, 0.1);
// Now I will write X0 to a file. Afterwards X0 is not needed anymore. Only
// X1. Do I have to manually delete X0?
release_vec(X0);
std::vector<double> X2(NX);
time_propagation(X1, X2, 0.1);
// Now I will write X1 to a file. Afterwards X1 is not needed anymore. Only
// X2. Do I have to manually delete X1?
//... I have to do this step very often
release_vec(X1);
return 0;
}
///////////////////////
// Function definitons//
///////////////////////
std::vector<double> F(const std::vector<double>& X) {
std::vector<double> R(NX);
double a = X[NX - 2];
double b = X[NX - 1];
R[NX - 1] = -a;
R[NX - 1] = 3.0;
return R;
}
void time_propagation(std::vector<double>& X1, std::vector<double>& X2,
double dt) {
std::vector<double> K1 = F(X1);
std::vector<double> K2 = F(X1 + dt / 2.0 * K1);
std::vector<double> K3 = F(X1 + dt / 2.0 * K2);
std::vector<double> K4 = F(X1 + dt * K3);
X2 = X1 + 1.0 / 6.0 * dt * (K1 + 2.0 * K2 + 2.0 * K3 + K4);
}