I wrote the following numerical integration code (`Integrator.hpp`):
#include <thrust/device_vector.h>
#include <thrust/for_each.h>
#include <thrust/inner_product.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <array>
#include <cstddef>
#include <iostream>
/// Batched Simpson-style quadrature: sums weighted samples of `n_batch`
/// signals, each `n` samples long and stored back to back, with a single
/// `thrust::reduce_by_key` call.
///
/// @tparam val_type  Arithmetic type of the quadrature weights and results.
/// @tparam n         Number of samples per signal.
/// @tparam n_batch   Number of signals stored contiguously.
/// @tparam Container Not used by any member of this struct.
template<typename val_type, std::size_t n, std::size_t n_batch,
         template<typename...> typename Container>
struct Integrator {
    /// Base weight (b - a) / (3 * n); every sample weight is 1x, 2x or 4x this.
    // NOTE(review): a caller that spaces its samples by (b - a) / (n - 1)
    // gets a result scaled by (n - 1) / n relative to that spacing -- confirm
    // which step size is intended.
    const val_type coeff;

    /// @param a One end of the integration interval (only b - a is used).
    /// @param b The other end of the integration interval.
    Integrator(val_type a, val_type b) :
        coeff((b-a)/3./static_cast<val_type>(n)) {}

    /// Integrates every signal and writes one result per signal.
    ///
    /// @param f_begin    Start of the n * n_batch input samples.
    /// @param dens_begin Start of the output range; one value is written for
    ///                   each run of n consecutive input samples.
    template <typename itor_type>
    void operator()(itor_type f_begin, itor_type dens_begin) {
        const val_type C = this->coeff;

        // Pair every sample with its global position 0 .. n*n_batch-1.
        auto indexed_begin = thrust::make_zip_iterator(
            thrust::make_tuple(
                thrust::make_counting_iterator(0), f_begin));

        // Lazily multiply each sample by its quadrature weight.
        auto weighted_begin = thrust::make_transform_iterator(indexed_begin,
            [C](auto _tuple){
                // The weight must depend on the position inside the sample's
                // own signal. Using the global position would give the first
                // sample of every signal but the first a weight of 2C or 4C
                // instead of C, and for odd n it would flip the 4/2 pattern
                // from one signal to the next.
                const int local = thrust::get<0>(_tuple) % static_cast<int>(n);
                // NOTE(review): the last sample of a signal is not special-
                // cased; it receives 2C or 4C according to its parity.
                return static_cast<val_type>(thrust::get<1>(_tuple)
                    * (local==0 ? C
                       : local%2==0 ? 2.*C : 4.*C));
            });

        // Two positions belong to the same signal when they share the same
        // quotient by n, so each run of n samples is reduced to one value.
        auto same_signal = [](int i, int j) { return i/n==j/n ; };

        thrust::reduce_by_key(
            thrust::make_counting_iterator(0),                            // input keys (begin)
            thrust::make_counting_iterator(static_cast<int>(n*n_batch)),  // input keys (end)
            weighted_begin,                                               // input values
            thrust::make_discard_iterator(),                              // output keys (dropped)
            dens_begin,                                                   // output values
            same_signal);
    } // end of operator()
};
The algorithm is a simple Simpson integration. Here is `main.cu`:
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include "Integrator.hpp"
// Floating-point type used for the samples and the integration results.
using Real = double;
// n: number of grid points per signal; m: number of signals in the batch.
constexpr std::size_t n = 1000, m = 4;
// Spacing of n points placed at i*h, so that the last one lands on 2*pi.
constexpr Real h = 2.*M_PI/(static_cast<Real>(n)-1.);
int main(int argc, char* argv[]) {
Integrator<Real,n,m,thrust::device_vector> inttest(0,2.*M_PI);
thrust::host_vector<Real> _fff(n*m), _x(n);
thrust::device_vector<Real> fff(n*m), fint(m);
for (int i=0; i<n; ++i)
_x[i] = i*h;
for (int I=0; I<m; ++I)
for (int i=0; i<n; ++i)
_fff[I*n+i] = std::sin(0.5*_x[i]);
fff = _fff;
inttest(fff.begin(),fint.begin());
thrust::copy(fint.begin(),fint.end(),
std::ostream_iterator<Real>(std::cout," "));
std::cout << std::endl;
}
I compiled this code on two machines: Windows Subsystem for Linux (WSL) on a Windows 11 system, and an Ubuntu 22.04 Linux server. Compilation succeeded on both machines (I simply used `nvc++ main.cu`).
The two systems (WSL and Ubuntu Linux) have exactly the same
However, only the Linux machine yields the correct result (it should be `3.99 3.99 3.99 3.99`), while the WSL Windows 11 machine yields `0 0 0 0`. In addition, the executable `a.out` is 24 MB on WSL but only 8 MB on the Linux machine, so I know something must have gone wrong, but where?
As the problem only occurs with `nvc++` and not with `nvcc`, this seems to be a compiler problem.
The NVHPC SDK (and with it the `nvc++` compiler) does not officially support Windows or WSL yet, as neither of them is mentioned in the installation guide or the release notes.