I have a flattened array of doubles filled with points (x, y, z). I split this array into separate X, Y and Z arrays, and I access those arrays through a zip iterator:
tpl4zip first = thrust::make_zip_iterator(thrust::make_tuple(X.begin(), Y.begin(), Z.begin(), K.begin()));
tpl4zip last = thrust::make_zip_iterator(thrust::make_tuple(X.end(), Y.end(), Z.end(), K.end()));
I'm able to sort these tuples without any problem. I then need to reduce them so that I can count, and so I want to use reduce_by_key, but all my attempts to write a working reduce_by_key call have gone wrong. Could someone tell me what I'm doing wrong? Thanks in advance.
Here is my code:
// Input values for the reduction: one int per point (N/KEYLEN points), all set to 1.
thrust::device_vector<int> counter(N/KEYLEN); // one counter per point
thrust::fill(counter.begin(), counter.end(), 1); // every point starts with a count of 1
// Values input as written: each key tuple zipped together with its counter.
auto my_z = thrust::make_zip_iterator(thrust::make_tuple(first,counter.begin()));
// Separate output storage for the reduced x, y, z and index columns.
thrust::device_vector<double> X_r(N/KEYLEN); // reduced x
thrust::device_vector<double> Y_r(N/KEYLEN); // reduced y
thrust::device_vector<double> Z_r(N/KEYLEN); // reduced z
thrust::device_vector<int> K_r(N/KEYLEN); // reduced index
tpl4zip my_zr = thrust::make_zip_iterator(thrust::make_tuple(X_r.begin(), Y_r.begin(), Z_r.begin(), K_r.begin()));
// Output counters; the fill below is redundant if reduce_by_key overwrites them.
thrust::device_vector<int> counter_2(N/KEYLEN); // one output counter per point
thrust::fill(counter_2.begin(), counter_2.end(), 1); // initial value 1
// Values output as written: reduced key tuple zipped with the output counter.
auto pack = thrust::make_zip_iterator(thrust::make_tuple(my_zr,counter_2.begin()));
// NOTE(review): the keys dereference to tpl4int, not int, so equal_to<int>
// does not match the key type.
thrust::equal_to<int> binary_pred;
// NOTE(review): argument order is (keys_first, keys_last, values_first,
// keys_output, values_output, binary_pred, binary_op). `first` is passed both
// as the keys input and as the keys output, but the reduce_by_key
// documentation states that the input ranges shall not overlap either output
// range, so this cannot be done in place.
thrust::reduce_by_key(
first,
last,
my_z,
first,
pack,
binary_pred,
TuplePlus()
);
My TuplePlus is defined like this:
// Binary reduction functor over tpl5int values.
// The result carries the first element of lhs unchanged and holds the sum of
// the int (second) elements of lhs and rhs.
struct TuplePlus
{
    __host__ __device__
    tpl5int operator ()(const tpl5int& lhs, const tpl5int& rhs)
    {
        // Start from lhs so its first element is kept, then add rhs's counter.
        tpl5int merged = lhs;
        thrust::get<1>(merged) += thrust::get<1>(rhs);
        return merged;
    }
};
My tuple and zip iterator types are defined like this:
#define N 30 // make this evenly divisible by 3 for this example
// Value type of one zipped element: three doubles plus an int.
using tpl4int = thrust::tuple<double, double, double, int>;
// Iterator types of the underlying device vectors.
using doubleiter = thrust::device_vector<double>::iterator;
using intiter = thrust::device_vector<int>::iterator;
// Iterator tuple and zip iterator over three double vectors and one int vector.
using tpl4doubleiter = thrust::tuple<doubleiter, doubleiter, doubleiter, intiter>;
using tpl4zip = thrust::zip_iterator<tpl4doubleiter>;
// Pair of (tpl4zip, int); note that the first element is an iterator, not a value tuple.
using tpl5int = thrust::tuple<tpl4zip, int>;
// Iterator tuple and zip iterator pairing a tpl4doubleiter with an int iterator.
using tpl5doubleiter = thrust::tuple<tpl4doubleiter, intiter>;
using tpl5zip = thrust::zip_iterator<tpl5doubleiter>;
Here is the full code:
#include <cmath>
#include <iostream>
#include <vector>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>
#define N 30 // make this evenly divisible by 3 for this example
// Value type of one zipped element: three doubles plus an int.
using tpl4int = thrust::tuple<double, double, double, int>;
// Iterator types of the underlying device vectors.
using doubleiter = thrust::device_vector<double>::iterator;
using intiter = thrust::device_vector<int>::iterator;
// Iterator tuple and zip iterator over three double vectors and one int vector.
using tpl4doubleiter = thrust::tuple<doubleiter, doubleiter, doubleiter, intiter>;
using tpl4zip = thrust::zip_iterator<tpl4doubleiter>;
// Pair of (tpl4zip, int); note that the first element is an iterator, not a value tuple.
using tpl5int = thrust::tuple<tpl4zip, int>;
// Iterator tuple and zip iterator pairing a tpl4doubleiter with an int iterator.
using tpl5doubleiter = thrust::tuple<tpl4doubleiter, intiter>;
using tpl5zip = thrust::zip_iterator<tpl5doubleiter>;
// Comparison functor for sorting tpl4int elements.
// Each of the first three tuple elements is divided by decimPrecision and
// rounded; elements are ordered by the rounded first element, then the second,
// then the third, with the larger rounded value placed first. The int element
// of the tuple does not take part in the comparison.
struct sort_
{
    double decimPrecision;

    sort_(double _decimPrecision) : decimPrecision(_decimPrecision)
    {
    }

    __host__ __device__
    bool operator()(const tpl4int &a,const tpl4int &b) const
    {
        const double ax = round(thrust::get<0>(a)/decimPrecision);
        const double bx = round(thrust::get<0>(b)/decimPrecision);
        if (ax != bx)
            return ax > bx;

        const double ay = round(thrust::get<1>(a)/decimPrecision);
        const double by = round(thrust::get<1>(b)/decimPrecision);
        if (ay != by)
            return ay > by;

        // First two coordinates tie after rounding: the third one decides.
        const double az = round(thrust::get<2>(a)/decimPrecision);
        const double bz = round(thrust::get<2>(b)/decimPrecision);
        return az > bz;
    }
};
// Binary reduction functor over tpl5int values.
// The result carries the first element of lhs unchanged and holds the sum of
// the int (second) elements of lhs and rhs.
struct TuplePlus
{
    __host__ __device__
    tpl5int operator ()(const tpl5int& lhs, const tpl5int& rhs)
    {
        // Start from lhs so its first element is kept, then add rhs's counter.
        tpl5int merged = lhs;
        thrust::get<1>(merged) += thrust::get<1>(rhs);
        return merged;
    }
};
int main()
{
#define KEYLEN 3
thrust::device_vector<double> input(10*KEYLEN);
int i=0;
// input[0] = vec3(0,0,5.005);
input[i++] = 1.0245;
input[i++] = 2.54;
input[i++] = 3.001;
// input[1] = vec3(0,0,5.005);
input[i++] = 2.0;
input[i++] = 1.0;
input[i++] = 5.01125;
// input[2] = vec3(0,0,5.005);
input[i++] = 6.0;
input[i++] = 1.0;
input[i++] = 5.0145;
// input[3] = vec3(2,1,5.001);
input[i++] = 6.0;
input[i++] = 1.0215;
input[i++] = 6.001;
// input[4] = vec3(3,0,5.001);
input[i++] = 6.0;
input[i++] = 1.0845;
input[i++] = 5.00125;
// input[5] = vec3(4,0,5.001);
input[i++] = 5.0;
input[i++] = 0.0;
input[i++] = 5.001;
// input[6] = vec3(5,0,5.001);
input[i++] = 5.0;
input[i++] = 0.0;
input[i++] = 5.001;
// input[7] = vec3(6,0,10.501);
input[i++] = 6.0;
input[i++] = 0.0;
input[i++] = 10.501;
// input[8] = vec3(0,0,5.001);
input[i++] = 1.0;
input[i++] = 0.0;
input[i++] = 5.0015478;
// input[9] = vec3(0,0,5.001);
input[i++] = 6.0;
input[i++] = 1.005;
input[i++] = 5.001;
thrust::device_vector<double> X(N/KEYLEN); // keys in one row
thrust::device_vector<double> Y(N/KEYLEN); // keys in one row
thrust::device_vector<double> Z(N/KEYLEN); // keys in one row
size_t dsize = sizeof(input)/sizeof(double);
size_t numkeys = dsize/KEYLEN;
int index=0;
for( int i = 0; i<N/KEYLEN;i++)
{
X[i]=input[index++];
Y[i]=input[index++];
Z[i]=input[index++];
}
thrust::device_vector<int> K(N/KEYLEN); // keys in one row
thrust::sequence(K.begin(), K.end(), 0); // set index for key
tpl4zip first = thrust::make_zip_iterator(thrust::make_tuple(X.begin(), Y.begin(), Z.begin(), K.begin()));
tpl4zip last = thrust::make_zip_iterator(thrust::make_tuple(X.end(), Y.end(), Z.end(), K.end()));
thrust::sort(first,last,sort_(0.01));
thrust::device_vector<int> counter(N/KEYLEN); // keys in one row
thrust::fill(counter.begin(), counter.end(), 1); // set counter for key
auto my_z = thrust::make_zip_iterator(thrust::make_tuple(first,counter.begin()));
thrust::device_vector<double> X_r(N/KEYLEN); // keys in one row
thrust::device_vector<double> Y_r(N/KEYLEN); // keys in one row
thrust::device_vector<double> Z_r(N/KEYLEN); // keys in one row
thrust::device_vector<int> K_r(N/KEYLEN); // keys in one row
tpl4zip my_zr = thrust::make_zip_iterator(thrust::make_tuple(X_r.begin(), Y_r.begin(), Z_r.begin(), K_r.begin()));
thrust::device_vector<int> counter_2(N/KEYLEN); // keys in one row
thrust::fill(counter_2.begin(), counter_2.end(), 1); // set counter for key
auto pack = thrust::make_zip_iterator(thrust::make_tuple(my_zr,counter_2.begin()));
thrust::equal_to<int> binary_pred;
thrust::reduce_by_key(
first,
last,
my_z,
first,
pack,
binary_pred,
TuplePlus()
);
std::vector<tpl4int> result_sorted(N/KEYLEN);
thrust::copy(first,first+N/KEYLEN,result_sorted.begin());
for (int i=0; i<N/KEYLEN; i++)
{
std::cout << "{ " << result_sorted[i].get<0>() ;
std::cout << ", " << result_sorted[i].get<1>() ;
std::cout << ", " << result_sorted[i].get<2>() ;
std::cout << "} i= " << result_sorted[i].get<3>()<< std::endl;
}
return 0;
}
Citing the documentation of thrust::reduce_by_key
:
The input ranges shall not overlap either output range.
So you can not give first
as the output iterator here. This operation cannot be done in place.
Also you try to use a thrust::equal_to<int>
while your keys are not of type int
, but of type tpl4int
.
You seem to make things much more complicated with zip iterators here than they need to be: You zip your keys with your values instead of just using the values. To get the expected result, you could probably just take
// only to see how it would look if needed:
auto custom_binary_pred = [](const tpl4int &lhs, const tpl4int &rhs) {
return thrust::get<0>(lhs) == thrust::get<0>(rhs) &&
thrust::get<1>(lhs) == thrust::get<1>(rhs) &&
thrust::get<2>(lhs) == thrust::get<2>(rhs) &&
thrust::get<3>(lhs) == thrust::get<3>(rhs);
};
thrust::reduce_by_key(first,
last,
counter.begin(),
my_zr,
counter_2.begin(),
custom_binary_pred,
thrust::plus<int>()); // doesn't need to be specified due to overload of reduce_by_key
I'm fairly certain that you don't even need to specify custom_binary_pred
, as Thrust probably implements operator==
for its tuple (haven't found documentation on this here).
You don't need to initialize counter_2
with thrust::fill
, as the values will be overwritten either way.