So I have a device array of PackedVertex structs:
// Per-vertex record stored in the device array that is to be sorted so that
// identical records end up next to each other.
struct PackedVertex {
    glm::vec3 Vertex;  // three-component vertex attribute
    glm::vec2 UV;      // two-component UV attribute
    glm::vec3 Normal;  // three-component normal attribute
};  // the terminating semicolon is required after a struct definition
I'm trying to sort them so that duplicates are clustered together in the array; I don't care about overall order at all.
I first tried sorting them by comparing the lengths of the vectors; that ran, but it didn't sort them correctly. So now I'm trying to sort per member, using three stable_sorts with the following binary operators:
// Binary predicate intended to order PackedVertex values by their Vertex member.
// NOTE(review): the ||-chain below is not a strict weak ordering. For
// lhs.Vertex = (1,0,0) and rhs.Vertex = (0,1,0) it returns true for both
// argument orders, so it does not define a consistent ordering for a sort.
__thrust_hd_warning_disable__
struct sort_packed_verts_by_vertex : public thrust::binary_function < PackedVertex, PackedVertex, bool >
{
__host__ __device__ bool operator()(const PackedVertex &lhs, const PackedVertex &rhs)
{
// True as soon as ANY single component of lhs.Vertex is smaller than the
// matching component of rhs.Vertex; earlier components do not take priority.
return lhs.Vertex.x < rhs.Vertex.x || lhs.Vertex.y < rhs.Vertex.y || lhs.Vertex.z < rhs.Vertex.z;
}
};
// Binary predicate intended to order PackedVertex values by their UV member.
// NOTE(review): the ||-chain below is not a strict weak ordering. For
// lhs.UV = (1,0) and rhs.UV = (0,1) it returns true for both argument
// orders, so it does not define a consistent ordering for a sort.
__thrust_hd_warning_disable__
struct sort_packed_verts_by_uv : public thrust::binary_function < PackedVertex, PackedVertex, bool >
{
__host__ __device__ bool operator()(const PackedVertex &lhs, const PackedVertex &rhs)
{
// True as soon as EITHER component of lhs.UV is smaller than the matching
// component of rhs.UV; x does not take priority over y.
return lhs.UV.x < rhs.UV.x || lhs.UV.y < rhs.UV.y;
}
};
// Binary predicate intended to order PackedVertex values by their Normal member.
// NOTE(review): the ||-chain below is not a strict weak ordering. For
// lhs.Normal = (1,0,0) and rhs.Normal = (0,1,0) it returns true for both
// argument orders, so it does not define a consistent ordering for a sort.
__thrust_hd_warning_disable__
struct sort_packed_verts_by_normal : public thrust::binary_function < PackedVertex, PackedVertex, bool >
{
__host__ __device__ bool operator()(const PackedVertex &lhs, const PackedVertex &rhs)
{
// True as soon as ANY single component of lhs.Normal is smaller than the
// matching component of rhs.Normal; earlier components do not take priority.
return lhs.Normal.x < rhs.Normal.x || lhs.Normal.y < rhs.Normal.y || lhs.Normal.z < rhs.Normal.z;
}
};
The trouble is that I'm now getting a thrust error, "launch_closure_by_value", which, hazarding a guess, means that my sort isn't converging because of my operators.
That being said I'm also pretty sure this is not the best way for me to be doing this kind of sort so any feedback would be greatly appreciated.
I don't believe your sort functors are correct.
A sort functor must give a consistent ordering. Let's just consider this one:
return lhs.UV.x < rhs.UV.x || lhs.UV.y < rhs.UV.y;
Suppose I have two UV quantities like this:
UV1.x: 1
UV1.y: 0
UV2.x: 0
UV2.y: 1
This functor will return true no matter which order I present UV1 and UV2. Your other functors are similarly defective.
In thrust speak, these are not valid StrictWeakOrdering functors. If we wish to order UV1 and UV2, we must provide a functor which (consistently) returns true for one presentation order and false for the other presentation order. (The only exception to this is if the two presented quantities are truly equal; then the functor must return false for both presentation orders, because a strict weak ordering never reports an element as ordered before its own equal. However, the UV1 and UV2 presented here are not "equal" for the purposes of your desired ordering, i.e. grouping of identical structs.)
The following simple test seems to work for me:
$ cat t717.cu
#include <thrust/sort.h>
#include <thrust/device_ptr.h>
#include <iostream>
#include <stdlib.h>
#define DSIZE 64
#define RNG 10
// Test stand-in for the question's PackedVertex: same member names and order,
// but declared with float3/float2 where the question used glm::vec3/glm::vec2.
struct PackedVertex {
float3 Vertex;  // compared first by my_PV_grouper (x, then y, then z)
float2 UV;      // compared second (x, then y)
float3 Normal;  // compared last (x, then y, then z)
};
// Sort predicate that groups identical records together.
//
// Compares two records field by field in the fixed order
//   Vertex.x, Vertex.y, Vertex.z, UV.x, UV.y, Normal.x, Normal.y, Normal.z
// and decides on the first field where the two sides differ. It returns true
// when lhs holds the larger value there, so the resulting order is descending.
// Records whose eight fields all compare equal yield false in both directions.
struct my_PV_grouper {
private:
  // Three-way comparison of one field: +1 if a > b, -1 if a < b, else 0.
  // '>' is tested before '<', and 0 is returned when neither holds.
  template <typename S>
  __host__ __device__
  static int rank(const S &a, const S &b) {
    if (a > b) return 1;
    if (a < b) return -1;
    return 0;
  }

public:
  template <typename T>
  __host__ __device__
  bool operator()(const T &lhs, const T &rhs) const {
    int r = rank(lhs.Vertex.x, rhs.Vertex.x);
    if (r != 0) return r > 0;
    r = rank(lhs.Vertex.y, rhs.Vertex.y);
    if (r != 0) return r > 0;
    r = rank(lhs.Vertex.z, rhs.Vertex.z);
    if (r != 0) return r > 0;

    r = rank(lhs.UV.x, rhs.UV.x);
    if (r != 0) return r > 0;
    r = rank(lhs.UV.y, rhs.UV.y);
    if (r != 0) return r > 0;

    r = rank(lhs.Normal.x, rhs.Normal.x);
    if (r != 0) return r > 0;
    r = rank(lhs.Normal.y, rhs.Normal.y);
    if (r != 0) return r > 0;

    // Last field: only "strictly greater" makes lhs sort first.
    return lhs.Normal.z > rhs.Normal.z;
  }
};
int main(){
PackedVertex h_data[DSIZE];
PackedVertex *d_data;
for (int i =0; i < DSIZE; i++)
h_data[i].Vertex.x = h_data[i].Vertex.y = h_data[i].Vertex.z = h_data[i].UV.x = h_data[i].UV.y = h_data[i].Normal.x = h_data[i].Normal.y = h_data[i].Normal.z = rand()%RNG;
cudaMalloc(&d_data, DSIZE*sizeof(PackedVertex));
cudaMemcpy(d_data, h_data, DSIZE*sizeof(PackedVertex), cudaMemcpyHostToDevice);
thrust::device_ptr<PackedVertex> d_ptr(d_data);
thrust::sort(d_ptr, d_ptr+DSIZE, my_PV_grouper());
cudaMemcpy(h_data, d_data, DSIZE*sizeof(PackedVertex), cudaMemcpyDeviceToHost);
for (int i =0; i < DSIZE; i++)
std::cout << h_data[i].Vertex.x << " ";
std::cout << std::endl;
}
$ nvcc -o t717 t717.cu
$ ./t717
9 9 9 9 9 9 9 8 8 8 7 7 7 7 7 7 7 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 4 4 4 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0
$
In case it's not clear, there is nothing particularly specific to the usage of thrust and functors here; the fundamental logic used to order these items needs to be correct for a valid sort. Even if you wrote a simple serial bubble-sort, it would have to use similar logic. The logic presented in your functors cannot be used to provide a sensible ordering.
If there are other problems with your approach, I can't say, as you have not provided a proper MCVE, which is expected for questions like this.