Search code examples
cuda, iterator, zip, tuples, thrust

Modifying zip iterator with Eigen::Matrix gives erroneous results


I have three sets of points X, Y, Z, and I intend to apply a transform to them using an Eigen::Matrix4f. I use a zip iterator and a transform operator to do it. The program compiles; however, the result is only partially correct. This post is inspired by "How to modify the contents of a zip iterator".

The transformation of A = [0 1 2; 3 4 5; 6 7 8; 1 1 1] with M = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] using M*A should result in R = [28 34 40; 68 86 104; 108 138 168]. However, it gives R = [28 34 40; 208 251 294; 2410 2905 3400].

The X values are being correctly modified. However the Y & Z values are faulty.

My code and CMakeLists.txt are below:

#include <thrust/iterator/zip_iterator.h>
#include <thrust/execution_policy.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>

#include <Eigen/Dense>
#include <iostream>

typedef thrust::device_vector<float>::iterator                     FloatIterator;
typedef thrust::tuple<FloatIterator, FloatIterator, FloatIterator> FloatIteratorTuple;
typedef thrust::zip_iterator<FloatIteratorTuple>                   Float3Iterator;

typedef thrust::tuple<float,float,float> Float3;

// Functor for thrust::transform: treats an (x, y, z) tuple as the homogeneous
// point (x, y, z, 1), multiplies it by the stored 4x4 matrix and returns the
// first three components of the product.
struct modify_tuple
{
    Eigen::Matrix4f _Mat4f;  // matrix applied to every point

    modify_tuple(Eigen::Matrix4f Mat4f) : _Mat4f(Mat4f) { }

    __host__ __device__ Float3 operator()(Float3 a) const
    {
        // Lift the point to homogeneous coordinates (fourth component = 1).
        const Eigen::Vector4f point(thrust::get<0>(a), thrust::get<1>(a), thrust::get<2>(a), 1.0);
        const Eigen::Vector4f moved = _Mat4f * point;

        // The fourth component of the product is discarded.
        return thrust::make_tuple(moved(0, 0), moved(1, 0), moved(2, 0));
    }
};


int main(void)
{
    thrust::device_vector<float> X(3);
    thrust::device_vector<float> Y(3);
    thrust::device_vector<float> Z(3);

    X[0]=0,    X[1]=1,    X[2]=2;
    Y[0]=4,    Y[1]=5,    Y[2]=6;
    Z[0]=7,    Z[1]=8,    Z[2]=9;

    std::cout << "X,Y,Z before transformation="<< std::endl;
    thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;
    thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;
    thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;


    Float3Iterator P_first = thrust::make_zip_iterator(make_tuple(X.begin(), Y.begin(), Z.begin()));
    Float3Iterator P_last  = thrust::make_zip_iterator(make_tuple(X.end(),   Y.end(),   Z.end()));


    Eigen::Matrix4f M;
    M(0,0)= 1; M(0,1)= 2;  M(0,2)= 3;  M(0,3)= 4; 
    M(1,0)= 5; M(1,1)= 6;  M(1,2)= 7;  M(1,3)= 8; 
    M(2,0)= 9; M(2,1)= 10; M(2,2)= 11; M(2,3)= 12; 
    M(3,0)= 13; M(3,1)= 14;  M(3,2)= 15;  M(3,3)= 16;

    thrust::transform(thrust::device, P_first,P_last, P_first, modify_tuple(M));

    std::cout << "X, Y, Z after transformation="<< std::endl;
    thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;
    thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;
    thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
    std::cout << std::endl;


    return 0;
}

CMakeLists.txt

# Builds the single-file CUDA example. find_package(CUDA) supplies
# CUDA_INCLUDE_DIRS, CUDA_LIBRARIES and the cuda_add_executable command.
cmake_minimum_required(VERSION 2.8)

find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
# The Eigen headers are taken from a hard-coded system path.
include_directories(/usr/include/eigen3)

# Append optimisation and the compute_52 / sm_52 code-generation target
# to whatever nvcc flags are already set.
set(
    CUDA_NVCC_FLAGS
    ${CUDA_NVCC_FLAGS};
    -O3 -gencode arch=compute_52,code=sm_52;
    )

cuda_add_executable(modify_zip_iterator_stackoverflow_ver2 modify_zip_iterator_stackoverflow_ver2.cu)
target_link_libraries(modify_zip_iterator_stackoverflow_ver2 ${CUDA_LIBRARIES})

Solution

  • Probably you just need to get the latest Eigen.

    I used CUDA 9.2 on Fedora27, and grabbed the latest eigen from here.

    Then I compiled and ran your code as follows:

    $ cat t21.cu
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/execution_policy.h>
    #include <thrust/copy.h>
    #include <thrust/device_vector.h>
    
    #include <Eigen/Dense>
    #include <iostream>
    
    typedef thrust::device_vector<float>::iterator                     FloatIterator;
    typedef thrust::tuple<FloatIterator, FloatIterator, FloatIterator> FloatIteratorTuple;
    typedef thrust::zip_iterator<FloatIteratorTuple>                   Float3Iterator;
    
    typedef thrust::tuple<float,float,float> Float3;
    
    // Functor for thrust::transform: treats an (x, y, z) tuple as the
    // homogeneous point (x, y, z, 1), multiplies it by the stored 4x4 matrix
    // and returns the first three components of the product.
    struct modify_tuple
    {
        // Matrix applied to every point.
        Eigen::Matrix4f _Mat4f;
        modify_tuple(Eigen::Matrix4f Mat4f) : _Mat4f(Mat4f) { }
        __host__ __device__ Float3 operator()(Float3 a) const
        {
    
            // Lift the point to homogeneous coordinates (fourth component = 1).
            Eigen::Vector4f V(thrust::get<0>(a), thrust::get<1>(a), thrust::get<2>(a), 1.0);
    
        V=_Mat4f*V;
    
        // Keep the first three components; the fourth is discarded.
        Float3  res=thrust::make_tuple( V(0,0), V(1,0), V(2,0) );
    
            return res;
        }
    };
    
    
    // Fills three device arrays, prints them, transforms every
    // (X[i], Y[i], Z[i]) triple in place with modify_tuple, and prints them again.
    int main(void)
    {
        thrust::device_vector<float> X(3);
        thrust::device_vector<float> Y(3);
        thrust::device_vector<float> Z(3);
    
        X[0]=0,    X[1]=1,    X[2]=2;
        Y[0]=4,    Y[1]=5,    Y[2]=6;
        Z[0]=7,    Z[1]=8,    Z[2]=9;
    
        // Print each array on its own line, every value followed by a comma.
        std::cout << "X,Y,Z before transformation="<< std::endl;
        thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
    
    
        // Dereferencing the zipped range at index i yields (X[i], Y[i], Z[i]).
        Float3Iterator P_first = thrust::make_zip_iterator(make_tuple(X.begin(), Y.begin(), Z.begin()));
        Float3Iterator P_last  = thrust::make_zip_iterator(make_tuple(X.end(),   Y.end(),   Z.end()));
    
    
        // Element-by-element fill of the 4x4 matrix, one row per line.
        Eigen::Matrix4f M;
        M(0,0)= 1; M(0,1)= 2;  M(0,2)= 3;  M(0,3)= 4;
        M(1,0)= 5; M(1,1)= 6;  M(1,2)= 7;  M(1,3)= 8;
        M(2,0)= 9; M(2,1)= 10; M(2,2)= 11; M(2,3)= 12;
        M(3,0)= 13; M(3,1)= 14;  M(3,2)= 15;  M(3,3)= 16;
    
        // Output range equals the input range, so X, Y and Z are overwritten.
        thrust::transform(thrust::device, P_first,P_last, P_first, modify_tuple(M));
    
        std::cout << "X, Y, Z after transformation="<< std::endl;
        thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
    
    
        return 0;
    }
    $ nvcc -std=c++11 -I/path/to/eigen/eigen-eigen-71546f1a9f0c t21.cu -o t21 --expt-relaxed-constexpr
    $ ./t21
    X,Y,Z before transformation=
    0,1,2,
    4,5,6,
    7,8,9,
    X, Y, Z after transformation=
    33,39,45,
    81,99,117,
    129,159,189,
    $
    

    The output doesn't match what you are expecting in your question, but what you are expecting is not correct either.

    The first tuple provided to your functor as a result of dereferencing your zip iterator will be (X[0],Y[0],Z[0]), which is (0,4,7). Your functor then converts that to (0,4,7,1) and does a matrix-vector multiplication with your M matrix. The first row inner product is given by 0*1+4*2+7*3+1*4, which sums to 33. The second row inner product is given by 0*5+4*6+7*7+1*8, which sums to 81. The third row inner product is given by 0*9+4*10+7*11+1*12, which sums to 129. You can see this sequence 33,81,129 is exactly the first column of the output above.

    The second tuple provided to your functor as a result of dereferencing your zip iterator will be (X[1],Y[1],Z[1]), which is (1,5,8). Your functor then converts that to (1,5,8,1) and does a matrix-vector multiplication with your M matrix. The first row inner product is given by 1*1+5*2+8*3+1*4, which sums to 39. The second row inner product is given by 1*5+5*6+8*7+1*8, which sums to 99. The third row inner product is given by 1*9+5*10+8*11+1*12, which sums to 159. You can see this sequence 39,99,159 is exactly the second column of the output above.

    I haven't done the corresponding arithmetic for the 3rd column of the output, but I don't think it is wrong.

    Here's a modification of your code, demonstrating the correctness of the results, doing the arithmetic in Eigen host code:

    $ cat t21.cu
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/execution_policy.h>
    #include <thrust/copy.h>
    #include <thrust/device_vector.h>
    
    #include <Eigen/Dense>
    #include <iostream>
    
    typedef thrust::device_vector<float>::iterator                     FloatIterator;
    typedef thrust::tuple<FloatIterator, FloatIterator, FloatIterator> FloatIteratorTuple;
    typedef thrust::zip_iterator<FloatIteratorTuple>                   Float3Iterator;
    
    typedef thrust::tuple<float,float,float> Float3;
    
    // Functor for thrust::transform: treats an (x, y, z) tuple as the
    // homogeneous point (x, y, z, 1), multiplies it by the stored 4x4 matrix
    // and returns the first three components of the product.
    struct modify_tuple
    {
        // Matrix applied to every point.
        Eigen::Matrix4f _Mat4f;
        modify_tuple(Eigen::Matrix4f Mat4f) : _Mat4f(Mat4f) { }
        __host__ __device__ Float3 operator()(Float3 a) const
        {
    
            // Lift the point to homogeneous coordinates (fourth component = 1).
            Eigen::Vector4f V(thrust::get<0>(a), thrust::get<1>(a), thrust::get<2>(a), 1.0);
    
        V=_Mat4f*V;
    
        // Keep the first three components; the fourth is discarded.
        Float3  res=thrust::make_tuple( V(0,0), V(1,0), V(2,0) );
    
            return res;
        }
    };
    
    
    // Same program as before, plus a host-side check: the original inputs are
    // saved to host vectors and each point is multiplied by M with Eigen on the
    // host, so the printed columns can be compared with the device results.
    int main(void)
    {
        thrust::device_vector<float> X(3);
        thrust::device_vector<float> Y(3);
        thrust::device_vector<float> Z(3);
    
        X[0]=0,    X[1]=1,    X[2]=2;
        Y[0]=4,    Y[1]=5,    Y[2]=6;
        Z[0]=7,    Z[1]=8,    Z[2]=9;
        std::cout << "X,Y,Z before transformation="<< std::endl;
        thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        // Copy the inputs to the host now; the transform below overwrites X, Y, Z.
        thrust::host_vector<float> hX = X;
        thrust::host_vector<float> hY = Y;
        thrust::host_vector<float> hZ = Z;
    
    
        // Dereferencing the zipped range at index i yields (X[i], Y[i], Z[i]).
        Float3Iterator P_first = thrust::make_zip_iterator(make_tuple(X.begin(), Y.begin(), Z.begin()));
        Float3Iterator P_last  = thrust::make_zip_iterator(make_tuple(X.end(),   Y.end(),   Z.end()));
    
    
        // Element-by-element fill of the 4x4 matrix, one row per line.
        Eigen::Matrix4f M;
        M(0,0)= 1; M(0,1)= 2;  M(0,2)= 3;  M(0,3)= 4;
        M(1,0)= 5; M(1,1)= 6;  M(1,2)= 7;  M(1,3)= 8;
        M(2,0)= 9; M(2,1)= 10; M(2,2)= 11; M(2,3)= 12;
        M(3,0)= 13; M(3,1)= 14;  M(3,2)= 15;  M(3,3)= 16;
    
        // Output range equals the input range, so X, Y and Z are overwritten.
        thrust::transform(thrust::device, P_first,P_last, P_first, modify_tuple(M));
    
        std::cout << "X, Y, Z after transformation="<< std::endl;
        thrust::copy_n(X.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Y.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        thrust::copy_n(Z.begin(), 3, std::ostream_iterator<float>(std::cout, ","));
        std::cout << std::endl;
        // Host reference: build (x, y, z, 1) from the saved inputs for each
        // index and print all four components of M * v.
        Eigen::Vector4f hV;
        hV(0) = hX[0];
        hV(1) = hY[0];
        hV(2) = hZ[0];
        hV(3) = 1;
        hV = M*hV;
        std::cout << "column 0:" << std::endl;
        std::cout << hV;
        std::cout << std::endl;
        hV(0) = hX[1];
        hV(1) = hY[1];
        hV(2) = hZ[1];
        hV(3) = 1;
        hV = M*hV;
        std::cout << "column 1:" << std::endl;
        std::cout << hV;
        std::cout << std::endl;
        hV(0) = hX[2];
        hV(1) = hY[2];
        hV(2) = hZ[2];
        hV(3) = 1;
        hV = M*hV;
        std::cout << "column 2:" << std::endl;
        std::cout << hV;
        std::cout << std::endl;
    
        return 0;
    }
    $ nvcc -std=c++11 -I/home/bob/eigen/eigen-eigen-71546f1a9f0c t21.cu -o t21 --expt-relaxed-constexpr
    $ ./t21
    X,Y,Z before transformation=
    0,1,2,
    4,5,6,
    7,8,9,
    X, Y, Z after transformation=
    33,39,45,
    81,99,117,
    129,159,189,
    column 0:
     33
     81
    129
    177
    column 1:
     39
     99
    159
    219
    column 2:
     45
    117
    189
    261
    $