Search code examples
c++c++11tasktbb

C++ numerical mystery


I wrote a class for seprate some task into two sub-tasks.

e.g. make the additions of two vectors of same size.

I designed it as a like binary function, I mean one of the input pointers is also the output.

I also supposed the input and output had the same size.

Then I wrote a apply a slight modification in order to manage the situation where the input and the output don't have the same size.

e.g. the horizontal addition of the elements of a vector.

Since this modification something weird happen.

I apply it for process histograms of unsigned short.

The histogram is store in a std::vector<std::uint64_t> of size 65536.

I want to make the addition of two histograms in one and clear the other.

At the moment where my class split itself the input in two output elements something mysterious happen.

65536 / 2 = 1099511660544.

Like everyone I sustpected a saturation of range what could have been copied into another type.

But for manage the size I use std::size_t EVERY WHERE. I am working on 64 bits machine so I do think it's not a saturation issue.

Any help would be helpfull.

Thanks in advance.

---- EDIT----

Allow me a practical exemple of my issue :

worker.hpp :

#ifndef WORKER_HPP
#define WORKER_HPP

#include <tbb/task.h>

#include <type_traits>

#ifndef TBB_ASSERT
#ifdef _DEBUG
#include <cassert>
#define TBB_ASSERT(x) std::assert(x)
#else
#define TBB_ASSERT(x)
#endif
#endif

#define OUTPUT_SIZE_EQUAL_INPUT_SIZE std::size_t(0x80000000)

namespace separable_calculation
{

template<class _Sty,class _Dty = _Sty>
struct basic_operator
{

    typedef const _Sty* const_src_pointer;
    typedef _Dty* dst_pointer;

    typedef const_src_pointer const_pointer;
    typedef dst_pointer pointer;

    basic_operator() = default;

    basic_operator(const basic_operator&) = delete;
    basic_operator(basic_operator&&) = delete;

    virtual ~basic_operator() = default;

    basic_operator& operator=(const basic_operator&) = delete;
    basic_operator& operator=(basic_operator&&) = delete;

    virtual void operator()(const_pointer,const std::size_t&,pointer/*,const std::size_t&*/) = 0;
};


template<class _Ty,template<class>class _Op>
class worker : public tbb::task
{

//    static_assert(std::is_function<_Op>::value || std::is_base_of<basic_operator<_Ty>*,&_Op>::value,"The operator functor must be a derivation of the struct basic_operator");
    static_assert(!std::is_abstract<_Op<_Ty> >::value,"The operator struct / class is abstract");

public:

    typedef _Ty* pointer;
    typedef const _Ty* const_pointer;
    typedef std::size_t size_type;

private:

    typedef worker self;

    enum
    {
        MUST_BE_PROCESSED = 0x2,
        HAS_BEEN_PROCESSED = 0x4,
        FIRST_HALF_MUTEX = 0x8,
        SECOND_HALF_MUTEX = 0x10
    };


    const_pointer _src;
    size_type _sz;
    pointer _dst;
    size_type _dsz;

    unsigned char _status;


    inline worker get_first_half()
    {
        std::size_t chk(2);

        return worker(this->_src,this->_sz/chk,this->_dst,this->_dsz/chk);
    }

    inline worker get_second_half()
    {
        const std::size_t half_src = this->_sz/2;
        const std::size_t half_dst = this->_sz/2;

        return worker(this->_src+half_src,half_src,this->_dst+half_dst,half_dst);
    }

    // helpfull for the method run.
    worker(worker&& obj);

    // usefull for prepare the two sub-tasks.
    worker(const worker& obj,int);

public:



    worker(const_pointer src,const size_type& sz,pointer dst,const size_type& dsz = OUTPUT_SIZE_EQUAL_INPUT_SIZE);


    inline worker():
        worker(nullptr,0,nullptr,0)
    {}

    worker(const worker&) = delete;


    ~worker() = default;

    worker& operator=(const worker&) = delete;
    worker& operator=(worker&&) = delete;

    virtual tbb::task* execute();

    // Execute the tasks.
    void run();


    // Update the source and | or the destination pointers.
    void update(const_pointer src,const std::size_t& sz,pointer dst=nullptr,const std::size_t& dsz = OUTPUT_SIZE_EQUAL_INPUT_SIZE);


};


}

#endif // WORKER_H

worker.tcc :

   #ifndef WORKER_TCC
    #define WORKER_TCC

    #include <iostream>

    #define DEBUG(str) std::cout<<str<<" "<<this->_sz<<" "<<this->_dsz<<std::endl;


    namespace separable_calculation
    {


    template<class _Ty,template<class>class _Op>
    worker<_Ty,_Op>::worker(worker&& obj):
        _src(obj._src),
        _sz(obj._sz),
        _dst(obj._dst),
        _dsz(obj._dsz),
        _status(obj._status)
    {
        DEBUG("move ctor")
    }

    template<class _Ty,template<class>class _Op>
    worker<_Ty,_Op>::worker(const worker& obj,int):
        _src(obj._src),
        _sz(obj._sz),
        _dst(obj._dst),
        _dsz(obj._dsz),
        _status(HAS_BEEN_PROCESSED)
    {
        DEBUG("copy ctor")
    }


    template<class _Ty,template<class>class _Op>
    worker<_Ty,_Op>::worker(const_pointer src,const size_type& sz,pointer dst,const size_type& dsz):
        _src(src),
        _sz(sz),
        _dst(dst),
        _dsz(dsz == OUTPUT_SIZE_EQUAL_INPUT_SIZE ? sz : dsz),
        _status(MUST_BE_PROCESSED)
    {
        DEBUG("param ctor")
    }



    template<class _Ty,template<class>class _Op>
    tbb::task* worker<_Ty,_Op>::execute()
    {
        tbb::task* ret(nullptr);

        // prepare the two sub-tasks
        if((this->_status & MUST_BE_PROCESSED) == MUST_BE_PROCESSED)
        {
            tbb::task* a = new (this->allocate_child()) self(this->get_first_half(),int());
            tbb::task* b = new (this->allocate_child()) self(this->get_second_half(),int());

            this->set_ref_count(3);

            this->spawn(*a);
            this->spawn_and_wait_for_all(*b);

        }
        else
        {
            _Op<_Ty> op;

            std::cout<<"check "<<this->_sz<<" "<<this->_dsz<<std::endl;

            op(this->_src,this->_sz,this->_dst/*,this->_dsz*/);
        }

        return ret;
    }


    template<class _Ty,template<class>class _Op>
    void worker<_Ty,_Op>::run()
    {
        TBB_ASSERT(this->_dst && this->_src);

        self& s = *new(tbb::task::allocate_root()) self(std::move(*this));

        tbb::task::spawn_root_and_wait(s);

    }


    template<class _Ty,template<class>class _Op>
    void worker<_Ty,_Op>::update(const_pointer src,const std::size_t& sz,pointer dst,const std::size_t& dsz)
    {
        std::cout<<"update"<<std::endl;
        if(src)
        {
            this->_src = src;
            this->_sz = sz;
        }

        if(dst)
            this->_dst = dst;

        if(dsz != OUTPUT_SIZE_EQUAL_INPUT_SIZE)
            this->_dsz = (dsz != OUTPUT_SIZE_EQUAL_INPUT_SIZE) ? dsz : this->_sz;

        this->_status = MUST_BE_PROCESSED;
    }



    }


    #endif // WORKER_TCC

main.cpp :

    #include "worker.hpp"
    #include "worker.tcc"
    #include <cstdlib>

    namespace
    {

    template<class _Ty>
    struct add_t : separable_calculation::basic_operator<_Ty>
    {
        typedef separable_calculation::basic_operator<_Ty> MyBase;

        typedef typename MyBase::const_pointer const_pointer;
        typedef typename MyBase::pointer pointer;
        typedef typename MyBase::const_src_pointer const_src_pointer;
        typedef typename MyBase::dst_pointer dst_pointer;


        add_t() = default;

        virtual ~add_t() = default;

        virtual void operator()(const_pointer src,const std::size_t& sz,pointer dst/*,const std::size_t& dsz*/)
        {
            pointer it_d = dst;

            for(const_pointer it_s = src;it_s != (src+sz); it_s++,it_d++)
                *it_d += *it_s;


        }
    };

    }

    int main()
    {

        std::vector<std::uint64_t> a(65536,1);
        std::vector<std::uint64_t> b(a);

        separable_calculation::worker<std::uint64_t,

add_t> calc(a.data(),a.size(),b.data(),b.size());

        calc.run();



    return EXIT_SUCCESS;
    }

My output for this exemple are :

param ctor 65536 65536

move ctor 65536 65536

param ctor 1099511660544 32768

copy ctor 1099511660544 32768

param ctor 1099511660544 32768

copy ctor 1099511660544 32768

check 1099511660544 32768

And then it has crashed.

Now if you do neutralize _dsz on the class worker as is :

let say worker2.hpp :

#ifndef WORKER2_HPP
#define WORKER2_HPP

#include <tbb/task.h>

#include <type_traits>

#ifndef TBB_ASSERT
#ifdef _DEBUG
#include <cassert>
#define TBB_ASSERT(x) std::assert(x)
#else
#define TBB_ASSERT(x)
#endif
#endif


namespace separable_calculation2
{



template<class _Ty,template<class>class _Op>
class worker : public tbb::task
{

//    static_assert(std::is_function<_Op>::value || std::is_base_of<basic_operator<_Ty>*,&_Op>::value,"The operator functor must be a derivation of the struct basic_operator");
    static_assert(!std::is_abstract<_Op<_Ty> >::value,"The operator struct / class is abstract");

public:

    typedef _Ty* pointer;
    typedef const _Ty* const_pointer;
    typedef std::size_t size_type;

private:

    typedef worker self;

    enum
    {
        MUST_BE_PROCESSED = 0x2,
        HAS_BEEN_PROCESSED = 0x4,
        FIRST_HALF_MUTEX = 0x8,
        SECOND_HALF_MUTEX = 0x10
    };


    const_pointer _src;
    size_type _sz;
    pointer _dst;
//    size_type _dsz;

    unsigned char _status;


    inline worker get_first_half()
    {
        std::size_t chk(2);

        return worker(this->_src,this->_sz/chk,this->_dst/*,this->_dsz/chk*/);
    }

    inline worker get_second_half()
    {
        const std::size_t half_src = this->_sz/2;
        const std::size_t half_dst = this->_sz/2;

        return worker(this->_src+half_src,half_src,this->_dst+half_dst/*,half_dst*/);
    }

    // helpfull for the method run.
    worker(worker&& obj);

    // usefull for prepare the two sub-tasks.
    worker(const worker& obj,int);

public:



    worker(const_pointer src,const size_type& sz,pointer dst/*,const size_type& dsz = OUTPUT_SIZE_EQUAL_INPUT_SIZE*/);


    inline worker():
        worker(nullptr,0,nullptr,0)
    {}

    worker(const worker&) = delete;


    ~worker() = default;

    worker& operator=(const worker&) = delete;
    worker& operator=(worker&&) = delete;

    virtual tbb::task* execute();

    // Execute the tasks.
    void run();


    // Update the source and | or the destination pointers.
    void update(const_pointer src,const std::size_t& sz,pointer dst=nullptr/*,const std::size_t& dsz = OUTPUT_SIZE_EQUAL_INPUT_SIZE*/);


};


}

#endif // WORKER2_H

worker2.tcc:

    #ifndef WORKER2_TCC
    #define WORKER2_TCC

    #include <iostream>

    #define DEBUG(str) std::cout<<str<<" "<<this->_sz<<" "<<this->_dsz<<std::endl;


    namespace separable_calculation2
    {


    template<class _Ty,template<class>class _Op>
    worker<_Ty,_Op>::worker(worker&& obj):
        _src(obj._src),
        _sz(obj._sz),
        _dst(obj._dst),
    //    _dsz(obj._dsz),
        _status(obj._status)
    {
    //    DEBUG("move ctor")
    }

    templa

te<class _Ty,template<class>class _Op>
worker<_Ty,_Op>::worker(const worker& obj,int):
    _src(obj._src),
    _sz(obj._sz),
    _dst(obj._dst),
//    _dsz(obj._dsz),
    _status(HAS_BEEN_PROCESSED)
{
//    DEBUG("copy ctor")
}


template<class _Ty,template<class>class _Op>
worker<_Ty,_Op>::worker(const_pointer src,const size_type& sz,pointer dst/*,const size_type& dsz*/):
    _src(src),
    _sz(sz),
    _dst(dst),
//    _dsz(dsz == OUTPUT_SIZE_EQUAL_INPUT_SIZE ? sz : dsz),
    _status(MUST_BE_PROCESSED)
{
//    DEBUG("param ctor")
}



template<class _Ty,template<class>class _Op>
tbb::task* worker<_Ty,_Op>::execute()
{
    tbb::task* ret(nullptr);

    // prepare the two sub-tasks
    if((this->_status & MUST_BE_PROCESSED) == MUST_BE_PROCESSED)
    {
        tbb::task* a = new (this->allocate_child()) self(this->get_first_half(),int());
        tbb::task* b = new (this->allocate_child()) self(this->get_second_half(),int());

        this->set_ref_count(3);

        this->spawn(*a);
        this->spawn_and_wait_for_all(*b);

    }
    else
    {
        _Op<_Ty> op;

//        std::cout<<"check "<<this->_sz<<" "<<this->_dsz<<std::endl;

        op(this->_src,this->_sz,this->_dst/*,this->_dsz*/);
    }

    return ret;
}


template<class _Ty,template<class>class _Op>
void worker<_Ty,_Op>::run()
{
    TBB_ASSERT(this->_dst && this->_src);

    self& s = *new(tbb::task::allocate_root()) self(std::move(*this));

    tbb::task::spawn_root_and_wait(s);

}


template<class _Ty,template<class>class _Op>
void worker<_Ty,_Op>::update(const_pointer src,const size_type& sz,pointer dst/*,const std::size_t& dsz*/)
{
    std::cout<<"update"<<std::endl;
    if(src)
    {
        this->_src = src;
        this->_sz = sz;
    }

    if(dst)
        this->_dst = dst;

//    if(dsz != OUTPUT_SIZE_EQUAL_INPUT_SIZE)
//        this->_dsz = (dsz != OUTPUT_SIZE_EQUAL_INPUT_SIZE) ? dsz : this->_sz;

    this->_status = MUST_BE_PROCESSED;
}



}


#endif // WORKER2_TCC

Now in the main.cpp add :

 #include "worker2.hpp"
#include "worker2.tcc"

And inside the function main add neutralize the previous calc object and write :

    separable_calculation2::worker<std::uint64_t,add_t> calc(a.data(),a.size(),b.data());

    calc.run();

std::cout<<"END PROCESSING"<<std::endl;

Solution

  • It is not an answer, just a note on your code.

    I see some inefficiency in that you use low-level task API (there are high-level task_group and parallel_invoke) but still in blocking style:

            this->set_ref_count(3);
    
            this->spawn(*a);
            this->spawn_and_wait_for_all(*b);
    

    It is less efficient than continuation-passing style. and less readable than high-level API