I'm reading C++ Concurrency in Action, 2nd edition, and the author shows an implementation of a thread pool that uses work stealing, as follows:
// Listing 9.7 Lock-based queue for work stealing
// Mutex-protected double-ended task queue used for work stealing.
// push() and try_pop() operate on the front of the deque, while try_steal()
// takes from the back, so a stealing thread removes the item that was pushed
// earliest among those still queued.
class work_stealing_queue
{
private:
    using data_type = function_wrapper;

    std::deque<data_type> the_queue;
    // mutable so that the const member empty() can still take the lock.
    mutable std::mutex the_mutex;

public:
    work_stealing_queue()
    {
    }

    // Not copyable: the queue contains a mutex.
    work_stealing_queue(const work_stealing_queue&) = delete;
    work_stealing_queue& operator=(const work_stealing_queue&) = delete;

    // Adds `data` at the front of the queue.
    void push(data_type data)
    {
        std::lock_guard<std::mutex> guard(the_mutex);
        the_queue.push_front(std::move(data));
    }

    // Returns true if the queue held no items at the moment it was checked.
    bool empty() const
    {
        std::lock_guard<std::mutex> guard(the_mutex);
        return the_queue.empty();
    }

    // Moves the front item into `res` and removes it.
    // Returns false, leaving `res` untouched, if the queue is empty.
    bool try_pop(data_type& res)
    {
        std::lock_guard<std::mutex> guard(the_mutex);
        bool const has_item = !the_queue.empty();
        if (has_item)
        {
            res = std::move(the_queue.front());
            the_queue.pop_front();
        }
        return has_item;
    }

    // Moves the back item into `res` and removes it.
    // Returns false, leaving `res` untouched, if the queue is empty.
    bool try_steal(data_type& res)
    {
        std::lock_guard<std::mutex> guard(the_mutex);
        bool const has_item = !the_queue.empty();
        if (has_item)
        {
            res = std::move(the_queue.back());
            the_queue.pop_back();
        }
        return has_item;
    }
};
// Listing 9.8 A thread pool that uses work stealing
class thread_pool
{
typedef function_wrapper task_type;
std::atomic_bool done;
threadsafe_queue<task_type> pool_work_queue;
std::vector<std::unique_ptr<work_stealing_queue>> queues;
std::vector<std::thread> threads;
join_threads joiner;
static thread_local work_stealing_queue* local_work_queue;
static thread_local unsigned my_index;
void worker_thread(unsigned my_index_)
{
my_index=my_index_;
local_work_queue=queues[my_index].get();
while(!done)
{
run_pending_task();
}
}
bool pop_task_from_local_queue(task_type& task)
{
return local_work_queue && local_work_queue->try_pop(task);
}
bool pop_task_from_pool_queue(task_type& task)
{
return pool_work_queue.try_pop(task);
}
bool pop_task_from_other_thread_queue(task_type& task)
{
for(unsigned i=0;i<queues.size();++i)
{
unsigned const index=(my_index+i+1)%queues.size();
if(queues[index]->try_steal(task))
{
return true;
}
}
return false;
}
public:
thread_pool():
done(false),joiner(threads)
{
unsigned const thread_count=std::thread::hardware_concurrency();
try
{
for(unsigned i=0;i<thread_count;++i)
{
queues.push_back(std::unique_ptr<work_stealing_queue>(
new work_stealing_queue));
}
for(unsigned i=0;i<thread_count;++i)
{
threads.push_back(std::thread(&thread_pool::worker_thread,this,i));
}
}
catch(...)
{
done=true;
throw;
}
}
~thread_pool()
{
done=true;
}
template<typename FunctionType>
std::future<typename std::result_of<FunctionType()>::type> submit(
FunctionType f)
{
typedef typename std::result_of<FunctionType()>::type result_type;
std::packaged_task<result_type()> task(f);
std::future<result_type> res(task.get_future());
if(local_work_queue)
{
local_work_queue->push(std::move(task));
}
else
{
pool_work_queue.push(std::move(task));
}
return res;
}
void run_pending_task()
{
task_type task;
if(pop_task_from_local_queue(task) ||
pop_task_from_pool_queue(task) ||
pop_task_from_other_thread_queue(task))
{
task();
}
else
{
std::this_thread::yield();
}
}
};
Here is my question: in the constructor of `thread_pool`, the author first constructs all the `work_stealing_queue` objects and then all the worker threads. When the execution of a worker thread reaches `run_pending_task`, it accesses the `thread_pool::queues` member variable. Is it possible that, because of reordering, the construction of the elements in `thread_pool::queues` has not yet finished when the worker thread accesses them? If not, how is the ordering ensured? I could not find any synchronizes-with relationship between these events.
Could you please explain whether the scenario described above is actually a thread-safety problem?
Thanks for reading!
The completion of the `std::thread` constructor call synchronizes-with the start of the invocation of the thread function (see [thread.thread.constr]/6).
Therefore it is guaranteed that `worker_thread` will see the state of `queues` after the loop in which its elements are constructed, and there is no data race.