I'm trying to write a thread pool in c++
that fulfills the following criteria:
thread_pool
class.thread_pool::start_work
that changes this shared input, and tells all the workers to work for a fixed number of tasks.thread_pool::start_work
returns std::future
Below is what I have so far. It can be built and run with g++ test_tp.cpp -std=c++17 -lpthread; ./a.out
Unfortunately it either deadlocks or does the work too many (or sometimes too few) times. I am thinking that it's because m_num_comps_done
is not thread-safe. There are chances that all the threads skip over the last count, and then they all end up yield
ing. But isn't this variable atomic?
#include <vector>
#include <thread>
#include <mutex>
#include <shared_mutex>
#include <queue>
#include <atomic>
#include <future>
#include <iostream>
#include <numeric>
/**
* @class join_threads
* @brief RAII thread killer
*/
class join_threads
{
std::vector<std::thread>& m_threads;
public:
explicit join_threads(std::vector<std::thread>& threads_)
: m_threads(threads_) {}
~join_threads() {
for(unsigned long i=0; i < m_threads.size(); ++i) {
if(m_threads[i].joinable())
m_threads[i].join();
}
}
};
// how remove the first two template parameters ?
template<typename func_input_t, typename F>
class thread_pool
{
using func_output_t = typename std::result_of<F(func_input_t)>::type;
static_assert( std::is_floating_point<func_output_t>::value,
"function output type must be floating point");
unsigned m_num_comps;
std::atomic_bool m_done;
std::atomic_bool m_has_an_input;
std::atomic<int> m_num_comps_done; // need to be atomic? why?
F m_f; // same function always used
func_input_t m_param; // changed occasionally by a single writer
func_output_t m_working_output; // many reader threads average all their output to get this
std::promise<func_output_t> m_out;
mutable std::shared_mutex m_mut;
mutable std::mutex m_output_mut;
std::vector<std::thread> m_threads;
join_threads m_joiner;
void worker_thread() {
while(!m_done)
{
if(m_has_an_input){
if( m_num_comps_done.load() < m_num_comps - 1 ) {
std::shared_lock<std::shared_mutex> lk(m_mut);
func_output_t tmp = m_f(m_param); // long time
m_num_comps_done++;
// quick
std::lock_guard<std::mutex> lk2(m_output_mut);
m_working_output += tmp / m_num_comps;
}else if(m_num_comps_done.load() == m_num_comps - 1){
std::shared_lock<std::shared_mutex> lk(m_mut);
func_output_t tmp = m_f(m_param); // long time
m_num_comps_done++;
std::lock_guard<std::mutex> lk2(m_output_mut);
m_working_output += tmp / m_num_comps;
m_num_comps_done++;
try{
m_out.set_value(m_working_output);
}catch(std::future_error& e){
std::cout << "future_error caught: " << e.what() << "\n";
}
}else{
std::this_thread::yield();
}
}else{
std::this_thread::yield();
}
}
}
public:
/**
* @brief ctor spawns working threads
*/
thread_pool(F f, unsigned num_comps)
: m_num_comps(num_comps)
, m_done(false)
, m_has_an_input(false)
, m_joiner(m_threads)
, m_f(f)
{
unsigned const thread_count=std::thread::hardware_concurrency(); // should I subtract one?
try {
for(unsigned i=0; i<thread_count; ++i) {
m_threads.push_back( std::thread(&thread_pool::worker_thread, this));
}
} catch(...) {
m_done=true;
throw;
}
}
~thread_pool() {
m_done=true;
}
/**
* @brief changes the shared data member,
* resets the num_comps_left variable,
* resets the accumulator thing to 0, and
* resets the promise object
*/
std::future<func_output_t> start_work(func_input_t new_param) {
std::unique_lock<std::shared_mutex> lk(m_mut);
m_param = new_param;
m_num_comps_done = 0;
m_working_output = 0.0;
m_out = std::promise<func_output_t>();
m_has_an_input = true; // only really matters just after initialization
return m_out.get_future();
}
};
double slowSum(std::vector<double> nums) {
// std::this_thread::sleep_for(std::chrono::milliseconds(200));
return std::accumulate(nums.begin(), nums.end(), 0.0);
}
int main(){
// construct
thread_pool<std::vector<double>, std::function<double(std::vector<double>)>>
le_pool(slowSum, 1000);
// add work
auto ans = le_pool.start_work(std::vector<double>{1.2, 3.2, 4213.1});
std::cout << "final answer is: " << ans.get() << "\n";
std::cout << "it should be 4217.5\n";
return 1;
}
You check the "done" count, then get the lock. This allows multiple threads to be waiting for the lock. In particular, there might not be a thread that enters the second if
body.
The other side of that is because you have all threads running all the time, the "last" thread may not get access to its exclusive section early (before enough threads have run) or even late (because additional threads are waiting at the mutex in the first loop).
To fix the first issue, since the second if
block has all of the same code that is in the first if
block, you can have just one block that checks the count to see if you've reached the end and should set the out value.
The second issue requires you to check m_num_comps_done
a second time after acquiring the mutex.