I have a simple question for you. I made this code to calculate the factorial of a number without recursion.
int fact2(int n){
int aux=1, total = 1;
int i;
int limit = n - 1;
for (i=1; i<=limit; i+=2){
aux = i*(i+1);
total = total*aux;
}
for (;i<=n;i++){
total = total*i;
}
return total;
}
As you can see, my code uses loop unrolling to optimize clock cycles in the execution. Now I'm asked to add two-way parallelism to the same code, any idea how?
You can use ptherads library to create two separate threads. Each thread should do half of the multiplications. I could put together following solution.
#include <pthread.h>
typedef struct {
int id;
int num;
int *result;
} thread_arg_t;
void* thread_func(void *arg) {
int i;
thread_arg_t *th_arg = (thread_arg_t *)arg;
int start, end;
if(th_arg->id == 0) {
start = 1;
end = th_arg->num/2;
} else if (th_arg->id == 1) {
start = th_arg->num / 2;
end = th_arg->num + 1;
} else {
return NULL;
}
for(i=start; i < end; i++) {
th_arg->result[th_arg->id] *= i;
}
return NULL;
}
int factorial2(int n) {
pthread_t threads[2];
int rc;
int result[2];
thread_arg_t th_arg[2];
for(i=0; i<2; i++) {
th_arg[i].id = i;
th_arg[i].num = n;
th_arg[i].result = result;
rc = pthread_create(&threads[i], NULL, thread_func, (void *)&th_arg[i]);
if (rc){
printf("pthread_create() failed, rc = %d\n", rc);
exit(1);
}
}
/* wait for threads to finish */
for(i=0; i<2; i++) {
pthread_join(thread[i], NULL);
/* compute final one multiplication */
return (result[0] * result[1]);
}
The pthread library implementation should take care of parallelizing the work of two threads for you. Also, this example can be generalized for N threads with minor modifications.