Harmonic progression sum c++ MPI

I'm trying to make a parallel version of "Harmonic Progression Sum" problem using MPI. But I'm new with MPI and I don't know how to run this method with MPI, because it isn't work.

Parallel Program:

//#include "stdafx.h"
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <mpi.h>

#define d 10    //Numbers of Digits (Example: 5 => 0,xxxxx)
#define n 1000  //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5)

using namespace std;

int numProcess, rank, msg, source, dest, tag, qtd_elemento;

int escravo(long unsigned int *digits, int ValueEnd)
{
    MPI_Status status;

    MPI_Recv(digits, (d + 11), MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);

    for (int i = 1; i <= ValueEnd; ++i) {
        long unsigned int remainder = 1;
        for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit) {
            long unsigned int div = remainder / i;
            long unsigned int mod = remainder % i;
            digits[digit] += div;
            remainder = mod * 10;
        }
    }

    MPI_Send(&digits, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
}

void HPSSeguencial(char* output) {
    long unsigned int digits[d + 11];
    int DivN = n / 4; //Limiting slave.

    for (int digit = 0; digit < d + 11; ++digit)
        digits[digit] = 0;

    if (rank != 0){
        escravo(digits, (DivN * 1 ) );
        escravo(digits, (DivN * 2 ) );
        escravo(digits, (DivN * 3 ) );
        escravo(digits, (DivN * 4 ) );
    }

    for (int i = d + 11 - 1; i > 0; --i) {
        digits[i - 1] += digits[i] / 10;
        digits[i] %= 10;
    }
    if (digits[d + 1] >= 5) {
        ++digits[d];
    }


    for (int i = d; i > 0; --i) {
        digits[i - 1] += digits[i] / 10;
        digits[i] %= 10;
    }
    stringstream stringstreamA;
    stringstreamA << digits[0] << ",";


    for (int i = 1; i <= d; ++i) {
        stringstreamA << digits[i];
    }
    string stringA = stringstreamA.str();
    stringA.copy(output, stringA.size());
}

int main() {
    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numProcess);

    char output[d + 10];
    HPSSeguencial(output);
    cout << output << endl;

    MPI_Finalize();

    system("PAUSE");
    return 0;
}

Original Code

#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <time.h>

#define d 10    //Numbers of Digits (Example: 5 => 0,xxxxx)
#define n 1000  //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5)

using namespace std;

void HPS(char* output) {
    long unsigned int digits[d + 11];

    for (int digit = 0; digit < d + 11; ++digit)
        digits[digit] = 0;

    for (int i = 1; i <= n; ++i) {
        long unsigned int remainder = 1;
        for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit) {
            long unsigned int div = remainder / i;
            long unsigned int mod = remainder % i;
            digits[digit] += div;
            remainder = mod * 10;
        }
    }


    for (int i = d + 11 - 1; i > 0; --i) {
        digits[i - 1] += digits[i] / 10;
        digits[i] %= 10;
    }
    if (digits[d + 1] >= 5) {
        ++digits[d];
    }


    for (int i = d; i > 0; --i) {
        digits[i - 1] += digits[i] / 10;
        digits[i] %= 10;
    }
    stringstream stringstreamA;
    stringstreamA << digits[0] << ",";


    for (int i = 1; i <= d; ++i) {
        stringstreamA << digits[i];
    }
    string stringA = stringstreamA.str();
    stringA.copy(output, stringA.size());
}


int main() {

    char output[d + 10];
    HPS(output);
    cout << output<< endl;

    system("PAUSE");
    return 0;
}

Examples:

Input:

#define d 10
#define n 1000

Output:

7,4854708606╠╠╠╠╠╠╠╠╠╠╠╠

Input:

#define d 12
#define n 7

Output:

2,592857142857╠╠╠╠╠╠╠╠╠╠╠╠╠╠ÀÂ♂ü─¨@

Regards

Original Code

http://regulus.pcs.usp.br/marathon/current/warmup.pdf

Solution

I am assuming that you want to parallelize this part:

for (int i = 1; i <= ValueEnd; ++i) 
{
        long unsigned int remainder = 1;
        for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit)
        {
            long unsigned int div = remainder / i;
            long unsigned int mod = remainder % i;
            digits[digit] += div;
            remainder = mod * 10;
        }
}

You can divide each for iteration by each MPI process:

int idP = getProcessId(), numP = numberProcess();
for (int i = idP; i <= ValueEnd; i+=numP)
{
  ...
}

The getProcessId() gives you the process ID and numberProcess() gives you the number of process:

int getProcessId(){
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    return rank;
}
// Get number of process
int numberProcess(){
    int numProc;
    MPI_Comm_size(MPI_COMM_WORLD, &numProc);
    return numProc;
}

Each process will have a copy of the array digits; After the parallel for, the master process collects the results from all slaves process using a MPI_reduce. Or if you want to combine the values from all processes and distribute the result back to all processes you can use MPI_Allreduce.

 long unsigned int digits[d + 11];
    int DivN = n / 4; //Limiting slave.

    for (int digit = 0; digit < d + 11; ++digit)
        digits[digit] = 0;

    if (rank != 0){
        escravo(digits, (DivN * 1 ) );
        escravo(digits, (DivN * 2 ) );
        escravo(digits, (DivN * 3 ) );
        escravo(digits, (DivN * 4 ) );
    }

According to the code above process 0 will not execute the method escravo. Furthermore, you are not distributing correctly the work among processes. Process 1 will execute the out for loop inside the method escravo from 1 until n/4, but then process 2 will execute from 1 until 2n/4... Thus, you have different processes executing the same iterations, when what you really want is to divide these iterations among process.