Search code examples
cparallel-processingopenmppari

How to use OpenMP with PARI/GP?


I want to parallelize a for loop that includes some operations from PARI/GP library, along with some other operations. However, using OpenMP with PARI/GP does not seem very straightforward it appears. The only similar example I found is this, which in fact seems to use sections from OpenMP. I tried to do something using parallel for clause as you can see below, however, it throws segmentation fault on PARI/GP.

#include <omp.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include "pari/pari.h"

#define MAX_THREAD      8
#define MAX_COUNT       8
#define CLOCK_PRECISION 1E9

typedef struct vector {
  void **items;
  unsigned int capacity;
  unsigned int size;
} vector_st;

typedef vector_st *vector_t;

vector_t vector_init(unsigned int capacity) {
  if (capacity <= 0) {
    fprintf(stderr, "Error: invalid vector capacity.\n");
    exit(1);
  }

  vector_t v = (vector_t) calloc(1, sizeof(vector_st));
  v->capacity = capacity;
  v->size = 0;
  v->items = malloc(sizeof(void *) * v->capacity);
  if (v->items == NULL) {
    fprintf(stderr, "Error: could not allocate memory.\n");
    exit(1);
  }

  return v;
}

unsigned int vector_size(vector_t v) {
  return v->size;
}


void *vector_get(vector_t v, unsigned int index) {
  if (index >= v->size) {
    fprintf(stderr, "Error: index larger than the vector size.\n");
    exit(1);
  }
  return v->items[index];
}

void vector_resize(vector_t v, unsigned int capacity) {
#ifdef DEBUG
  printf("Vector resize, from %u to %u.\n", v->capacity, capacity);
#endif

  void **items = realloc(v->items, sizeof(void *) * capacity);
  if (items) {
    v->items = items;
    v->capacity = capacity;
  } else {
    fprintf(stderr, "Error: could not re-allocate memory.\n");
    exit(1);
  }
}

void vector_add(vector_t v, void *item) {
  if (v->capacity == v->size) {
    vector_resize(v, v->capacity * 2);
  }
  v->items[v->size++] = item;
}

void vector_copy(vector_t c, vector_t a) {
  unsigned int i;
  for (i = 0; i < vector_size(a); i++) {
    vector_add(c, vector_get(a, i));
  }
}

void vector_free(vector_t v) {
  if (v) {
    if (v->items) {
      free(v->items);
    }

    v->capacity = 0;
    v->size = 0;

    free(v);
    v = NULL;
  }
}

typedef struct {
  GEN sk;
  GEN pk;
} cl_key_pair_st;

typedef cl_key_pair_st *cl_key_pair_t;

#define cl_key_pair_null(key_pair) key_pair = NULL;

#define cl_key_pair_new(key_pair)                     \
  do {                                                \
    key_pair = malloc(sizeof(cl_key_pair_st));        \
    if (key_pair == NULL) {                           \
      exit(1);                                        \
    }                                                 \
  } while (0)

#define cl_key_pair_free(key_pair)                    \
  do {                                                \
    free(key_pair);                                   \
    key_pair = NULL;                                  \
  } while (0)


uint64_t mtimer(void) {
    struct timespec time;
    clock_gettime(CLOCK_MONOTONIC, &time);
    return (uint64_t) (time.tv_sec * CLOCK_PRECISION + time.tv_nsec);
}

void test(vector_t keys, const unsigned count) {
  GEN bound = strtoi("25413151665722220203610173826311975594790577398151861612310606875883990655261658217495681782816066858410439979225400605895077952191850577877370585295070770312182177789916520342292660169492395314400288273917787194656036294620169343699612953311314935485124063580486497538161801803224580096");
  GEN g_q_a = strtoi("4008431686288539256019978212352910132512184203702279780629385896624473051840259706993970111658701503889384191610389161437594619493081376284617693948914940268917628321033421857293703008209538182518138447355678944124861126384966287069011522892641935034510731734298233539616955610665280660839844718152071538201031396242932605390717004106131705164194877377");
  GEN g_q_b = negi(strtoi("3117991088204303366418764671444893060060110057237597977724832444027781815030207752301780903747954421114626007829980376204206959818582486516608623149988315386149565855935873517607629155593328578131723080853521348613293428202727746191856239174267496577422490575311784334114151776741040697808029563449966072264511544769861326483835581088191752567148165409"));
  GEN g_q_c = strtoi("7226982982667784284607340011220616424554394853592495056851825214613723615410492468400146084481943091452495677425649405002137153382700126963171182913281089395393193450415031434185562111748472716618186256410737780813669746598943110785615647848722934493732187571819575328802273312361412673162473673367423560300753412593868713829574117975260110889575205719");
  GEN g_q = qfi(g_q_a, g_q_b, g_q_c);

  size_t numthread = MAX_THREAD;
  if (numthread > count) {
    numthread = count;
    omp_set_num_threads(numthread);
  }

  struct pari_thread pth[numthread];
  for (size_t i = 0; i < numthread; i++) {
    pari_thread_alloc(&pth[i], 100000000, NULL);
  }

  #pragma omp parallel
  {
    int thnum = omp_get_thread_num();
    if (thnum) {
      (void) pari_thread_start(&pth[thnum]);
    }

    vector_t keys_private = vector_init(count / numthread);

    #pragma omp for schedule(static)
    for (size_t i = 0; i < count; i++) {
      cl_key_pair_t key_pair_i;
      cl_key_pair_null(key_pair_i);
      cl_key_pair_new(key_pair_i);

      key_pair_i->sk = randomi(bound);
      key_pair_i->pk = nupow(g_q, key_pair_i->sk, NULL);
      vector_add(keys_private, key_pair_i);
    }

    #pragma omp for ordered
    for (size_t i = 0; i < numthread; i++) {
      #pragma omp ordered
      {
        vector_copy(keys, keys_private);
      }
    }

    if (thnum) {
      pari_thread_close();
    }
  }

  for (size_t i = 0; i < numthread; i++) {
    if (&pth[i] != NULL) pari_thread_free(&pth[i]);
  }
}

int main(void) {
  pari_init(1000000000, 2);
    setrand(getwalltime());

  vector_t keys = vector_init(MAX_COUNT);

  uint64_t start_time = mtimer();
  test(keys, MAX_COUNT);
  uint64_t stop_time = mtimer();
  double total_time = ((stop_time - start_time) / CLOCK_PRECISION);
  printf("Elapsed time: %.5lf s\n", total_time);

  printf("\nvector_size(keys): %u\n", vector_size(keys));
  for (size_t i = 0; i < vector_size(keys); i++) {
    cl_key_pair_t key_pair_i = (cl_key_pair_t) vector_get(keys, i);
    printf("%zu->sk: %s\n", i, GENtostr(key_pair_i->sk));
    printf("%zu->pk: %s\n", i, GENtostr(key_pair_i->pk));
  }

  vector_free(keys);
  pari_close();
  return 0;
}

Any ideas how to use GEN inside struct and with OpenMP?


Solution

  • The fix involves removing the following lines:

    for (size_t i = 0; i < numthread; i++) {
      if (&pth[i] != NULL) pari_thread_free(&pth[i]);
    }
    

    I received this fix in PARI/GP mailing list by Bill Allombert, who also provided the following explanation for it:

    Each threads use its own PARI stack to store results. Calling pari_thread_free destroys the PARI stack thread and any result computed by that thread. So whatever result you want to preserve need to be moved to the main stack using gcopy before calling pari_thread_free.

    In fact this is why the API has pari_thread_alloc: to allow the thread stacks to survive pari_thread_close so that the main thread get the results.