Search code examples
cgmp

Segmentation fault in C/GMP: calculate factors with Pollard Rho


I am an experienced programmer, but new to C. I am trying to learn C so I can use the gmp library to write fast big-integer programs; eventually I intend to write the performance parts of my programs in C functions that are called via a foreign function interface from other languages. As a vehicle for learning both C and the gmp library I have written the pollard rho integer factorization program shown below (yes, I know this isn't the best possible implementation of pollard rho, but it is simple, and at the moment I just want to get started). My program compiles correctly but when run it dies with the message "Segmentation fault." I assume that means I've got my pointers messed up somewhere, but I don't see it. Probably more than one. Can someone please look at my program and tell me where I've gone wrong? And point out any style issues or anything else I can improve? Many thanks, Phil

/* rho.c -- pollard rho factorization
 * usage: rho n -- prints factors of n then exits
 * compile as gcc -lgmp -o rho rho.c */

#include <stdlib.h>
#include <stdio.h>
#include <gmp.h>

typedef struct list {
    void *data;
    struct list *next;
} List;

List *insert(void *data, List *next)
{
    List *new;

    if (! (new = malloc(sizeof(List))))
        return NULL;
    new->data = data;
    new->next = next;
    return new;
}

List *insert_in_order(void *x, List *xs)
{
    if (xs == NULL || mpz_cmp(x, xs->data) < 0)
    {
        return insert(x, xs);
    } else {
        List *head = xs;
        while (xs->next != NULL && mpz_cmp(x, xs->next->data) < 0)
        {
            xs = xs->next;
        }
        xs->next = insert(x, xs->next);
        return head;
    }
}

void rho_factor(mpz_t f, mpz_t n, long long unsigned c)
{
    mpz_t t, h, d, r;

    mpz_init_set_ui(t, 2);
    mpz_init_set_ui(h, 2);
    mpz_init_set_ui(d, 1);
    mpz_init_set_ui(r, 0);

    while (mpz_cmp_si(d, 1) == 0)
    {
        mpz_mul(t, t, t);
        mpz_add_ui(t, t, c);
        mpz_mod(t, t, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_sub(r, t, h);
        mpz_gcd(d, r, n);
    }

    if (mpz_cmp(d, n) == 0)
    {
        rho_factor(f, n, c+1);
    }
    else if (mpz_probab_prime_p(d, 25))
    {
        mpz_set(f, d);
    }
    else
    {
        rho_factor(f, d, c+1);
    }

    mpz_clears(t, h, d, r);
}

void rho_factors(List *fs, mpz_t n)
{
    mpz_t f;
    mpz_init_set_ui(f, 0);

    while (! (mpz_probab_prime_p(n, 25)))
    {
        rho_factor(f, n, 1);
        fs = insert_in_order(f, fs);
        mpz_divexact(n, n, f);
    }

    mpz_clear(f);
    insert_in_order(n, fs);
}

int main(int argc, char *argv[])
{
    mpz_t f, n;
    mpz_init_set_ui(f, 0);
    List *fs = NULL;

    if (argc<2)
    {
        printf("usage: rho n\n");
        return 1;
    }

    mpz_init_set_str(n, argv[1], 10);

    if (mpz_probab_prime_p(n, 25))
    {
        printf("%s\n", argv[1]);
        return 0;
    }

    printf("%s", argv[1]);
    rho_factors(fs, n);
    while (fs != NULL) {
        printf(" %s", mpz_get_str(NULL, 10, fs->data));
        fs = fs->next;
    }

    return 0;
}

EDIT1: Thanks to Daniel Fischer, I have made some progress, and am no longer getting a segmentation fault. The code shown below correctly prints the factor 127 when rho_factor is called with n = 1234567, but rho_factors returns a null list, so there is still something wrong with rho_factors. I assume the problem is that I can't keep re-initializing the same variable the way I am doing, but I don't know the right way to do it. Somehow I need to make a copy of each factor that I find, then insert that copy in fs. Many thanks, Phil

/* rho.c -- pollard rho factorization
 * usage: rho n -- prints factors of n then exits
 * compile as gcc -lgmp -o rho rho.c */

#include <stdlib.h>
#include <stdio.h>
#include <gmp.h>

typedef struct list {
    void *data;
    struct list *next;
} List;

List *insert(void *data, List *next)
{
    List *new;

    if (! (new = malloc(sizeof(List))))
        return NULL;
    new->data = data;
    new->next = next;
    return new;

List *insert_in_order(void *x, List *xs)
{
    if (xs == NULL || mpz_cmp(x, xs->data) < 0)
    {
        return insert(x, xs);
    } else {
        List *head = xs;
        while (xs->next != NULL && mpz_cmp(x, xs->next->data) < 0)
        {
            xs = xs->next;
        }
        xs->next = insert(x, xs->next);
        return head;
    }
}

void rho_factor(mpz_t f, mpz_t n, long long unsigned c)
{
    mpz_t t, h, d, r;

    mpz_init_set_ui(t, 2);
    mpz_init_set_ui(h, 2);
    mpz_init_set_ui(d, 1);
    mpz_init_set_ui(r, 0);

    while (mpz_cmp_si(d, 1) == 0)
    {
        mpz_mul(t, t, t);
        mpz_add_ui(t, t, c);
        mpz_mod(t, t, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_sub(r, t, h);
        mpz_gcd(d, r, n);
    }

    if (mpz_cmp(d, n) == 0)
    {
        rho_factor(f, n, c+1);
    }
    else if (mpz_probab_prime_p(d, 25))
    {
        mpz_set(f, d);
    }
    else
    {
        rho_factor(f, d, c+1);
    }

    mpz_clears(t, h, d, r, NULL);
}

void rho_factors(List *fs, mpz_t n)
{
    while (! (mpz_probab_prime_p(n, 25)))
    {
        mpz_t f;
        mpz_init_set_ui(f, 0);

        rho_factor(f, n, 1);
        fs = insert_in_order(f, fs);
        mpz_divexact(n, n, f);
    }

    insert_in_order(n, fs);
}

int main(int argc, char *argv[])
{
    mpz_t f, n;
    mpz_init_set_ui(f, 0);
    List *fs = NULL;

    if (argc<2)
    {
        printf("usage: rho n\n");
        return 1;
    }

    mpz_init_set_str(n, argv[1], 10);

    if (mpz_probab_prime_p(n, 25))
    {
        printf("%s is prime\n", argv[1]);
        return 0;
    }

    rho_factor(f, n, 1);
    printf("%s\n", mpz_get_str(NULL, 10, f));

    printf("%s", argv[1]);
    rho_factors(fs, n);
    while (fs != NULL) {
        printf(" %s", mpz_get_str(NULL, 10, fs->data));
        fs = fs->next;
    }

    return 0;
}

EDIT2: Got it. Thanks to Daniel Fischer. Final version shown below.

/* rho.c -- pollard rho factorization
 * usage: rho n -- prints factors of n then exits
 * compile as gcc -lgmp -o rho rho.c */

#include <stdlib.h>
#include <stdio.h>
#include <gmp.h>

typedef struct list {
    void *data;
    struct list *next;
} List;

List *insert(void *data, List *next)
{
    List *new;

    new = malloc(sizeof(List));
    new->data = data;
    new->next = next;
    return new;
}

List *insert_in_order(void *x, List *xs)
{
    if (xs == NULL || mpz_cmp(x, xs->data) < 0)
    {
        return insert(x, xs);
    }
    else
    {
        List *head = xs;
        while (xs->next != NULL && mpz_cmp(x, xs->next->data) < 0)
        {
            xs = xs->next;
        }
        xs->next = insert(x, xs->next);
        return head;
    }
}

void rho_factor(mpz_t f, mpz_t n, long long unsigned c)
{
    mpz_t t, h, d, r;

    mpz_init_set_ui(t, 2);
    mpz_init_set_ui(h, 2);
    mpz_init_set_ui(d, 1);
    mpz_init_set_ui(r, 0);

    while (mpz_cmp_si(d, 1) == 0)
    {
        mpz_mul(t, t, t);
        mpz_add_ui(t, t, c);
        mpz_mod(t, t, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_mul(h, h, h);
        mpz_add_ui(h, h, c);
        mpz_mod(h, h, n);

        mpz_sub(r, t, h);
        mpz_gcd(d, r, n);
    }

    if (mpz_cmp(d, n) == 0)
    {
        rho_factor(f, n, c+1);
    }
    else if (mpz_probab_prime_p(d, 25))
    {
        mpz_set(f, d);
    }
    else
    {
        rho_factor(f, d, c+1);
    }

    mpz_clears(t, h, d, r, NULL);
}

void rho_factors(List **fs, mpz_t n)
{
    while (! (mpz_probab_prime_p(n, 25)))
    {
        mpz_t *f = malloc(sizeof(*f));
        mpz_init_set_ui(*f, 0);

        rho_factor(*f, n, 1);
        *fs = insert_in_order(*f, *fs);
        mpz_divexact(n, n, *f);
    }

    *fs = insert_in_order(n, *fs);
}

int main(int argc, char *argv[])
{
    mpz_t f, n;
    mpz_init_set_ui(f, 0);
    List *fs = NULL;

    if (argc<2)
    {
        printf("usage: rho n\n");
        return 1;
    }

    mpz_init_set_str(n, argv[1], 10);

    if (mpz_probab_prime_p(n, 25))
    {
        printf("%s is prime\n", argv[1]);
        return 0;
    }

    printf("Factors of %s:", argv[1]);
    rho_factors(&fs, n);
    while (fs != NULL) {
        printf(" %s", mpz_get_str(NULL, 10, fs->data));
        fs = fs->next;
    }
    printf("\n");

    return 0;
}

Solution

  • You cannot reuse an mpz_t like that:

    void rho_factors(List *fs, mpz_t n)
    {
        mpz_t f;
        mpz_init_set_ui(f, 0);
    
        while (! (mpz_probab_prime_p(n, 25)))
        {
            rho_factor(f, n, 1);
            fs = insert_in_order(f, fs);
            mpz_divexact(n, n, f);
        }
    
        mpz_clear(f);
        insert_in_order(n, fs);
    }
    

    When GMP reallocates the storage pointed to via f, the old storage is freed. That may well happen in the while loop, but not necessarily. If it doesn't, all list entries point to the same number, however.

    But after the loop, when you mpz_clear(f), every data pointer in the list fs has become a wild pointer, and when you then insert_in_order(n, fs), you dereference previously freed pointers. (That very probably happens already during some insert in the while loop, but here it is guaranteed.)

    Also, in rho_factor, you should call

    mpz_clears(t, h, d, r, NULL);
    

    the argument list to mpz_clears must be NULL-terminated.

    Further problems:

    • You pass a List *, so the changes made to fs in rho_factors don't affect the fs in main. Pass a List ** and dereference it where appropriate, also you forgot to assign the final insert_in_order's returned value.
    • The local variable f goes out of scope at the end of the block, leading to corruption. malloc pointers to mpz_t to keep them alive.

      void rho_factors(List **fs, mpz_t n) { while (! (mpz_probab_prime_p(n, 25))) { mpz_t *f = malloc(sizeof(*f)); mpz_init_set_ui(*f, 0);

          rho_factor(*f, n, 1);
          *fs = insert_in_order(*f, *fs);
          mpz_divexact(n, n, *f);
      }
      
      *fs = insert_in_order(n, *fs);
      

      }

    and in main

    List *fs = NULL;
    /* snip */
    rho_factors(&fs, n);
    

    should do it. Finally (?), you have a glitch in insert_in_order, the comparison in the while loop is backwards.