Search code examples
calgorithmsolverdecompositioncryptarithmetic-puzzle

Efficiency of Cryptarithmetic Algorithm solver decomposition


The problem is the following: Given "ABC+DEF=GHI" format string, where A,B,C etc. represent unique digits, find the expression that gives maximum GHI. Ex: Input string is AAB+AAB=AAB, then there's no solution. If it is instead AAA + BBB = AAA, a solution is 999 + 000 = 999. Another example string: ABC + CBA = GGG, a result is => 543 + 345 = 888.

I have ruled out impossible cases easily. The algorithm I have in mind is a bruteforce, that simply tries maximizing the rhs first. However my problem was doing this fast, and also watching out for the unique digits. What's an efficient way to solve this problem?

Notes: I wish to solve this in a singlethreaded approach, and my current problem is detecting if a unique digit is used in "assign_value" function. Perhaps a better method to assign values is there?

EDIT: As per smci's suggestion, here's what I want to achieve, in the very end: ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828 -- A system that can handle not only strings of the form I provided in the beginning (3 digit number + 3 digit number = 3 digit number) but also those. However it doesn't hurt to start simple and go with the solution of format I gave :)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_EXPRESSION_SIZE 11 + 1
#define MAX_VARIABLES 9

int variables_read[MAX_VARIABLES] = { 0 };

struct variable {
    int coefficient;
    int* ptr;
    int side;
    int canhavezero;
    unsigned value_max;
};

typedef struct variable Variable;

struct equation {
    Variable* variables[9]; // max
    unsigned distinct_on_rhs;
    unsigned var_count;
};

typedef struct equation Equation;

int int_pow(int n, int k) {
    int res = 1;
    for(int i = 0; i < k; ++i)
        res *= n;
    return res;
}

void AddVariable(Equation* E, Variable* V) {
    E->variables[E->var_count++] = V;
}

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int assign_value(Equation* E, int pos, int* values) {
    if(!E->variables[pos]->value_count) {
        if(pos < 0)
            return 2;
        // if no possible values left, reset this, but take one value count from the closest variable
        E->variables[pos - 1]->value_count--;
        E->variables[pos]->value_count = E->variables[pos]->value_max;
        return 0;
    }
    int i;
    for(i = 9; i >= 0 && values[i] == -1; --i)
    printf("Assigning %d to %c\n", E->variables[pos]->value_set[E->variables[pos]->value_count - 1], 'A' + (E->variables[pos]->ptr - E->variables[0]->ptr));
    *(E->variables[pos]->ptr) = values[i];
    values[i] = -1; // we have unique numbers
    return 0;
}

int isSolved(Equation E) {
    int sum = 0, coeff = 0;
    printf("Trying...\n");
    for(int i = 0; i < E.var_count; ++i) {
        coeff = E.variables[i]->coefficient * (*E.variables[i]->ptr);
        printf("%d ", *E.variables[i]->ptr);
        if(E.variables[i]->side)
            coeff *= -1;
        sum += coeff;
    }
    printf("\nSum was %d\n", sum);
    return !sum;
}

char* evaluate(char* expression) {
    char* res;
    // check for impossible cases first
    if(IsImpossible(expression)) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
        return res;
    }
    res = (char *) malloc(sizeof(char) * MAX_EXPRESSION_SIZE);
    // now try to find solutions, first describe the given characters as equations
    Equation E;
    E.var_count = 0;
    E.distinct_on_rhs = 0;
    int side_mode = 0, powcounter = 0;
    int a = -1, b = -1, c = -1, d = -1, e = -1, f = -1, g = -1, h = -1, i = -1;
    int* max_variables[MAX_VARIABLES] = { &a, &b, &c, &d, &e, &f, &g, &h, &i };
    for(int j = 0; j < MAX_EXPRESSION_SIZE - 1; ++j) {
        if(expression[j] == '+')
            continue;
        if(expression[j] == '=') {
            side_mode = 1;
            continue;
        }
        Variable* V = (Variable *) malloc(sizeof(Variable));
        // we know we always get 3 digit numbers but we can easily change if we need to
        V->coefficient = int_pow(10, 2 - (powcounter % 3));
        V->ptr = max_variables[expression[j] - 'A'];
        V->side = side_mode;
        E.distinct_on_rhs += side_mode && !variables_read[expression[j] - 'A'];
        if(!(powcounter % 3)) { // beginning of a number
            V->value_count = 9;
            V->value_max = 9;
            V->canhavezero = 0;
        }
        else {
            V->value_count = 10;
            V->value_max = 10;
            V->canhavezero = 1;
        }
        AddVariable(&E, V);
        variables_read[expression[j] - 'A'] = 1;
        ++powcounter;
    }
    for(int j = 0; j < E.var_count; ++j)
        printf("%d %c %d\n", E.variables[j]->coefficient, 'A' + (E.variables[j]->ptr - max_variables[0]), E.variables[j]->side);
    // we got a representaion of the equation, now try to solve it
    int solved = 0;
    // O(9^N), where N is number of distinct variables.
    // An optimization we can do is, we first assign possible max values to rhs number, then go down. We need max number.
    printf("Distincts: %d\n", E.distinct_on_rhs);
    do {
        // try to assign values to all variables and try if it solves the equation
        // but first try to assign rhs as max as possible
        int values[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        int temp = E.var_count - E.distinct_on_rhs;
        while(temp < E.var_count) {
            solved = assign_value(&E, temp, values);
            ++temp;
        }
        for(int j = E.var_count - 1 - E.distinct_on_rhs; j >= 0; --j)
            solved = assign_value(&E, j, values);
        if(solved) // can return no solution
            break;
        printf("Solving...\n");
        solved = isSolved(E);
        system("PAUSE");
    } while(!solved);
    if(solved == 2) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
    }
    else {

    }
    return res;
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    do {
        printf("Enter the formula: ");
        scanf("%s", expression);
        char* res = evaluate(expression);
        printf("%s\n", res);
        free(res);
    } while(expression[0] != '-');
    return 0;
}

Solution

  • I would start with the result. There are not that many different cases:

    • AAA
    • AAB, ABA, BAA
    • ABC

    All other cases can be reduced to these by renaming the variables. ABC + CBA = GGG would become DBC + CBD = AAA.

    Then you have

    • 10 possible solutions for the one-variable case AAA
    • 90 (10*9) for the two variable cases
    • 720 (10*9*8) for the three variable case

    assuming that zero is allowed anywhere. If not, you can filter out those that are not allowed.

    This sets the variables for the right side of the equation. Each variable that appears only on the left, adds possible solutions. B adds a factor of 9, C a factor of 8, D 7 and so forth.