Search code examples
ccs50vigenere

Vigenere Cipher - Formula Explanation


First of all there is no one that I can ask this kind of question so please pardon me

#include <stdio.h>
#include <cs50.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

int main(int argc, string argv[]) {
    string key = argv[1];
    int l = strlen(argv[1]);
    if (argc != 2) {
        return 0;
    }
    for (int i = 0, n = strlen(key); i < n; i++) {
        if (!isalpha(key[i])) {
            return 0;
        }
        key[i] = tolower(key[i]);
        key[i] = key[i] - 97;
    }
    string txt = GetString();
    for (int k = 0, p = strlen(txt); k < p; k++) {
        if (isalpha(txt[k])) {
            if (isupper(txt[k])) {
                printf("%c", (((txt[k] - 65) + (key[k % l])) % 26 + 65));
            }
            if (islower(txt[k])) {
                printf("%c", (((txt[k] - 97) + (key[k % l])) % 26 + 97));
            }
        } else
        if (!isalpha(txt[k])) {
            printf("%c", txt[k]);
        }
    }
    printf("\n");
    return 0;
}

I can't quite get these 2 lines of code

key[i] = key[i] - 97;
printf("%c", (((txt[k] - 97) + (key[k % l])) % 26 + 97));

Is there an easy explanation of why did we use the first and how the second one works?


Solution

  • The key used for the Vigenere cypher is supposed to be all letters. The first expression converts the string into an array of offsets, 0 for a, 1 for b, etc. 97 is the ASCII code for 'a'. It would be more readable to write:

    for (int i = 0, n = strlen(key); i < n; i++) {
        if (!isalpha((unsigned char)key[i])) {
            printf("key '%s' must contain only letters\n", key);
            return 1;
        }
        key[i] = tolower((unsigned char)key[i]);
        key[i] = key[i] - 'a';
    }
    

    For the second expression, if the character txt[k] is a lower case letter, printf("%c", (((txt[k] - 97) + (key[k % l])) % 26 + 97)); computes and prints the transposed letter by adding the shift value (each character in key is used as a shift value one after the other, shifting by 0 for a, 1 for b etc.). Here are the steps:

    • The program computes the letter index txt[k] - 97, 97 being the ASCII code for 'a',
    • it then adds the shift value key[k % l], cycling the values in key in a circular fashion,
    • it takes the modulo 26 to get a letter index between 0 and 25.
    • it finally adds 97, the ASCII value of 'a' to convert the index back into a lowercase letter.

    It would be less redundant and more readable to write it this way:

    for (int i = 0, j = 0; txt[i] != '\0'; i++) {
        int c = (unsigned char)txt[i];
        if (isupper(c)) {
            c = (c - 'A' + key[j++ % l]) % 26 + 'A';
        } else
        if (islower(c)) {
            c = (c - 'a' + key[j++ % l]) % 26 + 'a';
        }
        putchar(c);
    }
    

    Also note that argv[1] should not be passed to strlen() before checking that enough arguments have been passed on the command line.

    Here is a modified version of the program:

    #include <cs50.h>
    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    int main(int argc, string argv[]) {
        if (argc != 2) {
            printf("missing key argument\n");
            return 1;
        }
        string key = argv[1];
        int klen = strlen(key);
        if (klen == 0) {
            printf("key cannot be empty\n");
            return 1;
        }
        for (int i = 0; i < klen; i++) {
            if (!isalpha((unsigned char)key[i])) {
                printf("key '%s' must contain only letters\n", key);
                return 1;
            }
            key[i] = tolower((unsigned char)key[i]) - 'a';
        }
    
        string txt = GetString();
        for (int i = 0, j = 0; txt[i] != '\0'; i++) {
            int c = (unsigned char)txt[i];
            if (isupper(c)) {
                c = (c - 'A' + key[j++ % klen]) % 26 + 'A';
            } else
            if (islower(c)) {
                c = (c - 'a' + key[j++ % klen]) % 26 + 'a';
            }
            putchar(c);
        }
        putchar('\n');
        return 0;
    }