Search code examples
cpointerscrossword

Converting Greek words to uppercase


I have to create a function that reads a file called grwords.txt containing around 540000 words which are written in Greek letters.

I have to convert these words to uppercase and fill an array called char **words.

This is what I have so far.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>


void fp();

/*
 * Program entry point.
 *
 * Selects code page 1253 (the Windows Greek code page) for console output,
 * then hands control to fp(). The command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    SetConsoleOutputCP(1253);
    fp();

    return 0;
}

/*
 * Reads whitespace-separated words from "grwords.txt" into a dynamically
 * allocated array of strings (`words`).
 *
 * At most MAX_WORDS words are stored, and each word is truncated by fscanf
 * to MAX_WORD_LEN bytes. If the file cannot be opened, or the word table
 * cannot be allocated, a message is printed and the program exits with
 * status 1.
 *
 * `words` is local to this function, so everything allocated here is
 * released before returning. Handing the words to a caller would require
 * changing the interface.
 */
void fp(){
    enum { MAX_WORDS = 546490, MAX_WORD_LEN = 24 };
    char **words;
    char word[MAX_WORD_LEN + 1];   /* +1 for the terminating '\0' */
    size_t cnt = 0;
    size_t i;
    FILE *file;

    file = fopen("grwords.txt", "rt");
    if (file == NULL){
        printf("File cannot be opened.\n");
        exit(1);
    }

    /* One pointer per word; sizeof *words keeps the size tied to the type. */
    words = malloc(MAX_WORDS * sizeof *words);
    if (words == NULL){
        printf("Out of memory.\n");
        fclose(file);
        exit(1);
    }

    /* The field width in "%24s" must stay equal to MAX_WORD_LEN: fscanf
       stores up to that many bytes plus a terminating '\0' into word. */
    while (cnt < MAX_WORDS && fscanf(file, "%24s", word) == 1){
        words[cnt] = malloc(sizeof word);
        if (words[cnt] == NULL){
            printf("Out of memory.\n");
            break;
        }
        strcpy(words[cnt], word);
        cnt++;
    }
    fclose(file);

    /* Only the first cnt slots were ever assigned. */
    for (i = 0; i < cnt; i++)
        free(words[i]);
    free(words);
}

I'm still trying to figure out pointers. I know that & makes a pointer from a value and * a value from a pointer. Updated the program and it successfully fills the array with the words from the file! I still have no idea how to convert Greek lowercase to uppercase.


Solution

  • Handling Greek words can be dependent on your platform.

    First of all, you need to understand how file handling works. Here is what I wrote:

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    
    #define bufSize 1024 // size of one word buffer, including the terminating '\0'
    // we are going to receive the .txt from cmd line
    // Reads up to N whitespace-separated words from the file named by argv[1],
    // printing each word as read and then again after passing every byte
    // through toupper(). Returns 0 on success, 1 on a usage or open error.
    int main(int argc, char *argv[])
    {
      FILE *fp;
    
      // Assume file has max 10 words
      const size_t N = 10;
    
      // Allocate a 2D array of N rows
      // and bufSize columns.
      // You can think of it like an array
      // of N strings, where every string
      // has, at most, bufSize - 1 characters
      // plus the terminating '\0'.
      char buf[N][bufSize];
    
      // make sure we got the .txt
      if (argc != 2)
      {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
      }
    
      // open the file
      if ((fp = fopen(argv[1], "r")) == NULL)
      { /* Open source file. */
        perror("fopen source-file");
        return 1;
      }
    
      // we will use that for toupper(); it is unsigned because
      // toupper() requires a value representable as unsigned char
      // (or EOF), and bytes of non-ASCII text are negative when
      // plain char is signed
      unsigned char c;
    
      // counters
      size_t i = 0, j;
    
      // Stop after N words so we never write past the last row of buf.
      // The field width is bufSize - 1 because %s stores the characters
      // it reads plus a terminating '\0'.
      while (i < N && fscanf(fp, "%1023s", buf[i]) == 1)
      { /* While we don't reach the end of source. */
        /* Read characters from source file to fill buffer. */
    
        // print what we read
        printf("%s\n", buf[i]);
    
        j = 0;
        // while we are on a letter of word placed
        // in buf[i]
        while (buf[i][j])
        {
          // make the letter capital and print it
          c = (unsigned char)buf[i][j];
          putchar (toupper(c));
          j++;
        }
        i++;
        printf("\ndone with this word\n");
      }
      // close the file
      fclose(fp);
    
      return 0;
    }
    

    For this test.txt file:

    Georgios
    Samaras
    Γιώργος
    Σαμαράς
    

    the code would run as:

    ./exe test.txt
    Georgios
    GEORGIOS
    done with this word
    Samaras
    SAMARAS
    done with this word
    Γιώργος
    Γιώργος
    done with this word
    Σαμαράς
    Σαμαράς
    done with this word
    

    As you can see, I could read the Greek words, but failed to convert them to uppercase: toupper() works on one byte at a time, and a Greek letter does not fit in a single byte here.

    Once you understand how file handling works, you need to use wide characters to read a file with Greek words.

    So, by just modifying the above code, we get:

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    #include <wchar.h>
    #include <wctype.h>
    #include <locale.h>
    
    #define bufSize 1024 // size of one word buffer in wchar_t, including the terminating L'\0'
    
    // Reads up to N whitespace-separated words from the file named by argv[1]
    // as wide characters, printing each word as read and then again after
    // passing every character through towupper().
    // Returns 0 on success, 1 on a usage or open error.
    int main(int argc, char *argv[])
    {
      // The wide-character functions decode the file according to LC_CTYPE.
      // Prefer the UTF-8 locale this example was written for, and fall back
      // to the environment's locale if that one is not installed.
      if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL &&
          setlocale(LC_CTYPE, "") == NULL)
      {
        fprintf(stderr,
                "warning: could not set a locale; non-ASCII input may not be converted\n");
      }
      FILE *fp;
      const size_t N = 15;
      wchar_t buf[N][bufSize];
      if (argc != 2)
      {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
      }
      if ((fp = fopen(argv[1], "r")) == NULL)
      {
        perror("fopen source-file");
        return 1;
      }
      wchar_t c;
      size_t i = 0, j;
      // Stop after N words so we never write past the last row of buf.
      // The field width is bufSize - 1 because %ls stores the characters
      // it reads plus a terminating L'\0'.
      while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
      {
        wprintf( L"%ls\n\n", buf[i]);
        j = 0;
        while (buf[i][j])
        {
          c = buf[i][j];
          putwchar (towupper(c));
          j++;
        }
        i++;
        wprintf(L"\ndone with this word\n");
      }
      fclose(fp);
      return 0;
    }
    

    And now the output is this:

    Georgios
    
    GEORGIOS
    done with this word
    Samaras
    
    SAMARAS
    done with this word
    Γιώργος
    
    ΓΙΏΡΓΟΣ
    done with this word
    Σαμαράς
    
    ΣΑΜΑΡΆΣ
    done with this word
    

    I see that you may want to create a function which reads the words. If you need a simple example of functions in C, you can visit my pseudo-site here.

    As for the 2D array I mentioned above, this picture might help:

    enter image description here

    where N is the number of rows (equal to 4) and M is the number of columns (equal to 5). In the code above, N is N and M is bufSize. I explain more here, where you can also find code for dynamic allocation of a 2D array.

    I now see that you are on Windows. I tested the code on Ubuntu.

    For Windows you might want to take a good look at this question.

    So, once you have read and understood all of the above, here is what you asked for, using dynamic memory management.

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <wchar.h>
    #include <wctype.h>
    #include <locale.h>
    
    #define bufSize 1024 // size of one word buffer in wchar_t, including the terminating L'\0'
    
    wchar_t **get(int N, int M);
    void free2Darray(wchar_t** p, int N);
    
    // Reads up to N whitespace-separated words from the file named by argv[1]
    // into a heap-allocated N x bufSize table of wide characters, printing
    // each word as read and then again after passing every character through
    // towupper(). Returns 0 on success, 1 on a usage, open or allocation error.
    int main(int argc, char *argv[])
    {
      setlocale(LC_CTYPE, "en_GB.UTF-8");
      FILE *fp;
      const size_t N = 15;
      wchar_t** buf;
      if (argc != 2)
      {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
      }
      if ((fp = fopen(argv[1], "r")) == NULL)
      {
        perror("fopen source-file");
        return 1;
      }
      // Allocate only after the checks above, so the early returns
      // cannot leak the table.
      buf = get(N, bufSize);
      if (buf == NULL)
      {
        fprintf(stderr, "Out of memory\n");
        fclose(fp);
        return 1;
      }
      wchar_t c;
      size_t i = 0, j;
      // Stop after N words so we never index past the last row.
      // The field width is bufSize - 1 because %ls stores the characters
      // it reads plus a terminating L'\0'.
      while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
      {
        wprintf( L"%ls\n", buf[i]);
        j = 0;
        while (buf[i][j])
        {
          c = buf[i][j];
          putwchar (towupper(c));
          j++;
        }
        i++;
        wprintf(L"\ndone with this word\n");
      }
      fclose(fp);
      // NEVER FORGET, FREE THE DYNAMIC MEMORY
      free2Darray(buf, N);
      return 0;
    }
    
    /*
     * Allocate an N x M table of wide characters as N separately allocated rows.
     *
     * N is the number of rows and M the number of wchar_t elements per row.
     * Returns the array of row pointers, or NULL when N or M is not positive
     * or when any allocation fails (rows allocated so far are released first,
     * so nothing leaks). Every element starts out as L'\0', so each row is
     * initially an empty wide string.
     *
     * The caller owns the result and must free every row and then the table,
     * e.g. with free2Darray(table, N).
     */
    wchar_t **get(int N, int M) /* Allocate the array */
    {
        int i;
        wchar_t **table;
    
        if (N <= 0 || M <= 0)
            return NULL;
    
        /* calloc zero-fills and rejects a count * size that would overflow. */
        table = calloc((size_t)N, sizeof *table);
        if (table == NULL)
            return NULL;
    
        for(i = 0 ; i < N ; i++)
        {
            table[i] = calloc((size_t)M, sizeof *table[i]);
            if (table[i] == NULL)
            {
                /* Undo the rows allocated before the failure. */
                while (i-- > 0)
                    free(table[i]);
                free(table);
                return NULL;
            }
        }
        return table;
    }
    
    /*
     * Release a table made of N separately allocated rows plus the array of
     * row pointers itself.
     *
     * p is the array of row pointers and N the number of rows in it.
     * Passing NULL for p is a no-op, mirroring free(NULL); individual rows
     * may also be NULL.
     */
    void free2Darray(wchar_t** p, int N)
    {
        int i;
    
        if (p == NULL)
            return;
    
        for(i = 0 ; i < N ; i++)
            free(p[i]);
        free(p);
    }
    

    Note that this code is expected to work on Linux (tested on Ubuntu 12.04), not on Windows (tested on Win 7).