Search code examples
c, windows, argv, non-ascii-characters

Accept non ASCII characters


Consider this program:

#include <stdio.h>

/*
 * Print the first command-line argument followed by a newline.
 *
 * Returns 0 after printing, or 1 when no argument was supplied:
 * argv[1] is NULL in that case and must not be handed to %s.
 */
int main(int argc, char* argv[]) {
   if (argc < 2) {
      fprintf(stderr, "usage: alpha <text>\n");
      return 1;
   }
   printf("%s\n", argv[1]);
   return 0;
}

I compile it like this:

x86_64-w64-mingw32-gcc -o alpha alpha.c

The problem is if I give it a non ASCII argument:

$ ./alpha róisín
r�is�n

How can I write and/or compile this program so that it accepts non-ASCII characters? To respond to alk: no, the program itself is printing the wrong bytes. See this example:

$ echo Ω | od -t x1c
0000000  ce  a9  0a
        316 251  \n
0000003

$ ./alpha Ω | od -t x1c
0000000  4f  0d  0a
          O  \r  \n
0000003

Solution

  • The easiest way to do this is with wmain (when building with MinGW-w64, add the -municode option so that wmain is used as the entry point):

    #include <fcntl.h>
    #include <stdio.h>
    
    /*
     * Wide-character entry point: the arguments arrive as wchar_t strings
     * instead of being converted to the narrow code page first.
     *
     * Prints the first argument followed by a newline.
     * Returns 0 on success, 1 when no argument was given or when stdout
     * could not be switched to wide-text mode.
     */
    int wmain (int argc, wchar_t** argv) {
      if (argc < 2)
        return 1;

      /* Put stdout into Unicode text mode; _setmode reports failure as -1. */
      if (_setmode(_fileno(stdout), _O_WTEXT) == -1)
        return 1;

      /* %ls always means a wide string. A plain %s in wprintf is wide only
         in Microsoft's CRT; standard C treats it as a narrow string. */
      wprintf(L"%ls\n", argv[1]);
      return 0;
    }
    

    It can also be done with GetCommandLineW; here is a simple version of the code found at the HandBrake repo:

    #include <stdio.h>
    #include <stdlib.h>
    #include <windows.h>
    
    /*
     * Build a UTF-8 encoded argument vector from the process's wide
     * (UTF-16) command line.
     *
     * On success, stores the argument count in *argc_ptr and a
     * NULL-terminated vector in *argv_ptr, and returns 0. The pointer table
     * and every string live in a single malloc'd block, so the caller
     * releases all of it with one free(*argv_ptr).
     *
     * On failure (command line could not be parsed, a conversion failed, or
     * memory ran out), returns -1 and leaves both outputs untouched.
     */
    int get_argv_utf8(int* argc_ptr, char*** argv_ptr) {
      int argc = 0;
      wchar_t** argv_utf16 = CommandLineToArgvW(GetCommandLineW(), &argc);
      char** argv = NULL;
      size_t offset;
      size_t size;
      int i;
      int rc = -1;

      if (argv_utf16 == NULL)
        return -1;

      /* The pointer table (argc entries plus the terminating NULL) sits at
         the start of the block; string data follows it. */
      offset = ((size_t) argc + 1) * sizeof(char*);
      size = offset;

      /* First pass: with a zero-length output buffer WideCharToMultiByte
         returns the byte count needed, terminator included; 0 means error. */
      for (i = 0; i < argc; i++) {
        int len = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1,
          NULL, 0, NULL, NULL);
        if (len <= 0)
          goto done;
        size += (size_t) len;
      }

      argv = malloc(size);
      if (argv == NULL)
        goto done;

      /* Second pass: convert each argument into the space after the table. */
      for (i = 0; i < argc; i++) {
        int len;
        argv[i] = (char*) argv + offset;
        len = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1,
          argv[i], (int) (size - offset), NULL, NULL);
        if (len <= 0)
          goto done;
        offset += (size_t) len;
      }
      argv[argc] = NULL;  /* fill the slot reserved for the terminator */

      *argc_ptr = argc;
      *argv_ptr = argv;
      argv = NULL;        /* ownership has passed to the caller */
      rc = 0;

    done:
      free(argv);             /* no-op on success: argv is NULL by then */
      LocalFree(argv_utf16);  /* CommandLineToArgvW results are freed with LocalFree */
      return rc;
    }
    
    /*
     * Swap the narrow argv for a UTF-8 copy obtained from get_argv_utf8,
     * then print the first argument followed by a newline.
     *
     * Returns 0 on success, 1 when the conversion failed or no argument
     * was supplied.
     */
    int main(int argc, char** argv) {
      if (get_argv_utf8(&argc, &argv) != 0)
        return 1;

      /* From here on argv is the single malloc'd block handed back by
         get_argv_utf8, so one free() releases it. */
      if (argc < 2) {
        free(argv);
        return 1;
      }

      printf("%s\n", argv[1]);
      free(argv);
      return 0;
    }