Search code examples
Tags: c, iconv, widechar

Echoing one wide character from stdin back to stdout, using the current locale's encoding


The following simple code is supposed to read one wide char from stdin and echo it back to stdout, but it dies with SIGSEGV on the iconv() call. What is wrong with the code?

#include <unistd.h>   /* STDIN_FILENO */
#include <locale.h>   /* LC_ALL, setlocale() */
#include <langinfo.h> /* nl_langinfo(), CODESET */
#include <wchar.h>    /* wchar_t, putwchar() */
#include <iconv.h>    /* iconv_t, iconv_open(), iconv(), iconv_close() */
#include <stdlib.h>   /* malloc(), EXIT_SUCCESS */

/*
 * Intended to read one multibyte character from stdin, convert it from the
 * current locale's codeset to wchar_t with iconv, and write it to stdout.
 * Input is fed to the converter one byte at a time until the output buffer
 * (a single wchar_t) has been filled or read() reports end of input.
 * The return values of iconv_open() and iconv() are not checked.
 */
int main(void) {
  setlocale(LC_ALL, "");                                            // We initialize the locale
  iconv_t converter = iconv_open("WCHAR_T", nl_langinfo(CODESET));  // We initialize a converter
  wchar_t out;                                                      // We allocate memory for one wide char on stack
  wchar_t* pOut = &out;
  size_t outLeft = sizeof(wchar_t); 

  while(outLeft > 0) {                                              // Until we've read one wide char...
    char in;                                                        // We allocate memory for one byte on stack
    char* pIn=&in;
    size_t inLeft = 1;

    if(read(STDIN_FILENO, pIn, 1) == 0) break;                      // We read one byte from stdin; 0 means end of input (a -1 error return is not checked)
    iconv(&converter, &pIn, &inLeft, (char**)&pOut, &outLeft);      // We feed the byte to the converter
  }

  iconv_close(converter);                                           // We deinitialize a converter
  putwchar(out);                                                    // We echo the wide char back to stdout
  return EXIT_SUCCESS;
}

UPDATE: After the following update based on @gsg's answer:

iconv(converter, &pIn, &inLeft, &pOut, &outLeft);

the code doesn't throw SIGSEGV anymore, but out == L'\n' for any non-ASCII input.


Solution

  • The signature of iconv is

    size_t iconv(iconv_t cd,
                 char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft);
    

    But you call it with a pointer to iconv_t as the first argument:

    iconv(&converter, &pIn, &inLeft, (char**)&pOut, &outLeft);
    

    Which should be

    iconv(converter, &pIn, &inLeft, (char**)&pOut, &outLeft);
    

    An interesting question is why a warning is not generated. For that, let's look at the definition in iconv.h:

    /* Identifier for conversion method from one codeset to another.  */
    typedef void *iconv_t;
    

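    That's an... unfortunate choice: because iconv_t is just void *, and any object pointer (including an iconv_t *) converts to void * implicitly, the compiler accepts &converter without a diagnostic.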

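    I would program this a bit differently, handing iconv() the whole multibyte sequence from a single read() instead of one byte at a time. In the byte-at-a-time loop the input buffer is overwritten on every iteration, so the lead byte of a multibyte character fails with EINVAL (incomplete sequence), the following bytes fail with EILSEQ (invalid on their own), and the first byte that converts successfully is the trailing newline, which is why out ends up as L'\n':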

    #define _XOPEN_SOURCE 500
    #include <stdio.h>
    #include <unistd.h>
    #include <locale.h>
    #include <langinfo.h>
    #include <wchar.h>
    #include <iconv.h>
    #include <stdlib.h>
    #include <err.h>
    
    /*
     * Reads up to sizeof input bytes from stdin in a single read(), converts
     * them from the current locale's codeset to wchar_t with iconv(), and
     * prints the first resulting wide character followed by a newline.
     *
     * Exits with status 2 (via err()/errx()) if the converter cannot be
     * opened, if nothing could be read, or if not even one wide character
     * could be converted. Returns EXIT_SUCCESS otherwise.
     */
    int main(void)
    {
        iconv_t converter;
        char input[8]; /* enough space for a multibyte char */
        wchar_t output[8];
        char *pinput = input;
        char *poutput = (char *)&output[0];
        ssize_t bytes_read;
        size_t error;
        size_t input_bytes_left, output_bytes_left;
        size_t output_bytes_produced;

        setlocale(LC_ALL, "");

        converter = iconv_open("WCHAR_T", nl_langinfo(CODESET));
        if (converter == (iconv_t)-1)
            err(2, "failed to alloc conv_t");

        bytes_read = read(STDIN_FILENO, input, sizeof input);
        if (bytes_read < 0)
            err(2, "bad read");
        /* End of input is not an errno condition, so do not append strerror(). */
        if (bytes_read == 0)
            errx(2, "bad read: end of input");
        input_bytes_left = (size_t)bytes_read;
        output_bytes_left = sizeof output;

        error = iconv(converter,
                      &pinput, &input_bytes_left,
                      &poutput, &output_bytes_left);

        /*
         * iconv() also returns (size_t)-1 when the buffer merely ends in an
         * incomplete or invalid sequence after earlier characters converted
         * fine (e.g. the 8-byte read cut a later character in half). Only the
         * first wide character is printed, so fail only if none was produced.
         */
        output_bytes_produced = sizeof output - output_bytes_left;
        if (output_bytes_produced < sizeof output[0]) {
            if (error == (size_t)-1)
                err(2, "failed conversion");
            /* Success without output: output[0] would be uninitialised. */
            errx(2, "failed conversion: no character produced");
        }

        /* %lc takes a wint_t argument. */
        printf("%lc\n", (wint_t)output[0]);

        iconv_close(converter);
        return EXIT_SUCCESS;
    }