I am trying to use the GNU iconv library to convert a UTF-8 encoded string to KOI8-R. My minimal example is:
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Minimal iconv example: feeds the two bytes 0xD0 0x9F through an iconv
 * conversion descriptor and prints the first output byte in hex.
 *
 * Returns 0 on success, 2 if iconv_open() fails, 3 if iconv() fails.
 *
 * NOTE(review): malloc() is not checked for NULL, and the early returns
 * below skip iconv_close()/free().
 */
int main() {
/* The letter П in UTF-8. */
char* buffer = "\xd0\x9f";
size_t len = 2;
/* Note: since KOI8-R is an 8-bit encoding, the output buffer should only
 * need a length of 1, but iconv returns -1 if the buffer is any smaller
 * than 4 bytes.
 * NOTE(review): this is probably a consequence of the conversion direction
 * chosen in iconv_open() below -- UTF-8 output can need several bytes per
 * input character.
 */
size_t len_in_koi = 4;
char* buffer_in_koi = malloc(len_in_koi+1);
/* A throwaway copy to give to iconv: iconv advances the output pointer it
 * is handed, so the original pointer is kept for reading the result and
 * for free(). */
char* buffer_in_koi_copy = buffer_in_koi;
/* NOTE(review): the prototype is iconv_open(tocode, fromcode). As written,
 * this opens a KOI8-R -> UTF-8 converter, i.e. the opposite of the intended
 * UTF-8 -> KOI8-R direction. */
iconv_t cd = iconv_open("UTF-8", "KOI8-R");
/* iconv_open() reports failure by returning (iconv_t) -1. */
if (cd == (iconv_t) -1) {
fputs("Error while initializing iconv_t handle.\n", stderr);
return 2;
}
/* iconv() returns (size_t) -1 on error; any other value means the input
 * was converted. */
if (iconv(cd, &buffer, &len, &buffer_in_koi_copy, &len_in_koi) != (size_t) -1) {
/* Expecting f0 but get d0. */
/* Read through the original pointer (start of the output), not the copy
 * that iconv advanced. */
printf("Conversion successful! The byte is %x.\n", (unsigned char)(*buffer_in_koi));
} else {
fputs("Error while converting buffer to KOI8-R.\n", stderr);
return 3;
}
/* Release the conversion descriptor and the output buffer. */
iconv_close(cd);
free(buffer_in_koi);
return 0;
}
which (besides not working when my KOI8-R buffer is smaller than four bytes, although it should only need a single byte) incorrectly prints `d0` (the correct encoding of 'П' in KOI8-R is `f0`).
`iconv` gives me the correct answer from the command line (e.g., `echo П | iconv -t KOI8-R | hexdump`), so what am I doing wrong in using its C interface?
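You mixed up the "to" and "from" character set arguments to `iconv_open`. Its signature is `iconv_open(tocode, fromcode)`, so converting UTF-8 to KOI8-R requires `iconv_open("KOI8-R", "UTF-8")`. As written, your program converts from KOI8-R to UTF-8. It just so happens that the character in slot D0 in KOI8-R has D0 as the first byte of its UTF-8 encoding, which is why you see `d0`.
This also explains why the output buffer had to be at least 4 bytes: the two input bytes were treated as two separate KOI8-R characters, each of which takes two bytes in UTF-8.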