I'm trying to convert a string in German language to UTF-16 using iconv, but in vain. Here's the code for it:
#include <iconv.h>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <errno.h>
// Makes std names (cout, endl, ...) usable unqualified in the code below.
using namespace std;
// Size in bytes of the fixed input and output buffers declared in main().
const size_t BUF_SIZE=1024;
// Minimal wrapper that holds an iconv conversion descriptor for its lifetime.
class IConv {
iconv_t ic_; // descriptor returned by iconv_open()
public:
// Opens a descriptor converting from encoding `from` to encoding `to`
// (note the argument order: target first, source second).
// The result of iconv_open() is stored without being checked for failure.
IConv(const char* to, const char* from)
: ic_(iconv_open(to,from)) { }
// Closes the descriptor unconditionally.
~IConv() { iconv_close(ic_); }
// Converts the NUL-terminated `input` (terminator included, hence the +1)
// into `output`. `out_size` is handed to iconv() as the remaining output
// space and is updated by it.
// NOTE(review): iconv() returns a size_t that is implicitly converted to
// bool here, so a result of 0 becomes false and any non-zero result,
// including the size_t(-1) error value, becomes true.
bool convert(char* input, char* output, size_t& out_size) {
size_t inbufsize = strlen(input)+1;
return iconv(ic_, &input, &inbufsize, &output, &out_size);
}
};
// Demo driver: attempts to convert str1 with IConv and prints the output buffer.
int main(void)
{
char str1[BUF_SIZE] = "tägelîch";
// Pre-filled so that an untouched output buffer is recognisable when printed.
char str2[BUF_SIZE] = "something else";
// The constructor is IConv(to, from): this requests UTF16LE -> en_US.UTF-8.
IConv ic("en_US.UTF-8","UTF16LE");
bool ret;
cout << str1 << endl;
size_t outsize = BUF_SIZE; //you will need it
ret = ic.convert(str1, str2, outsize);
if (ret == false) {
cout << "iconv failed: " << errno << endl;
return -1;
}
// str2 is streamed as a C string, so printing stops at the first zero byte.
cout << str2 << endl;
}
Output:
$ ./a.out
tägelîch
something else
If the from-encoding is changed to `ISO-8859-1`, the result is the same.
On the other hand, iconv utility works fine from the command prompt:
$ echo "TägelîcH" | iconv -f "ISO-8859-1" -t UTF-16LE
T▒▒gel▒▒cH
`iconv --list` shows that both `ISO-8859-1` and `UTF-16LE` are supported. What am I missing?
Is C++ in any way causing this behaviour?
Thanks!
No, C++ isn't the cause.
Your code has several problems.
1) If I'm not wrong, you have swapped the source and target encodings: with `ic("en_US.UTF-8","UTF16LE")` (and `iconv_open(to,from)`) you are asking to convert from "UTF16LE" to "en_US.UTF-8"; if I understand correctly, you want the opposite.
2) I don't know about your platform, but iconv on my Debian doesn't support the "en_US.UTF-8" encoding ("en_US.UTF-8" is a locale name; the encoding itself is normally called "UTF-8"); run `iconv --list` to see whether iconv on your platform supports it.
3) You don't test whether `iconv_open(to,from)` returns `iconv_t(-1)`, the error value.
4) You return `bool` from `convert()`, but `iconv()` returns a `size_t`; in case of error, `iconv()` returns `size_t(-1)`, which, converted to `bool`, becomes `true` (i.e. no error, if I understand correctly).
5) You don't test the value of the third argument passed to `iconv()` (`inbufsize`, the number of input bytes left); it should be zero after the call, otherwise part of the input is still unconverted.
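6) When you initialize `str1` to "tägelîch", the bytes stored are in whatever encoding your source file is saved in (ISO-8859-1 in my case), not necessarily UTF-8; the from-encoding you pass to `iconv_open()` must match it.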
Briefly, if I'm not wrong, you pass a to-encoding that isn't in the list of encodings supported by iconv. The constructor of your `IConv` class initializes `ic_` with `iconv_t(-1)` (the error value). When you call the `convert()` method, `iconv()` fails and returns `size_t(-1)` (the error value), which `convert()` returns as `true` (the no-error value).
The following code isn't perfect but I hope it can help you
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <string.h>

#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
// Makes std names (cout, endl, ...) usable unqualified in the code below.
using namespace std;
// Size in bytes of the fixed input and output buffers declared in main().
const size_t BUF_SIZE=1024;
// Owns an iconv conversion descriptor and converts NUL-terminated strings.
//
// The descriptor is opened in the constructor and closed in the destructor.
// The class is deliberately non-copyable: a copy would hold the same
// descriptor and both destructors would call iconv_close() on it.
class IConv {
    iconv_t ic_;  // open descriptor; the constructor throws instead of keeping iconv_t(-1)

    // Copy operations are declared private and left undefined (pre-C++11
    // idiom) so that accidental copies fail to compile or link.
    IConv (const IConv&);
    IConv& operator= (const IConv&);

public:
    // Opens a descriptor converting from encoding `from` to encoding `to`
    // (target first, source second, as with iconv_open()).
    // Throws std::runtime_error when iconv_open() reports failure.
    IConv (const char* to, const char* from)
        : ic_(iconv_open(to,from)) {
        if ( iconv_t(-1) == ic_ )
            throw std::runtime_error("error from iconv_open()");
    }

    // Releases the descriptor.
    ~IConv ()
    { if ( iconv_t(-1) != ic_) iconv_close(ic_); }

    // Converts the NUL-terminated string `input`, terminator included, into
    // `output`.
    //
    // out_size: on entry the number of bytes available in `output`; on
    //           return the number of bytes still unused.
    // Returns true only when iconv() succeeded AND consumed the whole input.
    // Returns false otherwise; errno then holds the reason (EINVAL is set
    // here for null pointers, the other values come from iconv()).
    bool convert (char* input, char* output, size_t& out_size) {
        // strlen() on a null pointer is undefined behaviour; report it instead.
        if ( !input || !output ) {
            errno = EINVAL;
            return false;
        }

        // +1 so the terminating NUL is converted too and the output ends
        // with a terminator in the target encoding.
        size_t inbufsize = strlen(input)+1;

        // iconv() works on local copies of the pointers, so the caller's
        // buffers keep pointing at their beginnings.
        const size_t rc = iconv(ic_, &input, &inbufsize, &output, &out_size);

        return (size_t(-1) != rc) && (0U == inbufsize);
    }
};
int main(void)
{
char str1[BUF_SIZE] = "tägelîch";
char str2[BUF_SIZE] = "something else";
IConv ic("UTF16LE", "ISO_8859-1");
bool ret;
size_t outsize = BUF_SIZE;
ret = ic.convert(str1, str2, outsize);
if (ret == false) {
cout << "iconv failed: " << errno << endl;
}
else {
cout << "outsize[" << outsize << "]\n";
cout << "str1[" << str1 << "]\n";
cout << "str2[" << str2 << "]\n";
for ( int i = 0 ; i < (BUF_SIZE - outsize) ; ++i )
if ( str2[i] )
cout << "str2[" << i << "]=[" << int(str2[i]) << "]("
<< str2[i] << ")\n";
}
return ret ? EXIT_SUCCESS : EXIT_FAILURE;
}
p.s.: sorry for my bad English.